index.js 1.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  'use strict'

  /**
   * Encode a JavaScript string as UTF-8.
   *
   * The string is read one UTF-16 code unit at a time with `charCodeAt`.
   * A high surrogate that is immediately followed by a low surrogate is
   * combined into a single code point and written as four bytes. A surrogate
   * that is not part of such a pair is written as U+FFFD (bytes EF BF BD).
   *
   * @param {string} input - Text to encode.
   * @returns {ArrayBuffer} Buffer backing a `Uint8Array` of the UTF-8 bytes.
   */
  function encodeUtf8 (input) {
    var result = []
    var size = input.length

    for (var index = 0; index < size; index++) {
      var point = input.charCodeAt(index)

      // A high surrogate can only be combined when another code unit follows it
      if (point >= 0xD800 && point <= 0xDBFF && size > index + 1) {
        var second = input.charCodeAt(index + 1)

        if (second >= 0xDC00 && second <= 0xDFFF) {
          // https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
          point = (point - 0xD800) * 0x400 + second - 0xDC00 + 0x10000
          // The low surrogate is consumed here, so skip it on the next iteration
          index += 1
        }
      }

      // US-ASCII
      if (point < 0x80) {
        result.push(point)
        continue
      }

      // 2-byte UTF-8
      if (point < 0x800) {
        result.push(
          (point >> 6) | 0xC0,
          (point & 0x3F) | 0x80
        )
        continue
      }

      // 3-byte UTF-8 (the surrogate range D800-DFFF is deliberately excluded)
      if (point < 0xD800 || (point >= 0xE000 && point < 0x10000)) {
        result.push(
          (point >> 12) | 0xE0,
          ((point >> 6) & 0x3F) | 0x80,
          (point & 0x3F) | 0x80
        )
        continue
      }

      // 4-byte UTF-8
      if (point >= 0x10000 && point <= 0x10FFFF) {
        result.push(
          (point >> 18) | 0xF0,
          ((point >> 12) & 0x3F) | 0x80,
          ((point >> 6) & 0x3F) | 0x80,
          (point & 0x3F) | 0x80
        )
        continue
      }

      // Invalid character (unpaired surrogate): emit U+FFFD
      result.push(0xEF, 0xBF, 0xBD)
    }

    return new Uint8Array(result).buffer
  }

  // Export for CommonJS. The guard keeps the file loadable where `module` is
  // not defined; an unguarded assignment would throw a ReferenceError there.
  if (typeof module !== 'undefined') {
    module.exports = encodeUtf8
  }