12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455 |
- 'use strict'
- module.exports = function encodeUtf8 (input) {
- var result = []
- var size = input.length
- for (var index = 0; index < size; index++) {
- var point = input.charCodeAt(index)
- if (point >= 0xD800 && point <= 0xDBFF && size > index + 1) {
- var second = input.charCodeAt(index + 1)
- if (second >= 0xDC00 && second <= 0xDFFF) {
- // https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
- point = (point - 0xD800) * 0x400 + second - 0xDC00 + 0x10000
- index += 1
- }
- }
- // US-ASCII
- if (point < 0x80) {
- result.push(point)
- continue
- }
- // 2-byte UTF-8
- if (point < 0x800) {
- result.push((point >> 6) | 192)
- result.push((point & 63) | 128)
- continue
- }
- // 3-byte UTF-8
- if (point < 0xD800 || (point >= 0xE000 && point < 0x10000)) {
- result.push((point >> 12) | 224)
- result.push(((point >> 6) & 63) | 128)
- result.push((point & 63) | 128)
- continue
- }
- // 4-byte UTF-8
- if (point >= 0x10000 && point <= 0x10FFFF) {
- result.push((point >> 18) | 240)
- result.push(((point >> 12) & 63) | 128)
- result.push(((point >> 6) & 63) | 128)
- result.push((point & 63) | 128)
- continue
- }
- // Invalid character
- result.push(0xEF, 0xBF, 0xBD)
- }
- return new Uint8Array(result).buffer
- }
|