GraphemeBreak.js 7.0 KB


  1. "use strict";
  2. Object.defineProperty(exports, "__esModule", { value: true });
  3. exports.splitGraphemes = exports.GraphemeBreaker = exports.graphemeBreakAtIndex = exports.codePointToClass = exports.BREAK_ALLOWED = exports.BREAK_NOT_ALLOWED = exports.UnicodeTrie = exports.fromCodePoint = exports.toCodePoints = exports.classes = void 0;
  4. var grapheme_break_trie_1 = require("./grapheme-break-trie");
  5. var utrie_1 = require("utrie");
  // Grapheme_Cluster_Break class identifiers.
  // The break rules in this file compare these numbers against the values
  // returned by the trie lookup, so they must not be renumbered.

  // Default class and the control-like classes.
  var Other = 0;
  var Prepend = 1;
  var CR = 2;
  var LF = 3;
  var Control = 4;

  // Classes that attach to a neighbouring character.
  var Extend = 5;
  var Regional_Indicator = 6;
  var SpacingMark = 7;

  // Hangul syllable components.
  var L = 8;
  var V = 9;
  var T = 10;
  var LV = 11;
  var LVT = 12;

  // Emoji-related classes.
  var ZWJ = 13;
  var Extended_Pictographic = 14;
  // NOTE(review): the flag-sequence rule in this file tests against RI, not
  // Regional_Indicator; which of the two the trie data emits is not visible
  // here - confirm before touching either value.
  var RI = 15;
  22. exports.classes = {
  23. Other: Other,
  24. Prepend: Prepend,
  25. CR: CR,
  26. LF: LF,
  27. Control: Control,
  28. Extend: Extend,
  29. Regional_Indicator: Regional_Indicator,
  30. SpacingMark: SpacingMark,
  31. L: L,
  32. V: V,
  33. T: T,
  34. LV: LV,
  35. LVT: LVT,
  36. ZWJ: ZWJ,
  37. Extended_Pictographic: Extended_Pictographic,
  38. RI: RI,
  39. };
  /**
   * Decodes a string into an array of Unicode code points.
   *
   * A high surrogate immediately followed by a low surrogate is combined into
   * a single supplementary code point. Every other code unit, including an
   * unpaired surrogate, is emitted unchanged.
   *
   * @param {string} str - Text to decode.
   * @returns {number[]} The code points of `str`, in order.
   */
  var toCodePoints = function (str) {
    var result = [];
    var total = str.length;
    for (var pos = 0; pos < total; pos++) {
      var unit = str.charCodeAt(pos);
      var isHighSurrogate = unit >= 0xd800 && unit <= 0xdbff;
      if (isHighSurrogate && pos + 1 < total) {
        var trailing = str.charCodeAt(pos + 1);
        if ((trailing & 0xfc00) === 0xdc00) {
          result.push(((unit & 0x3ff) << 10) + (trailing & 0x3ff) + 0x10000);
          // The low surrogate was consumed together with the high one.
          pos++;
          continue;
        }
      }
      result.push(unit);
    }
    return result;
  };
  62. exports.toCodePoints = toCodePoints;
  /**
   * Builds a string from the code points passed as arguments.
   *
   * Delegates to the native `String.fromCodePoint` when it exists. Otherwise
   * each code point above 0xffff is split into a surrogate pair and the code
   * units are converted in batches, so `String.fromCharCode.apply` is never
   * handed an unbounded argument list.
   *
   * @param {...number} codePoints - Code points to encode.
   * @returns {string} The encoded string ('' when called without arguments
   *     on the fallback path).
   */
  var fromCodePoint = function () {
    var codePoints = Array.prototype.slice.call(arguments);
    if (String.fromCodePoint) {
      return String.fromCodePoint.apply(String, codePoints);
    }
    var total = codePoints.length;
    if (!total) {
      return '';
    }
    var output = '';
    var pending = [];
    for (var i = 0; i < total; i++) {
      var current = codePoints[i];
      if (current <= 0xffff) {
        pending.push(current);
      }
      else {
        current -= 0x10000;
        var high = (current >> 10) + 0xd800;
        var low = (current % 0x400) + 0xdc00;
        pending.push(high, low);
      }
      // Flush on the last code point, or once the batch has grown large.
      var isLast = i + 1 === total;
      if (isLast || pending.length > 0x4000) {
        output += String.fromCharCode.apply(String, pending);
        pending.length = 0;
      }
    }
    return output;
  };
  94. exports.fromCodePoint = fromCodePoint;
  95. exports.UnicodeTrie = utrie_1.createTrieFromBase64(grapheme_break_trie_1.base64, grapheme_break_trie_1.byteLength);
  96. exports.BREAK_NOT_ALLOWED = '×';
  97. exports.BREAK_ALLOWED = '÷';
  /**
   * Looks up the break class stored in the exported trie for a code point.
   *
   * @param {number} codePoint - Code point to classify.
   * @returns {*} Whatever `exports.UnicodeTrie.get` returns for `codePoint`.
   */
  var codePointToClass = function (codePoint) {
    var trie = exports.UnicodeTrie;
    return trie.get(codePoint);
  };
  99. exports.codePointToClass = codePointToClass;
  /**
   * Decides whether a grapheme cluster boundary falls between the code points
   * at positions `index - 1` and `index`, using only their break classes.
   *
   * @param {number[]} _codePoints - Unused; kept so the signature stays stable.
   * @param {number[]} classTypes - Break class of every code point.
   * @param {number} index - Candidate boundary position.
   * @returns {string} `exports.BREAK_NOT_ALLOWED` or `exports.BREAK_ALLOWED`.
   */
  var _graphemeBreakAtIndex = function (_codePoints, classTypes, index) {
    var before = classTypes[index - 1];
    var after = classTypes[index];
    var isControlLike = function (cls) {
      return cls === CR || cls === LF || cls === Control;
    };

    // GB3: keep CR LF together.
    if (before === CR && after === LF) {
      return exports.BREAK_NOT_ALLOWED;
    }
    // GB4 / GB5: otherwise always break after and before controls.
    if (isControlLike(before) || isControlLike(after)) {
      return exports.BREAK_ALLOWED;
    }

    // GB6 - GB8: never split a Hangul syllable sequence.
    if (before === L && (after === L || after === V || after === LV || after === LVT)) {
      return exports.BREAK_NOT_ALLOWED;
    }
    if ((before === LV || before === V) && (after === V || after === T)) {
      return exports.BREAK_NOT_ALLOWED;
    }
    if ((before === LVT || before === T) && after === T) {
      return exports.BREAK_NOT_ALLOWED;
    }

    // GB9: no break before Extend or ZWJ.
    // GB9a: no break before SpacingMark.
    // GB9b: no break after Prepend.
    if (after === ZWJ || after === Extend || after === SpacingMark || before === Prepend) {
      return exports.BREAK_NOT_ALLOWED;
    }

    // GB11: Extended_Pictographic Extend* ZWJ x Extended_Pictographic.
    if (before === ZWJ && after === Extended_Pictographic) {
      var emojiScan = index - 2;
      // Skip back over any Extend run sitting between the emoji and the ZWJ.
      while (classTypes[emojiScan] === Extend) {
        emojiScan--;
      }
      if (classTypes[emojiScan] === Extended_Pictographic) {
        return exports.BREAK_NOT_ALLOWED;
      }
    }

    // GB12 / GB13: regional indicators pair up, so the boundary is suppressed
    // only when an even number of RI precede the one just before the boundary.
    if (before === RI && after === RI) {
      var precedingRI = 0;
      for (var flagScan = index - 2; classTypes[flagScan] === RI; flagScan--) {
        precedingRI++;
      }
      if (precedingRI % 2 === 0) {
        return exports.BREAK_NOT_ALLOWED;
      }
    }

    // GB999: break everywhere else.
    return exports.BREAK_ALLOWED;
  };
  /**
   * Reports whether a grapheme cluster boundary exists at `index` in a
   * code point array.
   *
   * Every code point is classified on each call before the rule check runs.
   *
   * @param {number[]} codePoints - Code points of the text.
   * @param {number} index - Boundary position to test.
   * @returns {string} `exports.BREAK_ALLOWED` or `exports.BREAK_NOT_ALLOWED`.
   */
  var graphemeBreakAtIndex = function (codePoints, index) {
    // GB1 / GB2: the start and the end of the text are always boundaries.
    var atTextEdge = index === 0 || index >= codePoints.length;
    if (atTextEdge) {
      return exports.BREAK_ALLOWED;
    }
    return _graphemeBreakAtIndex(codePoints, codePoints.map(exports.codePointToClass), index);
  };
  179. exports.graphemeBreakAtIndex = graphemeBreakAtIndex;
  /**
   * Creates a cursor that yields the grapheme clusters of `str` one by one.
   *
   * The string is decoded and classified once up front; each `next()` call
   * then scans forward to the next allowed boundary.
   *
   * @param {string} str - Text to segment.
   * @returns {{next: function(): {done: boolean, value: (string|null)}}}
   *     Object whose `next()` returns `{ value, done: false }` for each
   *     cluster and `{ done: true, value: null }` once the text is exhausted.
   */
  var GraphemeBreaker = function (str) {
    var codePoints = exports.toCodePoints(str);
    var length = codePoints.length;
    var index = 0;
    var lastEnd = 0;
    var classTypes = codePoints.map(exports.codePointToClass);
    return {
      next: function () {
        if (index >= length) {
          return { done: true, value: null };
        }
        // Advance at least one code point, then keep going while the rules
        // forbid a break. Reaching the end of the text is always a boundary,
        // so the loop exits there without asking the rule table.
        do {
          index++;
        } while (index < length &&
          _graphemeBreakAtIndex(codePoints, classTypes, index) === exports.BREAK_NOT_ALLOWED);
        // The loop only exits on a boundary, so a cluster is always emitted;
        // the statements that used to follow this return were unreachable.
        var value = exports.fromCodePoint.apply(null, codePoints.slice(lastEnd, index));
        lastEnd = index;
        return { value: value, done: false };
      },
    };
  };
  205. exports.GraphemeBreaker = GraphemeBreaker;
  /**
   * Splits a string into an array of grapheme clusters.
   *
   * Drains the cursor returned by `exports.GraphemeBreaker` and collects each
   * truthy value it yields.
   *
   * @param {string} str - Text to segment.
   * @returns {string[]} The clusters of `str`, in order.
   */
  var splitGraphemes = function (str) {
    var cursor = exports.GraphemeBreaker(str);
    var clusters = [];
    for (var step = cursor.next(); !step.done; step = cursor.next()) {
      if (!step.value) {
        continue;
      }
      clusters.push(step.value.slice());
    }
    return clusters;
  };
  217. exports.splitGraphemes = splitGraphemes;
  218. //# sourceMappingURL=GraphemeBreak.js.map