/ src / text-utils.js
text-utils.js
  1  const isHighSurrogate = charCode => charCode >= 0xd800 && charCode <= 0xdbff;
  2  
  3  const isLowSurrogate = charCode => charCode >= 0xdc00 && charCode <= 0xdfff;
  4  
  5  const isVariationSelector = charCode =>
  6    charCode >= 0xfe00 && charCode <= 0xfe0f;
  7  
  8  const isCombiningCharacter = charCode =>
  9    (charCode >= 0x0300 && charCode <= 0x036f) ||
 10    (charCode >= 0x1ab0 && charCode <= 0x1aff) ||
 11    (charCode >= 0x1dc0 && charCode <= 0x1dff) ||
 12    (charCode >= 0x20d0 && charCode <= 0x20ff) ||
 13    (charCode >= 0xfe20 && charCode <= 0xfe2f);
 14  
 15  // Are the given character codes a high/low surrogate pair?
 16  //
 17  // * `charCodeA` The first character code {Number}.
 18  // * `charCode2` The second character code {Number}.
 19  //
 20  // Return a {Boolean}.
 21  const isSurrogatePair = (charCodeA, charCodeB) =>
 22    isHighSurrogate(charCodeA) && isLowSurrogate(charCodeB);
 23  
 24  // Are the given character codes a variation sequence?
 25  //
 26  // * `charCodeA` The first character code {Number}.
 27  // * `charCode2` The second character code {Number}.
 28  //
 29  // Return a {Boolean}.
 30  const isVariationSequence = (charCodeA, charCodeB) =>
 31    !isVariationSelector(charCodeA) && isVariationSelector(charCodeB);
 32  
 33  // Are the given character codes a combined character pair?
 34  //
 35  // * `charCodeA` The first character code {Number}.
 36  // * `charCode2` The second character code {Number}.
 37  //
 38  // Return a {Boolean}.
 39  const isCombinedCharacter = (charCodeA, charCodeB) =>
 40    !isCombiningCharacter(charCodeA) && isCombiningCharacter(charCodeB);
 41  
 42  // Is the character at the given index the start of high/low surrogate pair
 43  // a variation sequence, or a combined character?
 44  //
 45  // * `string` The {String} to check for a surrogate pair, variation sequence,
 46  //            or combined character.
 47  // * `index`  The {Number} index to look for a surrogate pair, variation
 48  //            sequence, or combined character.
 49  //
 50  // Return a {Boolean}.
 51  const isPairedCharacter = (string, index = 0) => {
 52    const charCodeA = string.charCodeAt(index);
 53    const charCodeB = string.charCodeAt(index + 1);
 54    return (
 55      isSurrogatePair(charCodeA, charCodeB) ||
 56      isVariationSequence(charCodeA, charCodeB) ||
 57      isCombinedCharacter(charCodeA, charCodeB)
 58    );
 59  };
 60  
 61  const IsJapaneseKanaCharacter = charCode =>
 62    charCode >= 0x3000 && charCode <= 0x30ff;
 63  
 64  const isCJKUnifiedIdeograph = charCode =>
 65    charCode >= 0x4e00 && charCode <= 0x9fff;
 66  
 67  const isFullWidthForm = charCode =>
 68    (charCode >= 0xff01 && charCode <= 0xff5e) ||
 69    (charCode >= 0xffe0 && charCode <= 0xffe6);
 70  
 71  const isDoubleWidthCharacter = character => {
 72    const charCode = character.charCodeAt(0);
 73  
 74    return (
 75      IsJapaneseKanaCharacter(charCode) ||
 76      isCJKUnifiedIdeograph(charCode) ||
 77      isFullWidthForm(charCode)
 78    );
 79  };
 80  
 81  const isHalfWidthCharacter = character => {
 82    const charCode = character.charCodeAt(0);
 83  
 84    return (
 85      (charCode >= 0xff65 && charCode <= 0xffdc) ||
 86      (charCode >= 0xffe8 && charCode <= 0xffee)
 87    );
 88  };
 89  
 90  const isKoreanCharacter = character => {
 91    const charCode = character.charCodeAt(0);
 92  
 93    return (
 94      (charCode >= 0xac00 && charCode <= 0xd7a3) ||
 95      (charCode >= 0x1100 && charCode <= 0x11ff) ||
 96      (charCode >= 0x3130 && charCode <= 0x318f) ||
 97      (charCode >= 0xa960 && charCode <= 0xa97f) ||
 98      (charCode >= 0xd7b0 && charCode <= 0xd7ff)
 99    );
100  };
101  
// Is the given character a CJK character, i.e. double-width, half-width, or
// Korean?
//
// * `character` The {String} whose first character is tested.
//
// Returns a {Boolean}.
const isCJKCharacter = character => {
  if (isDoubleWidthCharacter(character)) {
    return true;
  }
  if (isHalfWidthCharacter(character)) {
    return true;
  }
  return isKoreanCharacter(character);
};
106  
// Does `character` begin a new word? It does when the previous character is
// a space, tab, hyphen, or slash and `character` itself is neither a space
// nor a tab.
//
// * `previousCharacter` The {String} character preceding `character`.
// * `character`         The {String} character to test.
//
// Returns a {Boolean}.
const isWordStart = (previousCharacter, character) => {
  switch (previousCharacter) {
    case ' ':
    case '\t':
    case '-':
    case '/':
      // Whitespace right after a separator does not start a word.
      return character !== ' ' && character !== '\t';
    default:
      return false;
  }
};
113  
// May a line be wrapped immediately before `character`? This holds when
// `character` starts a word, or when it is a CJK character.
//
// * `previousCharacter` The {String} character preceding `character`.
// * `character`         The {String} character to test.
//
// Returns a {Boolean}.
const isWrapBoundary = (previousCharacter, character) => {
  if (isWordStart(previousCharacter, character)) {
    return true;
  }
  return isCJKCharacter(character);
};
116  
// Does the given string contain at least one surrogate pair, variation
// sequence, or combined character?
//
// * `string` The {String} to check for the presence of paired characters.
//
// Returns a {Boolean}.
const hasPairedCharacter = string => {
  // Scan every index and stop at the first paired character found.
  for (let position = 0; position < string.length; position++) {
    if (isPairedCharacter(string, position)) {
      return true;
    }
  }
  return false;
};
133  
// Public API: only the predicates listed below are exported; every other
// helper defined in this file stays private to the module.
module.exports = {
  isPairedCharacter,
  hasPairedCharacter,
  isDoubleWidthCharacter,
  isHalfWidthCharacter,
  isKoreanCharacter,
  isWrapBoundary
};