text-utils.js
// Is the given UTF-16 code unit a high (leading) surrogate, U+D800–U+DBFF?
const isHighSurrogate = charCode => charCode >= 0xd800 && charCode <= 0xdbff;

// Is the given UTF-16 code unit a low (trailing) surrogate, U+DC00–U+DFFF?
const isLowSurrogate = charCode => charCode >= 0xdc00 && charCode <= 0xdfff;

// Is the given character code a variation selector, U+FE00–U+FE0F?
const isVariationSelector = charCode =>
  charCode >= 0xfe00 && charCode <= 0xfe0f;

// Is the given character code inside one of the combining-mark ranges
// recognized by this module?
const isCombiningCharacter = charCode =>
  (charCode >= 0x0300 && charCode <= 0x036f) ||
  (charCode >= 0x1ab0 && charCode <= 0x1aff) ||
  (charCode >= 0x1dc0 && charCode <= 0x1dff) ||
  (charCode >= 0x20d0 && charCode <= 0x20ff) ||
  (charCode >= 0xfe20 && charCode <= 0xfe2f);

// Are the given character codes a high/low surrogate pair?
//
// * `charCodeA` The first character code {Number}.
// * `charCodeB` The second character code {Number}.
//
// Return a {Boolean}.
const isSurrogatePair = (charCodeA, charCodeB) =>
  isHighSurrogate(charCodeA) && isLowSurrogate(charCodeB);

// Are the given character codes a variation sequence, i.e. a code that is not
// itself a variation selector followed by one that is?
//
// * `charCodeA` The first character code {Number}.
// * `charCodeB` The second character code {Number}.
//
// Return a {Boolean}.
const isVariationSequence = (charCodeA, charCodeB) =>
  !isVariationSelector(charCodeA) && isVariationSelector(charCodeB);

// Are the given character codes a combined character pair, i.e. a code that is
// not itself a combining mark followed by one that is?
//
// * `charCodeA` The first character code {Number}.
// * `charCodeB` The second character code {Number}.
//
// Return a {Boolean}.
const isCombinedCharacter = (charCodeA, charCodeB) =>
  !isCombiningCharacter(charCodeA) && isCombiningCharacter(charCodeB);

// Is the character at the given index the start of a high/low surrogate pair,
// a variation sequence, or a combined character?
//
// * `string` The {String} to check for a surrogate pair, variation sequence,
//   or combined character.
// * `index` The {Number} index to look for a surrogate pair, variation
//   sequence, or combined character.
//
// Return a {Boolean}.
const isPairedCharacter = (string, index = 0) => {
  // `charCodeAt` yields NaN past the end of the string. Each pair predicate
  // requires the second code to fall inside a numeric range, which NaN never
  // does, so the final code unit of a string is never reported as a pair.
  const charCodeA = string.charCodeAt(index);
  const charCodeB = string.charCodeAt(index + 1);
  return (
    isSurrogatePair(charCodeA, charCodeB) ||
    isVariationSequence(charCodeA, charCodeB) ||
    isCombinedCharacter(charCodeA, charCodeB)
  );
};

// Is the given character code in U+3000–U+30FF (CJK symbols and punctuation,
// hiragana, and katakana)?
const isJapaneseKanaCharacter = charCode =>
  charCode >= 0x3000 && charCode <= 0x30ff;

// Is the given character code in U+4E00–U+9FFF?
const isCJKUnifiedIdeograph = charCode =>
  charCode >= 0x4e00 && charCode <= 0x9fff;

// Is the given character code in U+FF01–U+FF5E or U+FFE0–U+FFE6?
const isFullWidthForm = charCode =>
  (charCode >= 0xff01 && charCode <= 0xff5e) ||
  (charCode >= 0xffe0 && charCode <= 0xffe6);

// Is the given character a double-width character (kana range, CJK unified
// ideograph, or full-width form)?
//
// * `character` The {String} to check; only its first code unit is examined.
//
// Returns a {Boolean}.
const isDoubleWidthCharacter = character => {
  const charCode = character.charCodeAt(0);

  return (
    isJapaneseKanaCharacter(charCode) ||
    isCJKUnifiedIdeograph(charCode) ||
    isFullWidthForm(charCode)
  );
};

// Is the given character a half-width form (U+FF65–U+FFDC or
// U+FFE8–U+FFEE)?
//
// * `character` The {String} to check; only its first code unit is examined.
//
// Returns a {Boolean}.
const isHalfWidthCharacter = character => {
  const charCode = character.charCodeAt(0);

  return (
    (charCode >= 0xff65 && charCode <= 0xffdc) ||
    (charCode >= 0xffe8 && charCode <= 0xffee)
  );
};

// Is the given character in one of the Hangul ranges recognized by this
// module?
//
// * `character` The {String} to check; only its first code unit is examined.
//
// Returns a {Boolean}.
const isKoreanCharacter = character => {
  const charCode = character.charCodeAt(0);

  return (
    (charCode >= 0xac00 && charCode <= 0xd7a3) ||
    (charCode >= 0x1100 && charCode <= 0x11ff) ||
    (charCode >= 0x3130 && charCode <= 0x318f) ||
    (charCode >= 0xa960 && charCode <= 0xa97f) ||
    (charCode >= 0xd7b0 && charCode <= 0xd7ff)
  );
};

// Is the given character double-width, half-width, or Korean?
const isCJKCharacter = character =>
  isDoubleWidthCharacter(character) ||
  isHalfWidthCharacter(character) ||
  isKoreanCharacter(character);

// Does `character` begin a word, i.e. is it a non-blank character that
// directly follows a space, tab, hyphen, or slash?
const isWordStart = (previousCharacter, character) =>
  (previousCharacter === ' ' ||
    previousCharacter === '\t' ||
    previousCharacter === '-' ||
    previousCharacter === '/') &&
  character !== ' ' &&
  character !== '\t';

// Can a line be wrapped immediately before `character`?
//
// * `previousCharacter` The {String} character preceding `character`.
// * `character` The {String} character that would start the next line.
//
// Returns a {Boolean}: true at a word start or before any CJK character.
const isWrapBoundary = (previousCharacter, character) =>
  isWordStart(previousCharacter, character) || isCJKCharacter(character);
// Does the given string contain at least surrogate pair, variation sequence, 118 // or combined character? 119 // 120 // * `string` The {String} to check for the presence of paired characters. 121 // 122 // Returns a {Boolean}. 123 const hasPairedCharacter = string => { 124 let index = 0; 125 while (index < string.length) { 126 if (isPairedCharacter(string, index)) { 127 return true; 128 } 129 index++; 130 } 131 return false; 132 }; 133 134 module.exports = { 135 isPairedCharacter, 136 hasPairedCharacter, 137 isDoubleWidthCharacter, 138 isHalfWidthCharacter, 139 isKoreanCharacter, 140 isWrapBoundary 141 };