/ ink / stringWidth.ts
stringWidth.ts
  1  import emojiRegex from 'emoji-regex'
  2  import { eastAsianWidth } from 'get-east-asian-width'
  3  import stripAnsi from 'strip-ansi'
  4  import { getGraphemeSegmenter } from '../utils/intl.js'
  5  
  6  const EMOJI_REGEX = emojiRegex()
  7  
  8  /**
  9   * Fallback JavaScript implementation of stringWidth when Bun.stringWidth is not available.
 10   *
 11   * Get the display width of a string as it would appear in a terminal.
 12   *
 13   * This is a more accurate alternative to the string-width package that correctly handles
 14   * characters like ⚠ (U+26A0) which string-width incorrectly reports as width 2.
 15   *
 16   * The implementation uses eastAsianWidth directly with ambiguousAsWide: false,
 17   * which correctly treats ambiguous-width characters as narrow (width 1) as
 18   * recommended by the Unicode standard for Western contexts.
 19   */
 20  function stringWidthJavaScript(str: string): number {
 21    if (typeof str !== 'string' || str.length === 0) {
 22      return 0
 23    }
 24  
 25    // Fast path: pure ASCII string (no ANSI codes, no wide chars)
 26    let isPureAscii = true
 27    for (let i = 0; i < str.length; i++) {
 28      const code = str.charCodeAt(i)
 29      // Check for non-ASCII or ANSI escape (0x1b)
 30      if (code >= 127 || code === 0x1b) {
 31        isPureAscii = false
 32        break
 33      }
 34    }
 35    if (isPureAscii) {
 36      // Count printable characters (exclude control chars)
 37      let width = 0
 38      for (let i = 0; i < str.length; i++) {
 39        const code = str.charCodeAt(i)
 40        if (code > 0x1f) {
 41          width++
 42        }
 43      }
 44      return width
 45    }
 46  
 47    // Strip ANSI if escape character is present
 48    if (str.includes('\x1b')) {
 49      str = stripAnsi(str)
 50      if (str.length === 0) {
 51        return 0
 52      }
 53    }
 54  
 55    // Fast path: simple Unicode (no emoji, variation selectors, or joiners)
 56    if (!needsSegmentation(str)) {
 57      let width = 0
 58      for (const char of str) {
 59        const codePoint = char.codePointAt(0)!
 60        if (!isZeroWidth(codePoint)) {
 61          width += eastAsianWidth(codePoint, { ambiguousAsWide: false })
 62        }
 63      }
 64      return width
 65    }
 66  
 67    let width = 0
 68  
 69    for (const { segment: grapheme } of getGraphemeSegmenter().segment(str)) {
 70      // Check for emoji first (most emoji sequences are width 2)
 71      EMOJI_REGEX.lastIndex = 0
 72      if (EMOJI_REGEX.test(grapheme)) {
 73        width += getEmojiWidth(grapheme)
 74        continue
 75      }
 76  
 77      // Calculate width for non-emoji graphemes
 78      // For grapheme clusters (like Devanagari conjuncts with virama+ZWJ), only count
 79      // the first non-zero-width character's width since the cluster renders as one glyph
 80      for (const char of grapheme) {
 81        const codePoint = char.codePointAt(0)!
 82        if (!isZeroWidth(codePoint)) {
 83          width += eastAsianWidth(codePoint, { ambiguousAsWide: false })
 84          break
 85        }
 86      }
 87    }
 88  
 89    return width
 90  }
 91  
 92  function needsSegmentation(str: string): boolean {
 93    for (const char of str) {
 94      const cp = char.codePointAt(0)!
 95      // Emoji ranges
 96      if (cp >= 0x1f300 && cp <= 0x1faff) return true
 97      if (cp >= 0x2600 && cp <= 0x27bf) return true
 98      if (cp >= 0x1f1e6 && cp <= 0x1f1ff) return true
 99      // Variation selectors, ZWJ
100      if (cp >= 0xfe00 && cp <= 0xfe0f) return true
101      if (cp === 0x200d) return true
102    }
103    return false
104  }
105  
/**
 * Width in terminal cells of a grapheme that has already been classified as
 * emoji by the caller.
 *
 * - A grapheme starting with a regional indicator (U+1F1E6–U+1F1FF) is 1 cell
 *   when it consists of exactly one code point and 2 cells otherwise.
 * - An incomplete keycap — a digit, `#` or `*` followed only by U+FE0F, with
 *   no U+20E3 — is 1 cell.
 * - Everything else is 2 cells.
 *
 * @param grapheme - A single grapheme cluster.
 * @returns 1 or 2.
 */
function getEmojiWidth(grapheme: string): number {
  const lead = grapheme.codePointAt(0)!

  const startsWithRegionalIndicator = lead >= 0x1f1e6 && lead <= 0x1f1ff
  if (startsWithRegionalIndicator) {
    // Spread iterates by code point, so an astral indicator counts once.
    const codePointCount = [...grapheme].length
    return codePointCount === 1 ? 1 : 2
  }

  // Keycap bases are single UTF-16 units, so a length of 2 means exactly
  // "base + one more unit", and index 1 is that second unit.
  const isKeycapBase =
    lead === 0x23 || lead === 0x2a || (lead >= 0x30 && lead <= 0x39)
  if (
    isKeycapBase &&
    grapheme.length === 2 &&
    grapheme.codePointAt(1) === 0xfe0f
  ) {
    return 1
  }

  return 2
}
128  
/**
 * Reports whether a code point occupies no terminal cell of its own.
 *
 * Covers control characters, invisible format characters (zero-width
 * spaces/joiners, bidirectional controls, word joiner and related operators,
 * BOM), variation selectors, combining diacritics, a heuristic set of Indic,
 * Thai and Lao combining marks, Arabic format characters, lone surrogates and
 * tag characters.
 *
 * Bidirectional controls (U+200E–U+200F, U+202A–U+202E, U+2066–U+2069,
 * U+061C) and the remainder of U+2060–U+206F are format / default-ignorable
 * code points that take no cell, so they are reported as zero-width here;
 * otherwise callers would hand them to eastAsianWidth and count a cell each.
 *
 * @param codePoint - A Unicode code point (not a UTF-16 code unit).
 * @returns true when the code point should contribute 0 to a display width.
 */
function isZeroWidth(codePoint: number): boolean {
  // Fast path for the common printable ranges: ASCII, then Latin-1 through
  // the end of the pre-combining blocks, where only SOFT HYPHEN is invisible.
  if (codePoint >= 0x20 && codePoint < 0x7f) return false
  if (codePoint >= 0xa0 && codePoint < 0x0300) return codePoint === 0x00ad

  // C0 controls, DEL and C1 controls
  if (codePoint <= 0x1f || (codePoint >= 0x7f && codePoint <= 0x9f)) return true

  // Zero-width and invisible format characters
  if (
    (codePoint >= 0x200b && codePoint <= 0x200f) || // ZW space/non-joiner/joiner, LRM, RLM
    (codePoint >= 0x202a && codePoint <= 0x202e) || // Bidi embeddings and overrides
    (codePoint >= 0x2060 && codePoint <= 0x206f) || // Word joiner, invisible operators, bidi isolates
    codePoint === 0xfeff // BOM
  ) {
    return true
  }

  // Variation selectors (BMP block and supplement)
  if (
    (codePoint >= 0xfe00 && codePoint <= 0xfe0f) ||
    (codePoint >= 0xe0100 && codePoint <= 0xe01ef)
  ) {
    return true
  }

  // Combining diacritical marks
  if (
    (codePoint >= 0x0300 && codePoint <= 0x036f) ||
    (codePoint >= 0x1ab0 && codePoint <= 0x1aff) ||
    (codePoint >= 0x1dc0 && codePoint <= 0x1dff) ||
    (codePoint >= 0x20d0 && codePoint <= 0x20ff) ||
    (codePoint >= 0xfe20 && codePoint <= 0xfe2f)
  ) {
    return true
  }

  // Indic script combining marks (U+0900 through U+0D4F). The script blocks in
  // this span are 0x80 code points wide and laid out alike, so the position
  // inside the block (low 7 bits) is used as a heuristic classifier.
  if (codePoint >= 0x0900 && codePoint <= 0x0d4f) {
    const offset = codePoint & 0x7f
    if (offset <= 0x03) return true // Signs at block start
    if (offset >= 0x3a && offset <= 0x4f) return true // Vowel signs, virama
    if (offset >= 0x51 && offset <= 0x57) return true // Stress signs
    if (offset >= 0x62 && offset <= 0x63) return true // Vowel signs
  }

  // Thai/Lao combining marks
  // Note: U+0E32 (SARA AA), U+0E33 (SARA AM), U+0EB2, U+0EB3 are spacing
  // vowels (width 1), not combining marks, so they are deliberately excluded.
  if (
    codePoint === 0x0e31 || // Thai MAI HAN-AKAT
    (codePoint >= 0x0e34 && codePoint <= 0x0e3a) || // Thai vowel signs (skip U+0E32, U+0E33)
    (codePoint >= 0x0e47 && codePoint <= 0x0e4e) || // Thai vowel signs and marks
    codePoint === 0x0eb1 || // Lao MAI KAN
    (codePoint >= 0x0eb4 && codePoint <= 0x0ebc) || // Lao vowel signs (skip U+0EB2, U+0EB3)
    (codePoint >= 0x0ec8 && codePoint <= 0x0ecd) // Lao tone marks
  ) {
    return true
  }

  // Arabic and Syriac format characters
  if (
    (codePoint >= 0x0600 && codePoint <= 0x0605) ||
    codePoint === 0x061c || // Arabic letter mark (bidi control)
    codePoint === 0x06dd ||
    codePoint === 0x070f ||
    codePoint === 0x08e2
  ) {
    return true
  }

  // Surrogates, tag characters
  if (codePoint >= 0xd800 && codePoint <= 0xdfff) return true
  if (codePoint >= 0xe0000 && codePoint <= 0xe007f) return true

  return false
}
204  
// Why the native implementation is preferred when present: a complex-script
// grapheme such as the Devanagari sequence ka + virama + ZWJ + ssa renders as
// one ligature glyph yet occupies 2 terminal cells (wcwidth sums the base
// consonants). Bun.stringWidth reports 2, matching the terminal's cell
// allocation, which is what cursor positioning needs; the JS fallback's
// per-grapheme width of 1 would desync Ink's layout from the terminal.
//
// The lookup happens exactly once, at module scope, instead of on every call:
// per the original author's note, typeof guards deopt property access and this
// is a hot path (~100k calls/frame).
const bunStringWidth =
  typeof Bun === 'undefined' || typeof Bun.stringWidth !== 'function'
    ? null
    : Bun.stringWidth

// Options passed to Bun.stringWidth on every call; allocated once and reused.
const BUN_STRING_WIDTH_OPTS = { ambiguousIsNarrow: true } as const

/**
 * Display width of `str` in terminal cells.
 *
 * Bound at module load to Bun.stringWidth (called with `ambiguousIsNarrow: true`)
 * when it exists, and to the JavaScript fallback otherwise.
 */
export const stringWidth: (str: string) => number =
  bunStringWidth === null
    ? stringWidthJavaScript
    : str => bunStringWidth(str, BUN_STRING_WIDTH_OPTS)