Cradicle Explorer

/ utils / sliceAnsi.ts
sliceAnsi.ts
 1  import {
 2    type AnsiCode,
 3    ansiCodesToString,
 4    reduceAnsiCodes,
 5    tokenize,
 6    undoAnsiCodes,
 7  } from '@alcalzone/ansi-tokenize'
 8  import { stringWidth } from '../ink/stringWidth.js'
 9  
/**
 * Reports whether an ANSI code is an "end code": one whose own escape
 * sequence is identical to the sequence that terminates it (e.g., a
 * hyperlink close).
 */
function isEndCode(code: AnsiCode): boolean {
  const { code: sequence, endCode: terminator } = code
  return sequence === terminator
}
14  
/**
 * Returns a new array holding only the "start codes" from `codes`, i.e. every
 * entry that isEndCode does not classify as an end code. The input array is
 * left untouched and relative order is preserved.
 */
function filterStartCodes(codes: AnsiCode[]): AnsiCode[] {
  const isStartCode = (candidate: AnsiCode): boolean => !isEndCode(candidate)
  return codes.filter(isStartCode)
}
19  
/**
 * Slice a string containing ANSI escape codes by display-cell position.
 *
 * Unlike the slice-ansi package, this properly handles OSC 8 hyperlink
 * sequences because @alcalzone/ansi-tokenize tokenizes them correctly.
 *
 * Styles that are still active when the slice begins are re-emitted at the
 * front of the result, and any start code still active when the slice stops
 * is undone at the back, so the returned string opens and closes its own
 * styling.
 *
 * @param str - Text that may contain ANSI escape sequences.
 * @param start - First display cell to include. Callers are expected to
 *   measure positions in display cells (via stringWidth), not code units.
 * @param end - Display cell at which to stop (exclusive). When omitted, the
 *   slice runs to the end of `str`.
 * @returns The sliced text with its styling opened and closed, or `''` when
 *   the requested range contains no characters.
 */
export default function sliceAnsi(
  str: string,
  start: number,
  end?: number,
): string {
  // An empty or inverted range can never include a character: the loop below
  // would break on the first token at/after `end` because nothing has been
  // included yet. Skip tokenizing entirely.
  if (end !== undefined && end <= start) return ''

  // Don't pass `end` to tokenize — it counts code units, not display cells,
  // so it drops tokens early for text with zero-width combining marks.
  const tokens = tokenize(str)
  let activeCodes: AnsiCode[] = []
  let position = 0
  let result = ''
  let include = false

  for (const token of tokens) {
    // Advance by display width, not code units. Combining marks (Devanagari
    // matras, virama, diacritics) are width 0 — counting them via .length
    // advanced position past `end` early and truncated the slice. Callers
    // pass start/end in display cells (via stringWidth), so position must
    // track the same units.
    const width =
      token.type === 'ansi' ? 0 : token.fullWidth ? 2 : stringWidth(token.value)

    // Break AFTER trailing zero-width marks — a combining mark attaches to
    // the preceding base char, so "भा" (भ + ा, 1 display cell) sliced at
    // end=1 must include the ा. Breaking on position >= end BEFORE the
    // zero-width check would drop it and render भ bare. ANSI codes are
    // width 0 but must NOT be included past end (they open new style runs
    // that leak into the undo sequence), so gate on char type too. The
    // !include guard ensures empty slices stay empty even when the string
    // starts with a zero-width char (BOM, ZWJ).
    if (end !== undefined && position >= end) {
      if (token.type === 'ansi' || width > 0 || !include) break
    }

    if (token.type === 'ansi') {
      // Track every code seen so far; the list is reduced to the codes that
      // are actually in effect when the slice starts and when it ends.
      activeCodes.push(token)
      if (include) {
        // Emit all ANSI codes during the slice
        result += token.code
      }
    } else {
      if (!include && position >= start) {
        // Skip leading zero-width marks at the start boundary — they belong
        // to the preceding base char in the left half. Without this, the
        // mark appears in BOTH halves: left+right ≠ original. Only applies
        // when start > 0 (otherwise there's no preceding char to own it).
        if (start > 0 && width === 0) continue
        include = true
        // Reduce and filter to only active start codes, then open the slice
        // with them so it is styled like the original text at this position.
        activeCodes = filterStartCodes(reduceAnsiCodes(activeCodes))
        result = ansiCodesToString(activeCodes)
      }

      if (include) {
        result += token.value
      }

      position += width
    }
  }

  // Nothing was included (start lies past the text, or the string holds only
  // escape codes). No style was opened in `result`, so there is nothing to
  // undo — returning the undo sequence here would emit stray closing codes
  // for an otherwise empty slice.
  if (!include) return ''

  // Only undo start codes that are still active
  const activeStartCodes = filterStartCodes(reduceAnsiCodes(activeCodes))
  result += ansiCodesToString(undoAnsiCodes(activeStartCodes))
  return result
}