Cradicle Explorer

/ utils / bash / bashParser.ts
bashParser.ts
   1  /**
   2   * Pure-TypeScript bash parser producing tree-sitter-bash-compatible ASTs.
   3   *
   4   * Downstream code in parser.ts, ast.ts, prefix.ts, ParsedCommand.ts walks this
   5   * by field name. startIndex/endIndex are UTF-8 BYTE offsets (not JS string
   6   * indices).
   7   *
   8   * Grammar reference: tree-sitter-bash. Validated against a 3449-input golden
   9   * corpus generated from the WASM parser.
  10   */
  11  
  /**
   * A syntax-tree node shaped like tree-sitter-bash output. `startIndex` and
   * `endIndex` are UTF-8 byte offsets into the source, not JS string indices.
   */
  export type TsNode = {
    /** Node kind, e.g. `program` or `pipeline`; operator leaves use the operator text. */
    type: string
    /** Source text spanned by the node. */
    text: string
    /** Byte offset of the node's first byte. */
    startIndex: number
    /** Byte offset one past the node's last byte. */
    endIndex: number
    /** Child nodes; an empty array for leaves. */
    children: Array<TsNode>
  }

  /** Parser handle: `parse` yields the root node, or `null` when parsing is abandoned. */
  type ParserModule = {
    parse: (source: string, timeoutMs?: number) => TsNode | null
  }
  23  
  /**
   * Default wall-clock budget for one parse, in milliseconds. It stops
   * pathological or adversarial input from running unbounded. Callers can
   * override it per call, e.g. `parse(src, Infinity)` in correctness tests
   * where CI jitter would otherwise produce spurious `null` results.
   */
  const PARSE_TIMEOUT_MS = 50

  /**
   * Upper bound on the number of nodes a single parse may create; the parse
   * is abandoned once it is exceeded (guards against OOM on deep nesting).
   */
  const MAX_NODES = 50_000
  33  
  /** The one shared parser handle; its `parse` is the pure-TS entry point. */
  const MODULE: ParserModule = { parse: parseSource }

  /** Already-resolved promise returned by every `ensureParserInitialized` call. */
  const READY: Promise<void> = Promise.resolve()

  /**
   * Kept only for API compatibility: the pure-TS parser has no async
   * initialisation step, so this resolves immediately.
   */
  export function ensureParserInitialized(): Promise<void> {
    return READY
  }

  /**
   * Returns the parser handle. The `| null` in the signature is retained for
   * callers; this implementation always returns the module.
   */
  export function getParserModule(): ParserModule | null {
    return MODULE
  }
  47  
  48  // ───────────────────────────── Tokenizer ─────────────────────────────
  49  
  /**
   * Kinds of token the lexer emits. Opening delimiters such as `DQUOTE`,
   * `BACKTICK`, `DOLLAR_PAREN` cover only the delimiter itself, whereas
   * `SQUOTE` and `ANSI_C` cover the whole quoted string.
   */
  type TokenType =
    // words and separators
    | 'WORD'
    | 'NUMBER'
    | 'OP'
    | 'NEWLINE'
    | 'COMMENT'
    // quoting
    | 'DQUOTE'
    | 'SQUOTE'
    | 'ANSI_C'
    // expansions and substitutions
    | 'DOLLAR'
    | 'DOLLAR_PAREN'
    | 'DOLLAR_BRACE'
    | 'DOLLAR_DPAREN'
    | 'BACKTICK'
    | 'LT_PAREN'
    | 'GT_PAREN'
    // end of input
    | 'EOF'

  /** A lexed token together with its UTF-8 byte extent in the source. */
  type Token = {
    type: TokenType
    /** Token text (empty for `EOF`). */
    value: string
    /** UTF-8 byte offset of first char */
    start: number
    /** UTF-8 byte offset one past last char */
    end: number
  }
  76  
  /** Single-character names, presumably the special parameters (`$?`, `$$`, `$@`, ...). */
  const SPECIAL_VARS = new Set(Array.from('?$@*#-!_'))

  /** Command names that introduce a declaration (`export`, `local`, ...). */
  const DECL_KEYWORDS = new Set('export declare typeset readonly local'.split(' '))

  /** Shell reserved words. */
  export const SHELL_KEYWORDS = new Set(
    'if then elif else fi while until for in do done case esac function select'.split(
      ' ',
    ),
  )
 104  
 /**
  * Mutable lexer cursor. Two positions move in lock-step: `i` indexes the JS
  * string (UTF-16 code units) and `b` counts UTF-8 bytes, which is what node
  * positions are expressed in. For pure-ASCII input the two are equal.
  */
 type Lexer = {
   /** Source being scanned. */
   src: string
   /** `src.length`, cached. */
   len: number
   /** JS string index */
   i: number
   /** UTF-8 byte offset */
   b: number
   /** Heredoc delimiters seen on the current line, awaiting a body scan at the next newline. */
   heredocs: HeredocPending[]
   /** Char-index to byte-offset table; `null` until first built. */
   byteTable: Uint32Array | null
 }

 /** A heredoc redirect whose body has not been consumed yet. */
 type HeredocPending = {
   /** Delimiter word that ends the body. */
   delim: string
   /** Presumably set for the `<<-` form, which strips leading tabs; confirm at the producer. */
   stripTabs: boolean
   /** Whether the delimiter was quoted. */
   quoted: boolean
   /** Extent of the body; filled in after the body scan. */
   bodyStart: number
   bodyEnd: number
   /** Extent of the closing delimiter; filled in after the body scan. */
   endStart: number
   endEnd: number
 }
 133  
 /**
  * Create a lexer positioned at the start of `src`, with no pending heredocs
  * and no byte table built yet.
  */
 function makeLexer(src: string): Lexer {
   const fresh: Lexer = {
     src,
     len: src.length,
     i: 0,
     b: 0,
     heredocs: [],
     byteTable: null,
   }
   return fresh
 }
 144  
 /**
  * Consume one code point at the cursor, moving both the JS string index
  * (`L.i`) and the UTF-8 byte offset (`L.b`).
  *
  * A high surrogate counts as a 4-byte pair only when a low surrogate really
  * follows it. An unpaired surrogate is encoded by UTF-8 encoders as U+FFFD
  * (3 bytes), so it advances one code unit and three bytes. Treating it as a
  * pair would silently swallow the next character, e.g. a `;`.
  */
 function advance(L: Lexer): void {
   const c = L.src.charCodeAt(L.i)
   L.i++
   if (c < 0x80) {
     L.b++
     return
   }
   if (c < 0x800) {
     L.b += 2
     return
   }
   if (c >= 0xd800 && c <= 0xdbff) {
     // charCodeAt past the end yields NaN, which fails both comparisons
     const next = L.src.charCodeAt(L.i)
     if (next >= 0xdc00 && next <= 0xdfff) {
       // Well-formed surrogate pair: one astral code point, 4 UTF-8 bytes
       L.b += 4
       L.i++
       return
     }
   }
   // Remaining BMP chars and unpaired surrogates: 3 UTF-8 bytes
   L.b += 3
 }
 161  
 /**
  * Look at the code unit `off` positions from the cursor without consuming it.
  * Returns `''` whenever the position lies outside the source, including
  * before index 0, so the result is always a real string.
  */
 function peek(L: Lexer, off = 0): string {
   const idx = L.i + off
   return idx >= 0 && idx < L.len ? L.src.charAt(idx) : ''
 }
 165  
 /**
  * UTF-8 byte offset of the code unit at `charIdx`.
  *
  * The first call builds a table with `L.len + 1` entries (the last one is the
  * total byte length) and caches it on the lexer; later calls are a lookup.
  * `charIdx` is not range-checked.
  *
  * A high surrogate is counted as a 4-byte pair only when a low surrogate
  * follows it. An unpaired surrogate counts as 3 bytes (UTF-8 encoders emit
  * U+FFFD for it) and the following character keeps its own entry.
  */
 function byteAt(L: Lexer, charIdx: number): number {
   // Table already built: plain lookup
   if (L.byteTable) return L.byteTable[charIdx]!

   const table = new Uint32Array(L.len + 1)
   let bytes = 0
   let i = 0
   while (i < L.len) {
     table[i] = bytes
     const c = L.src.charCodeAt(i)
     if (c < 0x80) {
       bytes += 1
       i += 1
     } else if (c < 0x800) {
       bytes += 2
       i += 1
     } else if (
       c >= 0xd800 &&
       c <= 0xdbff &&
       i + 1 < L.len &&
       L.src.charCodeAt(i + 1) >= 0xdc00 &&
       L.src.charCodeAt(i + 1) <= 0xdfff
     ) {
       // The low-surrogate index maps to an offset inside the 4-byte sequence,
       // keeping the table non-decreasing for the binary search in sliceBytes.
       table[i + 1] = bytes + 2
       bytes += 4
       i += 2
     } else {
       // Other BMP chars and unpaired surrogates
       bytes += 3
       i += 1
     }
   }
   table[L.len] = bytes
   L.byteTable = table
   return table[charIdx]!
 }
 195  
 /** Punctuation allowed inside an unquoted word; none of it starts an operator. */
 const WORD_PUNCTUATION = new Set([
   '_',
   '/',
   '.',
   '-',
   '+',
   ':',
   '@',
   '%',
   ',',
   '~',
   '^',
   '?',
   '*',
   '!',
   '=',
   '[',
   ']',
 ])

 /** True for ASCII letters, digits and the punctuation bash allows in a bare word. */
 function isWordChar(c: string): boolean {
   if (c >= 'a' && c <= 'z') return true
   if (c >= 'A' && c <= 'Z') return true
   if (c >= '0' && c <= '9') return true
   return WORD_PUNCTUATION.has(c)
 }

 /** True when `c` can begin a word: any word char, or a backslash escape. */
 function isWordStart(c: string): boolean {
   return c === '\\' || isWordChar(c)
 }
 225  
 /** True for characters that may begin an identifier: ASCII letter or `_`. */
 function isIdentStart(c: string): boolean {
   if (c === '_') return true
   return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
 }

 /** True for characters that may continue an identifier: letter, digit or `_`. */
 function isIdentChar(c: string): boolean {
   return (c >= '0' && c <= '9') || isIdentStart(c)
 }

 /** True for ASCII decimal digits. */
 function isDigit(c: string): boolean {
   return c >= '0' && c <= '9'
 }

 /** True for ASCII hexadecimal digits, either case. */
 function isHexDigit(c: string): boolean {
   if (isDigit(c)) return true
   return (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f')
 }

 /** True for digits of bash `BASE#DIGITS` literals: letters, digits, `_` and `@` (up to base 64). */
 function isBaseDigit(c: string): boolean {
   return c === '@' || isIdentChar(c)
 }
 246  
 /** Characters that end an unquoted heredoc delimiter. */
 const HEREDOC_DELIM_STOPPERS = new Set([
   ' ',
   '\t',
   '\n',
   '<',
   '>',
   '|',
   '&',
   ';',
   '(',
   ')',
   "'",
   '"',
   '`',
   '\\',
 ])

 /**
  * Whether `c` may appear in an unquoted heredoc delimiter. Bash is liberal
  * here: far more than identifier characters are accepted (`<<!HEREDOC!` is
  * valid). Only whitespace, redirect and pipe/list operators, parentheses,
  * quotes, backtick and backslash end the delimiter. End of input (`''`) is
  * not a delimiter character either.
  */
 function isHeredocDelimChar(c: string): boolean {
   if (c === '') return false
   return !HEREDOC_DELIM_STOPPERS.has(c)
 }
 271  
 /**
  * Move the cursor past inter-token blanks: spaces, tabs, carriage returns,
  * backslash line continuations (LF or CRLF) and a backslash-escaped space or
  * tab. Stops at the first character that is none of these, including a
  * backslash followed by anything else.
  */
 function skipBlanks(L: Lexer): void {
   for (;;) {
     if (L.i >= L.len) return
     const ch = L.src[L.i]!

     // \r is whitespace per tree-sitter-bash extras /\s/, which handles CRLF inputs
     if (ch === ' ' || ch === '\t' || ch === '\r') {
       advance(L)
       continue
     }
     if (ch !== '\\') return

     const following = L.src[L.i + 1]
     const crlf = following === '\r' && L.src[L.i + 2] === '\n'
     if (following === '\n' || crlf) {
       // Line continuation; tree-sitter extras: /\\\r?\n/
       advance(L)
       advance(L)
       if (crlf) advance(L)
       continue
     }
     if (following === ' ' || following === '\t') {
       // \<space> or \<tab>; tree-sitter's _whitespace is /\\?[ \t\v]+/
       advance(L)
       advance(L)
       continue
     }
     return
   }
 }
 297  
 /** Characters that can begin an operator token. Each is also a one-char operator. */
 const OPERATOR_LEAD_CHARS = '&|;<>()'

 /**
  * Multi-character operators in match order. A longer operator always appears
  * before any operator that is its prefix (`;;&` before `;;`, `<<<` before
  * `<<`), so the first hit is the longest match.
  */
 const MULTI_CHAR_OPERATORS: ReadonlyArray<readonly [string, TokenType]> = [
   ['&&', 'OP'],
   ['||', 'OP'],
   ['|&', 'OP'],
   [';;&', 'OP'],
   [';;', 'OP'],
   [';&', 'OP'],
   ['>>', 'OP'],
   ['>&-', 'OP'],
   ['>&', 'OP'],
   ['>|', 'OP'],
   ['&>>', 'OP'],
   ['&>', 'OP'],
   ['<<<', 'OP'],
   ['<<-', 'OP'],
   ['<<', 'OP'],
   ['<&-', 'OP'],
   ['<&', 'OP'],
   ['<(', 'LT_PAREN'],
   ['>(', 'GT_PAREN'],
   ['((', 'OP'],
   ['))', 'OP'],
 ]

 /** A word consisting only of digits with an optional leading minus. */
 const INTEGER_WORD_RE = /^-?\d+$/

 /** Consume the ASCII text `value` at the cursor and return it as a token. */
 function fixedToken(
   L: Lexer,
   type: TokenType,
   value: string,
   start: number,
 ): Token {
   for (let k = 0; k < value.length; k++) advance(L)
   return { type, value, start, end: L.b }
 }

 /** Build a token whose value is the source consumed since char index `si`. */
 function spanToken(L: Lexer, type: TokenType, si: number, start: number): Token {
   return { type, value: L.src.slice(si, L.i), start, end: L.b }
 }

 /**
  * Scan the next token, skipping leading blanks first.
  *
  * Scanning is context-sensitive. In `cmd` mode `[[`, `[`, `{` (followed by
  * whitespace), `}` and `!` (followed by a blank) are operators that start a
  * test, group or negation. In `arg` mode the same characters are ordinary
  * word characters (glob / subscript / brace text).
  *
  * Returns an `EOF` token with an empty value at end of input. Any character
  * no rule recognises is returned as a one-code-point `WORD`.
  */
 function nextToken(L: Lexer, ctx: 'cmd' | 'arg' = 'arg'): Token {
   skipBlanks(L)
   const start = L.b
   if (L.i >= L.len) return { type: 'EOF', value: '', start, end: start }

   const si = L.i
   const c = L.src[si]!
   const c1 = peek(L, 1)
   const c2 = peek(L, 2)

   if (c === '\n') return fixedToken(L, 'NEWLINE', '\n', start)

   if (c === '#') {
     // Comment runs to, but does not include, the newline
     while (L.i < L.len && L.src[L.i] !== '\n') advance(L)
     return spanToken(L, 'COMMENT', si, start)
   }

   if (OPERATOR_LEAD_CHARS.includes(c)) {
     for (const [text, type] of MULTI_CHAR_OPERATORS) {
       if (L.src.startsWith(text, si)) return fixedToken(L, type, text, start)
     }
     return fixedToken(L, 'OP', c, start)
   }

   // In cmd position, [ [[ { start test/group; in arg position they're word chars
   if (ctx === 'cmd') {
     if (c === '[') {
       return fixedToken(L, 'OP', c1 === '[' ? '[[' : '[', start)
     }
     if (c === '{' && (c1 === ' ' || c1 === '\t' || c1 === '\n')) {
       return fixedToken(L, 'OP', '{', start)
     }
     if (c === '}') return fixedToken(L, 'OP', '}', start)
     if (c === '!' && (c1 === ' ' || c1 === '\t')) {
       return fixedToken(L, 'OP', '!', start)
     }
   }

   // Only the opening quote; the caller parses the string contents
   if (c === '"') return fixedToken(L, 'DQUOTE', '"', start)

   if (c === "'") {
     // Whole single-quoted string, closing quote included when present
     advance(L)
     while (L.i < L.len && L.src[L.i] !== "'") advance(L)
     if (L.i < L.len) advance(L)
     return spanToken(L, 'SQUOTE', si, start)
   }

   if (c === '$') {
     if (c1 === '(') {
       return c2 === '('
         ? fixedToken(L, 'DOLLAR_DPAREN', '$((', start)
         : fixedToken(L, 'DOLLAR_PAREN', '$(', start)
     }
     if (c1 === '{') return fixedToken(L, 'DOLLAR_BRACE', '${', start)
     if (c1 === "'") {
       // ANSI-C string $'...': a backslash escapes the next char, so \' does not close it
       advance(L)
       advance(L)
       while (L.i < L.len && L.src[L.i] !== "'") {
         if (L.src[L.i] === '\\' && L.i + 1 < L.len) advance(L)
         advance(L)
       }
       if (L.i < L.len) advance(L)
       return spanToken(L, 'ANSI_C', si, start)
     }
     return fixedToken(L, 'DOLLAR', '$', start)
   }

   if (c === '`') return fixedToken(L, 'BACKTICK', '`', start)

   // File descriptor before redirect: digit+ immediately followed by > or <
   if (isDigit(c)) {
     let j = si
     while (j < L.len && isDigit(L.src[j]!)) j++
     const after = j < L.len ? L.src[j]! : ''
     if (after === '>' || after === '<') {
       while (L.i < j) advance(L)
       return spanToken(L, 'WORD', si, start)
     }
   }

   // Word / number
   if (isWordStart(c) || c === '{' || c === '}') {
     while (L.i < L.len) {
       const ch = L.src[L.i]!
       if (ch === '\\') {
         // Trailing `\` at EOF: tree-sitter excludes it from the word and
         // emits a sibling ERROR, so the word ends before it.
         if (L.i + 1 >= L.len) break
         // Escape: take the backslash and the next char together (this also
         // covers \<newline> line continuation in the middle of a word)
         advance(L)
         advance(L)
         continue
       }
       if (!isWordChar(ch) && ch !== '{' && ch !== '}') break
       advance(L)
     }
     if (L.i > si) {
       const v = L.src.slice(si, L.i)
       // Number: optional sign then digits only
       const type: TokenType = INTEGER_WORD_RE.test(v) ? 'NUMBER' : 'WORD'
       return { type, value: v, start, end: L.b }
     }
     // Empty word (lone `\` at EOF): fall through to the single-char consumer
   }

   // Unknown char: consume one code point as a word. The value is sliced from
   // the source so an astral character keeps both surrogate halves, matching
   // the byte range in start/end.
   advance(L)
   return spanToken(L, 'WORD', si, start)
 }
 592  
 593  // ───────────────────────────── Parser ─────────────────────────────
 594  
 /** Mutable state threaded through every parse function for one `parseSource` call. */
 type ParseState = {
   /** Lexer cursor over `src`. */
   L: Lexer
   /** Source text being parsed. */
   src: string
   /** UTF-8 byte length of `src`. */
   srcBytes: number
   /** True when byte offsets == char indices (no multi-byte UTF-8) */
   isAscii: boolean
   /** Nodes created so far; checked against the node budget. */
   nodeCount: number
   /** `performance.now()` value after which the parse is abandoned. */
   deadline: number
   /** Set just before a budget or timeout abort is thrown. */
   aborted: boolean
   /** Depth of backtick nesting; while inside backticks a backtick ends the statement list */
   inBacktick: number
   /** When set, parseSimpleCommand stops at this token (for `[` backtrack) */
   stopToken: string | null
 }
 609  
 /**
  * Parse `source` into a `program` node.
  *
  * @param source bash source text
  * @param timeoutMs wall-clock budget in ms; defaults to `PARSE_TIMEOUT_MS`
  * @returns the root node, or `null` if the node/time budget was exhausted or
  *   anything threw while parsing. This entry point never throws.
  */
 function parseSource(source: string, timeoutMs?: number): TsNode | null {
   const lexer = makeLexer(source)
   const byteLen = byteLengthUtf8(source)
   const budgetMs = timeoutMs ?? PARSE_TIMEOUT_MS
   const P: ParseState = {
     L: lexer,
     src: source,
     srcBytes: byteLen,
     isAscii: byteLen === source.length,
     nodeCount: 0,
     deadline: performance.now() + budgetMs,
     aborted: false,
     inBacktick: 0,
     stopToken: null,
   }
   try {
     const root = parseProgram(P)
     return P.aborted ? null : root
   } catch {
     // Budget/timeout aborts arrive as exceptions; any failure becomes null
     return null
   }
 }
 632  
 /**
  * Number of bytes `s` occupies when encoded as UTF-8.
  *
  * A surrogate pair counts as 4 bytes. An unpaired surrogate counts as 3
  * bytes, the size of the U+FFFD that UTF-8 encoders substitute for it, and
  * does not consume the character after it.
  */
 function byteLengthUtf8(s: string): number {
   let bytes = 0
   for (let i = 0; i < s.length; i++) {
     const c = s.charCodeAt(i)
     if (c < 0x80) {
       bytes += 1
     } else if (c < 0x800) {
       bytes += 2
     } else if (c >= 0xd800 && c <= 0xdbff) {
       // charCodeAt past the end is NaN, which fails the range test below
       const next = s.charCodeAt(i + 1)
       if (next >= 0xdc00 && next <= 0xdfff) {
         bytes += 4
         i++ // the low surrogate belongs to this code point
       } else {
         bytes += 3
       }
     } else {
       bytes += 3
     }
   }
   return bytes
 }
 646  
 /**
  * Account for one more node and abort the parse when a limit is hit.
  *
  * Marks `P.aborted` and throws `Error('budget')` once more than `MAX_NODES`
  * nodes exist, or `Error('timeout')` once the deadline has passed. The clock
  * is only read on every 128th node to keep the check cheap.
  */
 function checkBudget(P: ParseState): void {
   P.nodeCount += 1
   const count = P.nodeCount
   if (count > MAX_NODES) {
     P.aborted = true
     throw new Error('budget')
   }
   const onSampleTick = (count & 0x7f) === 0
   if (onSampleTick && performance.now() > P.deadline) {
     P.aborted = true
     throw new Error('timeout')
   }
 }
 658  
 /**
  * Create a node of kind `type` covering bytes `[start, end)`. The node text
  * is cut from the source by byte range. Every node counts against the parse
  * budget, so this can throw when the budget or deadline is exceeded.
  */
 function mk(
   P: ParseState,
   type: string,
   start: number,
   end: number,
   children: TsNode[],
 ): TsNode {
   checkBudget(P)
   const text = sliceBytes(P, start, end)
   return { type, text, startIndex: start, endIndex: end, children }
 }
 676  
 /**
  * Smallest index in `[from, to]` whose table entry is >= `target`, or `to`
  * if there is none. The table must be non-decreasing.
  */
 function firstIndexAtOrAfter(
   table: Uint32Array,
   target: number,
   from: number,
   to: number,
 ): number {
   let lo = from
   let hi = to
   while (lo < hi) {
     const mid = (lo + hi) >>> 1
     if (table[mid]! < target) lo = mid + 1
     else hi = mid
   }
   return lo
 }

 /**
  * Substring of the source covering UTF-8 bytes `[startByte, endByte)`.
  *
  * ASCII sources slice directly, since byte offsets equal char indices.
  * Otherwise both byte offsets are mapped to char indices by binary search in
  * the lexer's byte table, which is built on demand.
  */
 function sliceBytes(P: ParseState, startByte: number, endByte: number): string {
   if (P.isAscii) return P.src.slice(startByte, endByte)

   const lexer = P.L
   if (!lexer.byteTable) byteAt(lexer, 0)
   const table = lexer.byteTable!
   const charCount = P.src.length
   const from = firstIndexAtOrAfter(table, startByte, 0, charCount)
   // The end cannot precede the start, so resume the search from there
   const to = firstIndexAtOrAfter(table, endByte, from, charCount)
   return P.src.slice(from, to)
 }
 701  
 /** Create a childless node of kind `type` spanning exactly the token `tok`. */
 function leaf(P: ParseState, type: string, tok: Token): TsNode {
   const { start, end } = tok
   return mk(P, type, start, end, [])
 }
 705  
 /**
  * Parse the whole source into the root `program` node.
  *
  * The program starts at the first byte after leading blanks and newlines. If
  * it has any children it extends to the end of the source, trailing
  * whitespace included. Input that cannot be parsed as a statement is
  * consumed one token at a time and recorded as `ERROR` leaves.
  */
 function parseProgram(P: ParseState): TsNode {
   const lexer = P.L
   const children: TsNode[] = []

   // Skip leading whitespace & newlines; program start is the first content byte
   skipBlanks(lexer)
   let mark = saveLex(lexer)
   while (nextToken(lexer, 'cmd').type === 'NEWLINE') {
     skipBlanks(lexer)
     mark = saveLex(lexer)
   }
   restoreLex(lexer, mark)
   const progStart = lexer.b

   while (lexer.i < lexer.len) {
     mark = saveLex(lexer)
     const tok = nextToken(lexer, 'cmd')
     if (tok.type === 'EOF') break
     if (tok.type === 'NEWLINE') continue
     if (tok.type === 'COMMENT') {
       children.push(leaf(P, 'comment', tok))
       continue
     }

     // Anything else starts a statement list: rewind and parse it
     restoreLex(lexer, mark)
     const stmts = parseStatements(P, null)
     for (const stmt of stmts) children.push(stmt)
     if (stmts.length > 0) continue

     // Couldn't parse: skip one token so the loop always makes progress
     const errTok = nextToken(lexer, 'cmd')
     if (errTok.type === 'EOF') break
     // Stray `;;` at program level (e.g., `var=;;` outside case): tree-sitter
     // silently elides it. A leading one is kept as ERROR (security: paste
     // artifact).
     const strayCaseTerminator =
       errTok.type === 'OP' && errTok.value === ';;' && children.length > 0
     if (!strayCaseTerminator) {
       children.push(mk(P, 'ERROR', errTok.start, errTok.end, []))
     }
   }

   // tree-sitter includes trailing whitespace in program extent
   const progEnd = children.length > 0 ? P.srcBytes : progStart
   return mk(P, 'program', progStart, progEnd, children)
 }
 753  
 /**
  * Radix used to pack a lexer position into one number: 2^25. Char indices
  * below 2^25 (about 33M) and byte offsets below 2^28 pack losslessly, and
  * the packed value stays below 2^53, so it is an exact double.
  */
 const LEX_SAVE_RADIX = 0x2000000

 /**
  * Saved lexer position, packed as `b * LEX_SAVE_RADIX + i`. A plain number
  * avoids a heap allocation on every backtrack. Treat it as opaque: only
  * `saveLex` and `restoreLex` know the layout.
  */
 type LexSave = number

 /** Snapshot the lexer's char index and byte offset. */
 function saveLex(L: Lexer): LexSave {
   return L.b * LEX_SAVE_RADIX + L.i
 }

 /**
  * Rewind the lexer to a position produced by `saveLex`. Arithmetic is used
  * rather than bit operators, which truncate to 32 bits and would corrupt
  * positions once the source is longer than 65535 chars.
  */
 function restoreLex(L: Lexer, s: LexSave): void {
   L.i = s % LEX_SAVE_RADIX
   L.b = Math.floor(s / LEX_SAVE_RADIX)
 }
 763  
 /** Operators that close an enclosing construct and therefore end a statement list. */
 const STATEMENT_CLOSING_OPS = new Set([
   ')',
   '}',
   ';;',
   ';&',
   ';;&',
   '))',
   ']]',
   ']',
 ])

 /** Reserved words that close or continue an enclosing construct. */
 const STATEMENT_CLOSING_WORDS = new Set([
   'then',
   'elif',
   'else',
   'fi',
   'do',
   'done',
   'esac',
 ])

 /**
  * Parse a sequence of statements separated by ; & newline. Returns a flat list
  * where ; and & are sibling leaves (NOT wrapped in 'list'; only && || get
  * that), with comments interleaved as `comment` leaves.
  *
  * Parsing stops, without consuming the stopping token, at EOF, at the
  * `terminator` operator, at any closing operator or reserved word of an
  * enclosing construct, at a backtick while inside backticks, or when no
  * statement can be parsed. A separator is always emitted, including a
  * trailing one directly before the stopping token.
  *
  * Pending heredoc bodies are scanned whenever a newline is consumed.
  */
 function parseStatements(P: ParseState, terminator: string | null): TsNode[] {
   const out: TsNode[] = []
   while (true) {
     skipBlanks(P.L)
     const save = saveLex(P.L)
     const t = nextToken(P.L, 'cmd')

     if (t.type === 'NEWLINE') {
       // Process pending heredocs
       if (P.L.heredocs.length > 0) {
         scanHeredocBodies(P)
       }
       continue
     }
     if (t.type === 'COMMENT') {
       out.push(leaf(P, 'comment', t))
       continue
     }

     // Every other token was only a lookahead: rewind before deciding
     restoreLex(P.L, save)
     if (t.type === 'EOF') break
     if (
       t.type === 'OP' &&
       (t.value === terminator || STATEMENT_CLOSING_OPS.has(t.value))
     ) {
       break
     }
     if (t.type === 'BACKTICK' && P.inBacktick > 0) break
     if (t.type === 'WORD' && STATEMENT_CLOSING_WORDS.has(t.value)) break

     const stmt = parseAndOr(P)
     if (!stmt) break
     out.push(stmt)

     // Look for separator
     skipBlanks(P.L)
     const beforeSep = saveLex(P.L)
     const sep = nextToken(P.L, 'cmd')
     if (sep.type === 'OP' && (sep.value === ';' || sep.value === '&')) {
       // Kept as a sibling leaf even when a terminator follows; the next
       // iteration detects the terminator and stops.
       out.push(leaf(P, sep.value, sep))
     } else if (sep.type === 'NEWLINE') {
       if (P.L.heredocs.length > 0) {
         scanHeredocBodies(P)
       }
     } else {
       // Not a separator: leave it for the next iteration
       restoreLex(P.L, beforeSep)
     }
   }
   return out
 }
 872  
 /**
  * Parse pipelines chained with `&&` / `||` into left-nested `list` nodes.
  * Returns the lone pipeline unchanged when there is no chain, and `null`
  * when no pipeline can be parsed at all.
  *
  * tree-sitter quirk: a trailing redirect on the right-hand pipeline wraps the
  * ENTIRE list in a redirected_statement. `a > x && b > y` becomes
  * redirected_statement(list(redirected_statement(a,>x), &&, b), >y).
  *
  * An operator with nothing parseable after it yields a two-child list
  * (left, operator) and ends the chain.
  */
 function parseAndOr(P: ParseState): TsNode | null {
   let acc = parsePipeline(P)
   if (!acc) return null

   for (;;) {
     const mark = saveLex(P.L)
     const tok = nextToken(P.L, 'cmd')
     const chains = tok.type === 'OP' && (tok.value === '&&' || tok.value === '||')
     if (!chains) {
       restoreLex(P.L, mark)
       return acc
     }

     const op = leaf(P, tok.value, tok)
     skipNewlines(P)
     const rhs = parsePipeline(P)
     if (!rhs) {
       return mk(P, 'list', acc.startIndex, op.endIndex, [acc, op])
     }

     const hoist = rhs.type === 'redirected_statement' && rhs.children.length >= 2
     if (!hoist) {
       acc = mk(P, 'list', acc.startIndex, rhs.endIndex, [acc, op, rhs])
       continue
     }

     // Move the right side's redirects out so they wrap the whole list
     const inner = rhs.children[0]!
     const redirs = rhs.children.slice(1)
     const listNode = mk(P, 'list', acc.startIndex, inner.endIndex, [
       acc,
       op,
       inner,
     ])
     const lastRedirect = redirs[redirs.length - 1]!
     acc = mk(
       P,
       'redirected_statement',
       listNode.startIndex,
       lastRedirect.endIndex,
       [listNode, ...redirs],
     )
   }
 }
 920  
 /**
  * Consume consecutive NEWLINE tokens and leave the lexer positioned just
  * before the first token that is not a newline.
  */
 function skipNewlines(P: ParseState): void {
   let mark = saveLex(P.L)
   while (nextToken(P.L, 'cmd').type === 'NEWLINE') {
     mark = saveLex(P.L)
   }
   restoreLex(P.L, mark)
 }
 931  
 /**
  * Parse commands joined by | or |&. Flat children with operator leaves.
  * Returns the lone command unchanged when there is no pipe, and `null` when
  * no command can be parsed.
  *
  * tree-sitter quirk: `a | b 2>nul | c` hoists the redirect on `b` to wrap
  * the preceding pipeline fragment: pipeline(redirected_statement(
  * pipeline(a,|,b), 2>nul), |, c).
  *
  * A pipe operator with no command after it is kept as the last child.
  */
 function parsePipeline(P: ParseState): TsNode | null {
   const first = parseCommand(P)
   if (!first) return null
   const parts: TsNode[] = [first]

   while (true) {
     const save = saveLex(P.L)
     const t = nextToken(P.L, 'cmd')
     if (!(t.type === 'OP' && (t.value === '|' || t.value === '|&'))) {
       restoreLex(P.L, save)
       break
     }

     const op = leaf(P, t.value, t)
     skipNewlines(P)
     const next = parseCommand(P)
     if (!next) {
       parts.push(op)
       break
     }

     // Hoist trailing redirect on `next` to wrap current pipeline fragment
     if (next.type === 'redirected_statement' && next.children.length >= 2) {
       const inner = next.children[0]!
       const redirs = next.children.slice(1)
       // Wrap existing parts + op + inner as a pipeline
       const pipeKids = [...parts, op, inner]
       const pipeNode = mk(
         P,
         'pipeline',
         pipeKids[0]!.startIndex,
         inner.endIndex,
         pipeKids,
       )
       const lastR = redirs[redirs.length - 1]!
       const wrapped = mk(
         P,
         'redirected_statement',
         pipeNode.startIndex,
         lastR.endIndex,
         [pipeNode, ...redirs],
       )
       // The wrapped fragment becomes the sole element so far
       parts.length = 0
       parts.push(wrapped)
       continue
     }
     parts.push(op, next)
   }

   if (parts.length === 1) return parts[0]!
   const last = parts[parts.length - 1]!
   return mk(P, 'pipeline', parts[0]!.startIndex, last.endIndex, parts)
 }
 993  
 994  /** Parse a single command: simple, compound, or control structure. */
 995  function parseCommand(P: ParseState): TsNode | null {
 996    skipBlanks(P.L)
 997    const save = saveLex(P.L)
 998    const t = nextToken(P.L, 'cmd')
 999  
1000    if (t.type === 'EOF') {
1001      restoreLex(P.L, save)
1002      return null
1003    }
1004  
1005    // Negation — tree-sitter wraps just the command, redirects go outside.
1006    // `! cmd > out` → redirected_statement(negated_command(!, cmd), >out)
1007    if (t.type === 'OP' && t.value === '!') {
1008      const bang = leaf(P, '!', t)
1009      const inner = parseCommand(P)
1010      if (!inner) {
1011        restoreLex(P.L, save)
1012        return null
1013      }
1014      // If inner is a redirected_statement, hoist redirects outside negation
1015      if (inner.type === 'redirected_statement' && inner.children.length >= 2) {
1016        const cmd = inner.children[0]!
1017        const redirs = inner.children.slice(1)
1018        const neg = mk(P, 'negated_command', bang.startIndex, cmd.endIndex, [
1019          bang,
1020          cmd,
1021        ])
1022        const lastR = redirs[redirs.length - 1]!
1023        return mk(P, 'redirected_statement', neg.startIndex, lastR.endIndex, [
1024          neg,
1025          ...redirs,
1026        ])
1027      }
1028      return mk(P, 'negated_command', bang.startIndex, inner.endIndex, [
1029        bang,
1030        inner,
1031      ])
1032    }
1033  
1034    if (t.type === 'OP' && t.value === '(') {
1035      const open = leaf(P, '(', t)
1036      const body = parseStatements(P, ')')
1037      const closeTok = nextToken(P.L, 'cmd')
1038      const close =
1039        closeTok.type === 'OP' && closeTok.value === ')'
1040          ? leaf(P, ')', closeTok)
1041          : mk(P, ')', open.endIndex, open.endIndex, [])
1042      const node = mk(P, 'subshell', open.startIndex, close.endIndex, [
1043        open,
1044        ...body,
1045        close,
1046      ])
1047      return maybeRedirect(P, node)
1048    }
1049  
1050    if (t.type === 'OP' && t.value === '((') {
1051      const open = leaf(P, '((', t)
1052      const exprs = parseArithCommaList(P, '))', 'var')
1053      const closeTok = nextToken(P.L, 'cmd')
1054      const close =
1055        closeTok.value === '))'
1056          ? leaf(P, '))', closeTok)
1057          : mk(P, '))', open.endIndex, open.endIndex, [])
1058      return mk(P, 'compound_statement', open.startIndex, close.endIndex, [
1059        open,
1060        ...exprs,
1061        close,
1062      ])
1063    }
1064  
1065    if (t.type === 'OP' && t.value === '{') {
1066      const open = leaf(P, '{', t)
1067      const body = parseStatements(P, '}')
1068      const closeTok = nextToken(P.L, 'cmd')
1069      const close =
1070        closeTok.type === 'OP' && closeTok.value === '}'
1071          ? leaf(P, '}', closeTok)
1072          : mk(P, '}', open.endIndex, open.endIndex, [])
1073      const node = mk(P, 'compound_statement', open.startIndex, close.endIndex, [
1074        open,
1075        ...body,
1076        close,
1077      ])
1078      return maybeRedirect(P, node)
1079    }
1080  
1081    if (t.type === 'OP' && (t.value === '[' || t.value === '[[')) {
1082      const open = leaf(P, t.value, t)
1083      const closer = t.value === '[' ? ']' : ']]'
1084      // Grammar: `[` can contain choice(_expression, redirected_statement).
1085      // Try _expression first; if we don't reach `]`, backtrack and parse as
1086      // redirected_statement (handles `[ ! cmd -v go &>/dev/null ]`).
1087      const exprSave = saveLex(P.L)
1088      let expr = parseTestExpr(P, closer)
1089      skipBlanks(P.L)
1090      if (t.value === '[' && peek(P.L) !== ']') {
1091        // Expression parse didn't reach `]` — try as redirected_statement.
1092        // Thread `]` stop-token so parseSimpleCommand doesn't eat it as arg.
1093        restoreLex(P.L, exprSave)
1094        const prevStop = P.stopToken
1095        P.stopToken = ']'
1096        const rstmt = parseCommand(P)
1097        P.stopToken = prevStop
1098        if (rstmt && rstmt.type === 'redirected_statement') {
1099          expr = rstmt
1100        } else {
1101          // Neither worked — restore and keep the expression result
1102          restoreLex(P.L, exprSave)
1103          expr = parseTestExpr(P, closer)
1104        }
1105        skipBlanks(P.L)
1106      }
1107      const closeTok = nextToken(P.L, 'arg')
1108      let close: TsNode
1109      if (closeTok.value === closer) {
1110        close = leaf(P, closer, closeTok)
1111      } else {
1112        close = mk(P, closer, open.endIndex, open.endIndex, [])
1113      }
1114      const kids = expr ? [open, expr, close] : [open, close]
1115      return mk(P, 'test_command', open.startIndex, close.endIndex, kids)
1116    }
1117  
1118    if (t.type === 'WORD') {
1119      if (t.value === 'if') return maybeRedirect(P, parseIf(P, t), true)
1120      if (t.value === 'while' || t.value === 'until')
1121        return maybeRedirect(P, parseWhile(P, t), true)
1122      if (t.value === 'for') return maybeRedirect(P, parseFor(P, t), true)
1123      if (t.value === 'select') return maybeRedirect(P, parseFor(P, t), true)
1124      if (t.value === 'case') return maybeRedirect(P, parseCase(P, t), true)
1125      if (t.value === 'function') return parseFunction(P, t)
1126      if (DECL_KEYWORDS.has(t.value))
1127        return maybeRedirect(P, parseDeclaration(P, t))
1128      if (t.value === 'unset' || t.value === 'unsetenv') {
1129        return maybeRedirect(P, parseUnset(P, t))
1130      }
1131    }
1132  
1133    restoreLex(P.L, save)
1134    return parseSimpleCommand(P)
1135  }
1136  
/**
 * Parse a simple command: [assignment | redirect]* word [arg | redirect]*
 *
 * Also recognizes the `name() body` function-definition form, which can only
 * be detected after the leading word has been read.
 *
 * Result, depending on what is found:
 * - No command word follows the prefix: the single `variable_assignment`, a
 *   `variable_assignments` wrapper (2+ assignments, no redirects), a
 *   `redirected_statement` holding only the redirects (no assignments), a
 *   `command` holding the mix of both, or null when the prefix was empty.
 * - `name()` followed by a parsable body: `function_definition`.
 * - Otherwise a `command`, wrapped in `redirected_statement` when a heredoc
 *   or trailing file redirects were seen.
 *
 * Also returns null when the command word itself cannot be parsed, unless
 * exactly one assignment was collected (that assignment is returned instead).
 */
function parseSimpleCommand(P: ParseState): TsNode | null {
  const start = P.L.b
  const assignments: TsNode[] = []
  const preRedirects: TsNode[] = []

  // Prefix: any interleaving of assignments and (non-greedy) redirects.
  // They are collected into two separate lists.
  while (true) {
    skipBlanks(P.L)
    const a = tryParseAssignment(P)
    if (a) {
      assignments.push(a)
      continue
    }
    const r = tryParseRedirect(P)
    if (r) {
      preRedirects.push(r)
      continue
    }
    break
  }

  // Peek at the token that would be the command name. Terminators, most
  // operators (all except `{`, `[`, `[[`) and shell keywords other than `in`
  // mean there is no command word here.
  skipBlanks(P.L)
  const save = saveLex(P.L)
  const nameTok = nextToken(P.L, 'cmd')
  if (
    nameTok.type === 'EOF' ||
    nameTok.type === 'NEWLINE' ||
    nameTok.type === 'COMMENT' ||
    (nameTok.type === 'OP' &&
      nameTok.value !== '{' &&
      nameTok.value !== '[' &&
      nameTok.value !== '[[') ||
    (nameTok.type === 'WORD' &&
      SHELL_KEYWORDS.has(nameTok.value) &&
      nameTok.value !== 'in')
  ) {
    restoreLex(P.L, save)
    // No command — standalone assignment(s) or redirect
    if (assignments.length === 1 && preRedirects.length === 0) {
      return assignments[0]!
    }
    if (preRedirects.length > 0 && assignments.length === 0) {
      // Bare redirect → redirected_statement with just file_redirect children
      const last = preRedirects[preRedirects.length - 1]!
      return mk(
        P,
        'redirected_statement',
        preRedirects[0]!.startIndex,
        last.endIndex,
        preRedirects,
      )
    }
    if (assignments.length > 1 && preRedirects.length === 0) {
      // `A=1 B=2` with no command → variable_assignments (plural)
      const last = assignments[assignments.length - 1]!
      return mk(
        P,
        'variable_assignments',
        assignments[0]!.startIndex,
        last.endIndex,
        assignments,
      )
    }
    // Mixed assignments and redirects: emit them together as a `command`,
    // assignments first, spanning from the position recorded on entry.
    if (assignments.length > 0 || preRedirects.length > 0) {
      const all = [...assignments, ...preRedirects]
      const last = all[all.length - 1]!
      return mk(P, 'command', start, last.endIndex, all)
    }
    return null
  }
  // The token was only a lookahead; rewind so the word is parsed below.
  restoreLex(P.L, save)

  // Check for function definition: name() { ... }
  const fnSave = saveLex(P.L)
  const nm = parseWord(P, 'cmd')
  if (nm && nm.type === 'word') {
    skipBlanks(P.L)
    // Only an immediately adjacent `()` pair qualifies.
    if (peek(P.L) === '(' && peek(P.L, 1) === ')') {
      const oTok = nextToken(P.L, 'cmd')
      const cTok = nextToken(P.L, 'cmd')
      const oParen = leaf(P, '(', oTok)
      const cParen = leaf(P, ')', cTok)
      skipBlanks(P.L)
      skipNewlines(P)
      const body = parseCommand(P)
      if (body) {
        // If body is redirected_statement(compound_statement, file_redirect...),
        // hoist redirects to function_definition level per tree-sitter grammar
        let bodyKids: TsNode[] = [body]
        if (
          body.type === 'redirected_statement' &&
          body.children.length >= 2 &&
          body.children[0]!.type === 'compound_statement'
        ) {
          bodyKids = body.children
        }
        const last = bodyKids[bodyKids.length - 1]!
        return mk(P, 'function_definition', nm.startIndex, last.endIndex, [
          nm,
          oParen,
          cParen,
          ...bodyKids,
        ])
      }
    }
  }
  // Not a function definition (or its body failed to parse): rewind to just
  // before the word and parse it again as the command name.
  restoreLex(P.L, fnSave)

  const nameArg = parseWord(P, 'cmd')
  if (!nameArg) {
    if (assignments.length === 1) return assignments[0]!
    return null
  }

  const cmdName = mk(P, 'command_name', nameArg.startIndex, nameArg.endIndex, [
    nameArg,
  ])

  const args: TsNode[] = []
  const redirects: TsNode[] = []
  // At most one heredoc redirect is tracked; a later one replaces an earlier.
  let heredocRedirect: TsNode | null = null

  while (true) {
    skipBlanks(P.L)
    // Post-command redirects are greedy (repeat1 $._literal) — once a redirect
    // appears after command_name, subsequent literals attach to it per grammar's
    // prec.left. `grep 2>/dev/null -q foo` → file_redirect eats `-q foo`.
    // Args parsed BEFORE the first redirect still go to command (cat a b > out).
    const r = tryParseRedirect(P, true)
    if (r) {
      if (r.type === 'heredoc_redirect') {
        heredocRedirect = r
      } else if (r.type === 'herestring_redirect') {
        // Herestrings are kept among the command's own children.
        args.push(r)
      } else {
        redirects.push(r)
      }
      continue
    }
    // Once a file_redirect has been seen, command args are done — grammar's
    // command rule doesn't allow file_redirect in its post-name choice, so
    // anything after belongs to redirected_statement's file_redirect children.
    if (redirects.length > 0) break
    // `[` test_command backtrack — stop at `]` so outer handler can consume it
    if (P.stopToken === ']' && peek(P.L) === ']') break
    // Lookahead only: stop at anything that ends the command; the token is
    // un-read in either case.
    const save2 = saveLex(P.L)
    const pk = nextToken(P.L, 'arg')
    if (
      pk.type === 'EOF' ||
      pk.type === 'NEWLINE' ||
      pk.type === 'COMMENT' ||
      (pk.type === 'OP' &&
        (pk.value === '|' ||
          pk.value === '|&' ||
          pk.value === '&&' ||
          pk.value === '||' ||
          pk.value === ';' ||
          pk.value === ';;' ||
          pk.value === ';&' ||
          pk.value === ';;&' ||
          pk.value === '&' ||
          pk.value === ')' ||
          pk.value === '}' ||
          pk.value === '))'))
    ) {
      restoreLex(P.L, save2)
      break
    }
    restoreLex(P.L, save2)
    const arg = parseWord(P, 'arg')
    if (!arg) {
      // Lone `(` in arg position — tree-sitter parses this as subshell arg
      // e.g., `echo =(cmd)` → command has ERROR(=), subshell(cmd) as args
      if (peek(P.L) === '(') {
        const oTok = nextToken(P.L, 'cmd')
        const open = leaf(P, '(', oTok)
        const body = parseStatements(P, ')')
        const cTok = nextToken(P.L, 'cmd')
        // Missing `)` → zero-width closer placed right after the opener.
        const close =
          cTok.type === 'OP' && cTok.value === ')'
            ? leaf(P, ')', cTok)
            : mk(P, ')', open.endIndex, open.endIndex, [])
        args.push(
          mk(P, 'subshell', open.startIndex, close.endIndex, [
            open,
            ...body,
            close,
          ]),
        )
        continue
      }
      break
    }
    // Lone `=` in arg position is a parse error in bash — tree-sitter wraps
    // it in ERROR for recovery. Happens in `echo =(cmd)` (zsh process-sub).
    if (arg.type === 'word' && arg.text === '=') {
      args.push(mk(P, 'ERROR', arg.startIndex, arg.endIndex, [arg]))
      continue
    }
    // Word immediately followed by `(` (no whitespace) is a parse error —
    // bash doesn't allow glob-then-subshell adjacency. tree-sitter wraps the
    // word in ERROR. Catches zsh glob qualifiers like `*.(e:'cmd':)`.
    if (
      (arg.type === 'word' || arg.type === 'concatenation') &&
      peek(P.L) === '(' &&
      P.L.b === arg.endIndex
    ) {
      args.push(mk(P, 'ERROR', arg.startIndex, arg.endIndex, [arg]))
      continue
    }
    args.push(arg)
  }

  // preRedirects (e.g., `2>&1 cat`, `<<<str cmd`) go INSIDE the command node
  // before command_name per tree-sitter grammar, not in redirected_statement
  const cmdChildren = [...assignments, ...preRedirects, cmdName, ...args]
  // cmdChildren always contains cmdName, so the fallback arm is never taken.
  const cmdEnd =
    cmdChildren.length > 0
      ? cmdChildren[cmdChildren.length - 1]!.endIndex
      : cmdName.endIndex
  const cmdStart = cmdChildren[0]!.startIndex
  const cmd = mk(P, 'command', cmdStart, cmdEnd, cmdChildren)

  if (heredocRedirect) {
    // Scan heredoc body now
    scanHeredocBodies(P)
    // Take the first pending heredoc record off the queue.
    const hd = P.L.heredocs.shift()
    if (hd && heredocRedirect.children.length >= 2) {
      // Quoted delimiters get a childless body; unquoted bodies are parsed
      // for their content.
      const bodyNode = mk(
        P,
        'heredoc_body',
        hd.bodyStart,
        hd.bodyEnd,
        hd.quoted ? [] : parseHeredocBodyContent(P, hd.bodyStart, hd.bodyEnd),
      )
      const endNode = mk(P, 'heredoc_end', hd.endStart, hd.endEnd, [])
      // Extend the redirect node in place so it covers body and end marker.
      heredocRedirect.children.push(bodyNode, endNode)
      heredocRedirect.endIndex = hd.endEnd
      heredocRedirect.text = sliceBytes(
        P,
        heredocRedirect.startIndex,
        hd.endEnd,
      )
    }
    // NOTE(review): preRedirects are already children of `cmd` and are listed
    // again here as siblings — confirm this duplication is intended.
    const allR = [...preRedirects, heredocRedirect, ...redirects]
    const rStart =
      preRedirects.length > 0
        ? Math.min(cmd.startIndex, preRedirects[0]!.startIndex)
        : cmd.startIndex
    return mk(P, 'redirected_statement', rStart, heredocRedirect.endIndex, [
      cmd,
      ...allR,
    ])
  }

  if (redirects.length > 0) {
    const last = redirects[redirects.length - 1]!
    return mk(P, 'redirected_statement', cmd.startIndex, last.endIndex, [
      cmd,
      ...redirects,
    ])
  }

  return cmd
}
1405  
/**
 * Attach any redirects that follow `node`.
 *
 * Redirects are consumed one after another until none parses. A herestring
 * (`<<<`) ends the run — and is left unconsumed — unless `allowHerestring` is
 * set. With no redirects collected, `node` is returned untouched; otherwise
 * it becomes the first child of a new `redirected_statement`.
 */
function maybeRedirect(
  P: ParseState,
  node: TsNode,
  allowHerestring = false,
): TsNode {
  const trailing: TsNode[] = []
  for (;;) {
    skipBlanks(P.L)
    const mark = saveLex(P.L)
    const redirect = tryParseRedirect(P)
    if (redirect === null) break
    const rejected =
      !allowHerestring && redirect.type === 'herestring_redirect'
    if (rejected) {
      // Give the herestring back to whoever parses next.
      restoreLex(P.L, mark)
      break
    }
    trailing.push(redirect)
  }

  const count = trailing.length
  if (count === 0) return node
  const finalRedirect = trailing[count - 1]!
  return mk(
    P,
    'redirected_statement',
    node.startIndex,
    finalRedirect.endIndex,
    [node, ...trailing],
  )
}
1430  
/**
 * Try to parse `name=value`, `name+=value`, `name[index]=value` or
 * `name=(elem ...)` at the current position.
 *
 * On success returns a `variable_assignment` node whose children are the
 * left-hand side, the operator, and — when something follows the operator —
 * the value. If the text is not an assignment, the lexer is rewound to its
 * state on entry and null is returned.
 */
function tryParseAssignment(P: ParseState): TsNode | null {
  const entry = saveLex(P.L)
  skipBlanks(P.L)
  const nameStart = P.L.b

  // The name must begin with an identifier-start character.
  if (!isIdentStart(peek(P.L))) {
    restoreLex(P.L, entry)
    return null
  }
  while (isIdentChar(peek(P.L))) advance(P.L)
  const nameEnd = P.L.b

  // Optional `[...]` subscript: run to the matching bracket, tracking nesting.
  let lhsEnd = nameEnd
  if (peek(P.L) === '[') {
    advance(P.L)
    for (let depth = 1; P.L.i < P.L.len && depth > 0; advance(P.L)) {
      const ch = peek(P.L)
      if (ch === '[') depth += 1
      else if (ch === ']') depth -= 1
    }
    lhsEnd = P.L.b
  }

  // Operator: `=` (but not `==`) or `+=`.
  const head = peek(P.L)
  const follow = peek(P.L, 1)
  const isPlain = head === '=' && follow !== '='
  const isAppend = head === '+' && follow === '='
  if (!isPlain && !isAppend) {
    restoreLex(P.L, entry)
    return null
  }
  const op = isPlain ? '=' : '+='

  const variable = mk(P, 'variable_name', nameStart, nameEnd, [])
  let target: TsNode = variable
  if (lhsEnd > nameEnd) {
    // A subscript was scanned: name `[` index `]`.
    const lbracket = mk(P, '[', nameEnd, nameEnd + 1, [])
    const index = parseSubscriptIndex(P, nameEnd + 1, lhsEnd - 1)
    const rbracket = mk(P, ']', lhsEnd - 1, lhsEnd, [])
    target = mk(P, 'subscript', nameStart, lhsEnd, [
      variable,
      lbracket,
      index,
      rbracket,
    ])
  }

  const opStart = P.L.b
  advance(P.L)
  if (isAppend) advance(P.L)
  const opEnd = P.L.b
  const opNode = mk(P, op, opStart, opEnd, [])

  let value: TsNode | null = null
  if (peek(P.L) === '(') {
    // Array literal: `(` word* `)`
    const lparen = leaf(P, '(', nextToken(P.L, 'cmd'))
    const items: TsNode[] = [lparen]
    for (;;) {
      skipBlanks(P.L)
      const elem = peek(P.L) === ')' ? null : parseWord(P, 'arg')
      if (!elem) break
      items.push(elem)
    }
    const endTok = nextToken(P.L, 'cmd')
    let rparen: TsNode
    if (endTok.value === ')') {
      rparen = leaf(P, ')', endTok)
    } else {
      // No closer: zero-width `)` placed right after the opener.
      rparen = mk(P, ')', lparen.endIndex, lparen.endIndex, [])
    }
    items.push(rparen)
    value = mk(P, 'array', lparen.startIndex, rparen.endIndex, items)
  } else {
    // Scalar value — absent when the operator is followed by end of input,
    // whitespace, or one of the listed terminators.
    const next = peek(P.L)
    if (next) {
      switch (next) {
        case ' ':
        case '\t':
        case '\n':
        case ';':
        case '&':
        case '|':
        case ')':
        case '}':
          break
        default:
          value = parseWord(P, 'arg')
      }
    }
  }

  if (value) {
    return mk(P, 'variable_assignment', nameStart, value.endIndex, [
      target,
      opNode,
      value,
    ])
  }
  return mk(P, 'variable_assignment', nameStart, opEnd, [target, opNode])
}
1520  
/**
 * Parse the index inside a subscript, reading directly from the lexer.
 *
 * Per the tree-sitter grammar the index is arithmetic:
 * `${a[1+2]}` → binary_expression; `${a[++i]}` → unary_expression(word);
 * `${a[(($n+1))]}` → compound_statement(binary_expression).
 * A lone `@` or `*` directly before `]` is emitted as a plain word instead.
 */
function parseSubscriptIndexInline(P: ParseState): TsNode | null {
  skipBlanks(P.L)
  const first = peek(P.L)

  // `@` / `*` alone → word (all-elements index)
  const isAllKeys = (first === '@' || first === '*') && peek(P.L, 1) === ']'
  if (isAllKeys) {
    const from = P.L.b
    advance(P.L)
    return mk(P, 'word', from, P.L.b, [])
  }

  // `((expr))` → compound_statement around the inner arithmetic
  if (first === '(' && peek(P.L, 1) === '(') {
    const openStart = P.L.b
    advance(P.L)
    advance(P.L)
    const lparens = mk(P, '((', openStart, P.L.b, [])
    const body = parseArithExpr(P, '))', 'var')
    skipBlanks(P.L)

    // Consume `))` if present; otherwise the closer stays zero-width here.
    const closeStart = P.L.b
    if (peek(P.L) === ')' && peek(P.L, 1) === ')') {
      advance(P.L)
      advance(P.L)
    }
    const rparens = mk(P, '))', closeStart, P.L.b, [])

    const children = body ? [lparens, body, rparens] : [lparens, rparens]
    return mk(
      P,
      'compound_statement',
      lparens.startIndex,
      rparens.endIndex,
      children,
    )
  }

  // General arithmetic. Bare identifiers use 'word' mode per tree-sitter
  // (`${words[++counter]}` → unary_expression(word)).
  return parseArithExpr(P, ']', 'word')
}
1560  
/**
 * Legacy subscript-index parser for callers that have already located the
 * index as a byte range [startB, endB).
 *
 * Classifies the text by shape: all digits → `number`; `$name` or `$` plus a
 * special-variable character → `simple_expansion`; anything else → `word`.
 */
function parseSubscriptIndex(
  P: ParseState,
  startB: number,
  endB: number,
): TsNode {
  const raw = sliceBytes(P, startB, endB)
  if (/^\d+$/.test(raw)) return mk(P, 'number', startB, endB, [])

  // Decide which kind of variable name (if any) follows the `$`.
  let nameType: string | null = null
  if (/^\$([a-zA-Z_]\w*)$/.test(raw)) {
    nameType = 'variable_name'
  } else if (raw.length === 2 && raw[0] === '$' && SPECIAL_VARS.has(raw[1]!)) {
    nameType = 'special_variable_name'
  }

  if (nameType === null) return mk(P, 'word', startB, endB, [])

  // `$` occupies the first byte; the name covers the remainder of the range.
  const sigil = mk(P, '$', startB, startB + 1, [])
  const name = mk(P, nameType, startB + 1, endB, [])
  return mk(P, 'simple_expansion', startB, endB, [sigil, name])
}
1582  
/**
 * Can the current position start a redirect destination literal?
 *
 * Answers false at end of input, newline, shell operators/terminators, a new
 * redirect operator (bare or file-descriptor-prefixed), `}`, and — while a
 * `[` test is being parsed — `]`. This is what stops file_redirect's
 * repeat1($._literal) at the right boundary.
 */
function isRedirectLiteralStart(P: ParseState): boolean {
  const ch = peek(P.L)
  switch (ch) {
    // End of input / end of line
    case '':
    case '\n':
    // Shell terminators and operators
    case '|':
    case '&':
    case ';':
    case '(':
    case ')':
    // `}` closes an enclosing compound_statement, so it must end the literal run
    case '}':
      return false
    // `<` / `>` begin another redirect, unless it is a `<(` / `>(` process
    // substitution, which IS a literal.
    case '<':
    case '>':
      return peek(P.L, 1) === '('
    // Inside `[ ... ]` the closer must be left for the test_command handler.
    case ']':
      return P.stopToken !== ']'
  }

  // N< / N> — a file-descriptor prefix starts a new redirect, not a literal.
  if (isDigit(ch)) {
    let pos = P.L.i
    for (; pos < P.L.len; pos++) {
      if (!isDigit(P.L.src[pos]!)) break
    }
    const afterDigits = pos < P.L.len ? P.L.src[pos]! : ''
    if (afterDigits === '>' || afterDigits === '<') return false
  }
  return true
}
1615  
/**
 * Parse a redirect operator + destination(s).
 *
 * Recognizes an optional numeric file-descriptor prefix followed by one of:
 * `<<<` (herestring_redirect), `<<` / `<<-` (heredoc_redirect), the close-fd
 * forms `<&-` / `>&-`, or a plain file redirect operator (file_redirect).
 *
 * Side effect: a heredoc operator pushes a pending entry onto `P.L.heredocs`;
 * its body offsets are left at 0 here and filled in by a later scan.
 *
 * @param greedy When true, file_redirect consumes repeat1($._literal) per
 *   grammar's prec.left — `cmd >f a b c` attaches `a b c` to the redirect.
 *   When false (preRedirect context), takes only 1 destination because
 *   command's dynamic precedence beats redirected_statement's prec(-1).
 * @returns The redirect node, or null (lexer rewound to its state on entry)
 *   when the next token is not a recognized redirect operator.
 */
function tryParseRedirect(P: ParseState, greedy = false): TsNode | null {
  const save = saveLex(P.L)
  skipBlanks(P.L)
  // File descriptor prefix?
  let fd: TsNode | null = null
  if (isDigit(peek(P.L))) {
    const startB = P.L.b
    // Look ahead past the digit run without consuming; only commit to a
    // file_descriptor when the digits are directly followed by `<` or `>`.
    let j = P.L.i
    while (j < P.L.len && isDigit(P.L.src[j]!)) j++
    const after = j < P.L.len ? P.L.src[j]! : ''
    if (after === '>' || after === '<') {
      while (P.L.i < j) advance(P.L)
      fd = mk(P, 'file_descriptor', startB, P.L.b, [])
    }
  }
  const t = nextToken(P.L, 'arg')
  if (t.type !== 'OP') {
    restoreLex(P.L, save)
    return null
  }
  const v = t.value
  if (v === '<<<') {
    // Herestring: operator plus an optional single word target.
    const op = leaf(P, '<<<', t)
    skipBlanks(P.L)
    const target = parseWord(P, 'arg')
    const end = target ? target.endIndex : op.endIndex
    const kids = target ? [op, target] : [op]
    return mk(
      P,
      'herestring_redirect',
      fd ? fd.startIndex : op.startIndex,
      end,
      fd ? [fd, ...kids] : kids,
    )
  }
  if (v === '<<' || v === '<<-') {
    const op = leaf(P, v, t)
    // Heredoc start — delimiter word (may be quoted)
    skipBlanks(P.L)
    const dStart = P.L.b
    let quoted = false
    let delim = ''
    const dc = peek(P.L)
    if (dc === "'" || dc === '"') {
      // Quoted delimiter: collect everything up to the matching quote; the
      // quotes themselves are not part of `delim`.
      quoted = true
      advance(P.L)
      while (P.L.i < P.L.len && peek(P.L) !== dc) {
        delim += peek(P.L)
        advance(P.L)
      }
      if (P.L.i < P.L.len) advance(P.L)
    } else if (dc === '\\') {
      // Backslash-escaped delimiter: \X — exactly one escaped char, body is
      // quoted (literal). Covers <<\EOF <<\' <<\\ etc.
      quoted = true
      advance(P.L)
      if (P.L.i < P.L.len && peek(P.L) !== '\n') {
        delim += peek(P.L)
        advance(P.L)
      }
      // May be followed by more ident chars (e.g. <<\EOF → delim "EOF")
      while (P.L.i < P.L.len && isIdentChar(peek(P.L))) {
        delim += peek(P.L)
        advance(P.L)
      }
    } else {
      // Unquoted delimiter: bash accepts most non-metacharacters (not just
      // identifiers). Allow !, -, ., etc. — stop at shell metachars.
      while (P.L.i < P.L.len && isHeredocDelimChar(peek(P.L))) {
        delim += peek(P.L)
        advance(P.L)
      }
    }
    const dEnd = P.L.b
    // heredoc_start spans the delimiter as written, including any quotes or
    // backslash.
    const startNode = mk(P, 'heredoc_start', dStart, dEnd, [])
    // Register pending heredoc — body scanned at next newline
    P.L.heredocs.push({
      delim,
      stripTabs: v === '<<-',
      quoted,
      bodyStart: 0,
      bodyEnd: 0,
      endStart: 0,
      endEnd: 0,
    })
    const kids = fd ? [fd, op, startNode] : [op, startNode]
    const startIdx = fd ? fd.startIndex : op.startIndex
    // SECURITY: tree-sitter nests any pipeline/list/file_redirect appearing
    // between heredoc_start and the newline as a CHILD of heredoc_redirect.
    // `ls <<'EOF' | rm -rf /tmp/evil` must not silently drop the rm. Parse
    // trailing words and file_redirects properly (ast.ts walkHeredocRedirect
    // fails closed on any unrecognized child via tooComplex). Pipeline / list
    // operators (| && || ;) are structurally complex — emit ERROR so the same
    // fail-closed path rejects them.
    while (true) {
      skipBlanks(P.L)
      const tc = peek(P.L)
      if (tc === '\n' || tc === '' || P.L.i >= P.L.len) break
      // File redirect after delimiter: cat <<EOF > out.txt
      if (tc === '>' || tc === '<' || isDigit(tc)) {
        const rSave = saveLex(P.L)
        // Non-greedy recursive parse; anything other than a file_redirect is
        // discarded and the lexer rewound so the branches below can try.
        const r = tryParseRedirect(P)
        if (r && r.type === 'file_redirect') {
          kids.push(r)
          continue
        }
        restoreLex(P.L, rSave)
      }
      // Pipeline after heredoc_start: `one <<EOF | grep two` — tree-sitter
      // nests the pipeline as a child of heredoc_redirect. ast.ts
      // walkHeredocRedirect fails closed on pipeline/command via tooComplex.
      if (tc === '|' && peek(P.L, 1) !== '|') {
        // The first `|` is consumed without a node of its own; only the `|`
        // separators between later commands become children of the pipeline.
        advance(P.L)
        skipBlanks(P.L)
        const pipeCmds: TsNode[] = []
        while (true) {
          const cmd = parseCommand(P)
          if (!cmd) break
          pipeCmds.push(cmd)
          skipBlanks(P.L)
          if (peek(P.L) === '|' && peek(P.L, 1) !== '|') {
            const ps = P.L.b
            advance(P.L)
            pipeCmds.push(mk(P, '|', ps, P.L.b, []))
            skipBlanks(P.L)
            continue
          }
          break
        }
        if (pipeCmds.length > 0) {
          const pl = pipeCmds[pipeCmds.length - 1]!
          // tree-sitter always wraps in pipeline after `|`, even single command
          kids.push(
            mk(P, 'pipeline', pipeCmds[0]!.startIndex, pl.endIndex, pipeCmds),
          )
        }
        continue
      }
      // && / || after heredoc_start: `cat <<-EOF || die "..."` — tree-sitter
      // nests just the RHS command (not a list) as a child of heredoc_redirect.
      if (
        (tc === '&' && peek(P.L, 1) === '&') ||
        (tc === '|' && peek(P.L, 1) === '|')
      ) {
        // The two-character operator is consumed without emitting a node.
        advance(P.L)
        advance(P.L)
        skipBlanks(P.L)
        const rhs = parseCommand(P)
        if (rhs) kids.push(rhs)
        continue
      }
      // Terminator / unhandled metachar — consume rest of line as ERROR so
      // ast.ts rejects it. Covers ; & ( )
      if (tc === '&' || tc === ';' || tc === '(' || tc === ')') {
        const eStart = P.L.b
        while (P.L.i < P.L.len && peek(P.L) !== '\n') advance(P.L)
        kids.push(mk(P, 'ERROR', eStart, P.L.b, []))
        break
      }
      // Trailing word argument: newins <<-EOF - org.freedesktop.service
      const w = parseWord(P, 'arg')
      if (w) {
        kids.push(w)
        continue
      }
      // Unrecognized — consume rest of line as ERROR
      const eStart = P.L.b
      while (P.L.i < P.L.len && peek(P.L) !== '\n') advance(P.L)
      if (P.L.b > eStart) kids.push(mk(P, 'ERROR', eStart, P.L.b, []))
      break
    }
    // Ends wherever the lexer stopped on this line; the body and end marker
    // are not part of the node at this point.
    return mk(P, 'heredoc_redirect', startIdx, P.L.b, kids)
  }
  // Close-fd variants: `<&-` `>&-` have OPTIONAL destination (0 or 1)
  if (v === '<&-' || v === '>&-') {
    const op = leaf(P, v, t)
    const kids: TsNode[] = []
    if (fd) kids.push(fd)
    kids.push(op)
    // Optional single destination — only consume if next is a literal
    skipBlanks(P.L)
    const dSave = saveLex(P.L)
    const dest = isRedirectLiteralStart(P) ? parseWord(P, 'arg') : null
    if (dest) {
      kids.push(dest)
    } else {
      restoreLex(P.L, dSave)
    }
    const startIdx = fd ? fd.startIndex : op.startIndex
    const end = dest ? dest.endIndex : op.endIndex
    return mk(P, 'file_redirect', startIdx, end, kids)
  }
  if (
    v === '>' ||
    v === '>>' ||
    v === '>&' ||
    v === '>|' ||
    v === '&>' ||
    v === '&>>' ||
    v === '<' ||
    v === '<&'
  ) {
    const op = leaf(P, v, t)
    const kids: TsNode[] = []
    if (fd) kids.push(fd)
    kids.push(op)
    // Grammar: destination is repeat1($._literal) — greedily consume literals
    // until a non-literal (redirect op, terminator, etc). tree-sitter's
    // prec.left makes `cmd >f a b c` attach `a b c` to the file_redirect,
    // NOT to the command. Structural quirk but required for corpus parity.
    // In preRedirect context (greedy=false), take only 1 literal because
    // command's dynamic precedence beats redirected_statement's prec(-1).
    let end = op.endIndex
    let taken = 0
    while (true) {
      skipBlanks(P.L)
      if (!isRedirectLiteralStart(P)) break
      if (!greedy && taken >= 1) break
      const tc = peek(P.L)
      const tc1 = peek(P.L, 1)
      let target: TsNode | null = null
      // `<(` / `>(` destinations are process substitutions, not words.
      if ((tc === '<' || tc === '>') && tc1 === '(') {
        target = parseProcessSub(P)
      } else {
        target = parseWord(P, 'arg')
      }
      if (!target) break
      kids.push(target)
      end = target.endIndex
      taken++
    }
    // With no destination parsed, the node ends at the operator.
    const startIdx = fd ? fd.startIndex : op.startIndex
    return mk(P, 'file_redirect', startIdx, end, kids)
  }
  // An operator, but not a redirect — undo everything, including any
  // file-descriptor digits consumed above.
  restoreLex(P.L, save)
  return null
}
1860  
/**
 * Parse a process substitution — `<( … )` or `>( … )` — at the current lexer
 * position.
 *
 * Returns null without consuming anything unless the next two characters are
 * `<(` or `>(`. Otherwise the opener is consumed, the body is parsed with
 * `parseStatements(P, ')')`, and a `process_substitution` node is returned
 * whose children are the opener leaf, the body statements and a `)` leaf.
 * When no `)` follows the body, the closing leaf is zero-width at the current
 * position.
 */
function parseProcessSub(P: ParseState): TsNode | null {
  const direction = peek(P.L)
  const atOpener =
    (direction === '<' || direction === '>') && peek(P.L, 1) === '('
  if (!atOpener) return null

  const begin = P.L.b
  // Consume the two-character opener (`<(` or `>(`).
  advance(P.L)
  advance(P.L)
  const opener = mk(P, direction + '(', begin, P.L.b, [])

  const statements = parseStatements(P, ')')
  skipBlanks(P.L)

  // Closing paren: consumed when present, otherwise an empty span.
  const closeFrom = P.L.b
  if (peek(P.L) === ')') advance(P.L)
  const closer = mk(P, ')', closeFrom, P.L.b, [])

  return mk(P, 'process_substitution', begin, closer.endIndex, [
    opener,
    ...statements,
    closer,
  ])
}
1884  
/**
 * Locate the body and closing delimiter of every pending heredoc.
 *
 * The lexer is first moved past the remainder of the current line. Then each
 * entry of `P.L.heredocs` is processed in order, each body starting where the
 * previous heredoc finished. For every entry the following byte offsets
 * (values of `P.L.b`) are recorded:
 *   - `bodyStart` / `bodyEnd`: the body text, ending at the start of the
 *     delimiter line;
 *   - `endStart` / `endEnd`: the delimiter itself, after any leading tabs
 *     skipped because `stripTabs` is set.
 *
 * A line closes the heredoc when, after the optional tabs, it starts with the
 * delimiter and is followed by `\n`, `\r` or end of input. If no such line
 * exists the heredoc is unterminated: the body runs to end of input and the
 * delimiter span is empty.
 */
function scanHeredocBodies(P: ParseState): void {
  // Skip to newline if not already there
  while (P.L.i < P.L.len && P.L.src[P.L.i] !== '\n') advance(P.L)
  if (P.L.i < P.L.len) advance(P.L)
  for (const hd of P.L.heredocs) {
    hd.bodyStart = P.L.b
    const delimLen = hd.delim.length
    let terminated = false
    while (P.L.i < P.L.len) {
      const lineStart = P.L.i
      const lineStartB = P.L.b
      // Skip leading tabs if <<-
      let checkI = lineStart
      if (hd.stripTabs) {
        while (checkI < P.L.len && P.L.src[checkI] === '\t') checkI++
      }
      // Check if this line is the delimiter
      if (
        P.L.src.startsWith(hd.delim, checkI) &&
        (checkI + delimLen >= P.L.len ||
          P.L.src[checkI + delimLen] === '\n' ||
          P.L.src[checkI + delimLen] === '\r')
      ) {
        hd.bodyEnd = lineStartB
        // Advance past tabs
        while (P.L.i < checkI) advance(P.L)
        hd.endStart = P.L.b
        // Advance past delimiter
        for (let k = 0; k < delimLen; k++) advance(P.L)
        hd.endEnd = P.L.b
        // Skip trailing newline
        if (P.L.i < P.L.len && P.L.src[P.L.i] === '\n') advance(P.L)
        // Only this heredoc is finished. Leave the line loop and carry on
        // with the next pending heredoc (`cmd <<A <<B` has two bodies back to
        // back); returning here would leave the later ones unscanned.
        terminated = true
        break
      }
      // Consume line
      while (P.L.i < P.L.len && P.L.src[P.L.i] !== '\n') advance(P.L)
      if (P.L.i < P.L.len) advance(P.L)
    }
    if (!terminated) {
      // Unterminated: body extends to end of input, delimiter span is empty.
      hd.bodyEnd = P.L.b
      hd.endStart = P.L.b
      hd.endEnd = P.L.b
    }
  }
}
1928  
/**
 * Split an unquoted heredoc body into expansion nodes and literal
 * `heredoc_content` segments.
 *
 * The lexer is temporarily repositioned at byte offset `start`, scanned until
 * its byte position reaches `end`, and restored to its saved state before
 * returning.
 *
 * @param P     Parser state; its lexer position is saved and restored.
 * @param start Byte offset at which scanning begins.
 * @param end   Byte offset at which scanning stops.
 * @returns The child nodes in source order. The array is empty when the body
 *          contains no expansion — tree-sitter then treats heredoc_body as a
 *          leaf node.
 */
function parseHeredocBodyContent(
  P: ParseState,
  start: number,
  end: number,
): TsNode[] {
  const checkpoint = saveLex(P.L)
  restoreLexToByte(P, start)
  const children: TsNode[] = []
  // Byte offset where the not-yet-emitted literal run begins.
  let literalFrom = P.L.b
  // tree-sitter-bash hides the text before the first expansion
  // (_heredoc_body_beginning); literal runs are emitted as heredoc_content
  // only once an expansion has been seen.
  let expansionSeen = false
  while (P.L.b < end) {
    const ch = peek(P.L)
    if (ch === '\\') {
      // An escaped `$`, backtick or backslash stays literal: skip the pair.
      // Any other backslash is skipped on its own.
      const escaped = peek(P.L, 1)
      advance(P.L)
      if (escaped === '$' || escaped === '`' || escaped === '\\') {
        advance(P.L)
      }
    } else if (ch === '$' || ch === '`') {
      // NOTE(review): a backtick is also routed through parseDollarLike, which
      // treats the current character as `$` — confirm this is intended rather
      // than a call to parseBacktick.
      const before = P.L.b
      const node = parseDollarLike(P)
      // A bare `$` followed by a non-name (e.g. `$'` in a regex) comes back as
      // a lone '$' leaf; that is literal text and does not split the content.
      const isExpansion =
        !!node &&
        (node.type === 'simple_expansion' ||
          node.type === 'expansion' ||
          node.type === 'command_substitution' ||
          node.type === 'arithmetic_expansion')
      if (node && isExpansion) {
        if (expansionSeen && before > literalFrom) {
          children.push(mk(P, 'heredoc_content', literalFrom, before, []))
        }
        children.push(node)
        literalFrom = P.L.b
        expansionSeen = true
      }
    } else {
      advance(P.L)
    }
  }
  // Trailing literal run, emitted only when at least one expansion was found.
  if (expansionSeen) {
    children.push(mk(P, 'heredoc_content', literalFrom, end, []))
  }
  restoreLex(P.L, checkpoint)
  return children
}
1988  
/**
 * Reposition the lexer at a given byte offset.
 *
 * Binary-searches `P.L.byteTable` for the first string index whose table
 * entry is not below `targetByte`, stores that index in `P.L.i` and sets
 * `P.L.b` to `targetByte`. `byteAt(P.L, 0)` is called first when the table is
 * missing (presumably it builds the table lazily — confirm against byteAt).
 */
function restoreLexToByte(P: ParseState, targetByte: number): void {
  if (!P.L.byteTable) byteAt(P.L, 0)
  const table = P.L.byteTable!
  let low = 0
  let high = P.src.length
  while (low < high) {
    const mid = (low + high) >>> 1
    if (table[mid]! >= targetByte) {
      high = mid
    } else {
      low = mid + 1
    }
  }
  P.L.i = low
  P.L.b = targetByte
}
2002  
/**
 * Parse one word-position element: a bare word, a quoted string, an
 * expansion, a process substitution, or several of these written back to
 * back.
 *
 * Leading blanks are skipped, then fragments are collected until a blank, a
 * line break, `|`, `&`, `;`, `(`, `)` or a redirect operator is reached.
 *
 * @returns null when no fragment was collected, the fragment itself when
 *          there is exactly one, otherwise a `concatenation` node wrapping
 *          all fragments.
 */
function parseWord(P: ParseState, _ctx: 'cmd' | 'arg'): TsNode | null {
  skipBlanks(P.L)
  const pieces: TsNode[] = []

  // Consume exactly one character and record it as a `word` fragment.
  const pushCharWord = (): void => {
    const from = P.L.b
    advance(P.L)
    pieces.push(mk(P, 'word', from, P.L.b, []))
  }

  // True when the character after `{` makes the brace a standalone word.
  const endsLoneBrace = (ch: string): boolean => {
    switch (ch) {
      case ';':
      case '|':
      case '&':
      case '\n':
      case '':
      case ')':
      case ' ':
      case '\t':
        return true
      default:
        return false
    }
  }

  scan: while (P.L.i < P.L.len) {
    const ch = peek(P.L)
    switch (ch) {
      // Word terminators.
      case ' ':
      case '\t':
      case '\n':
      case '\r':
      case '':
      case '|':
      case '&':
      case ';':
      case '(':
      case ')':
        break scan

      // `<` / `>` end the word (redirect operators) unless they open a
      // process substitution `<(` / `>(`.
      case '<':
      case '>': {
        if (peek(P.L, 1) !== '(') break scan
        const procSub = parseProcessSub(P)
        if (procSub) pieces.push(procSub)
        continue scan
      }

      case '"':
        pieces.push(parseDoubleQuoted(P))
        continue scan

      case "'":
        pieces.push(leaf(P, 'raw_string', nextToken(P.L, 'arg')))
        continue scan

      case '$': {
        const after = peek(P.L, 1)
        if (after === "'") {
          pieces.push(leaf(P, 'ansi_c_string', nextToken(P.L, 'arg')))
          continue scan
        }
        if (after === '"') {
          // Translated string `$"…"`: a `$` leaf followed by a string node.
          const dollarTok: Token = {
            type: 'DOLLAR',
            value: '$',
            start: P.L.b,
            end: P.L.b + 1,
          }
          advance(P.L)
          pieces.push(leaf(P, '$', dollarTok))
          pieces.push(parseDoubleQuoted(P))
          continue scan
        }
        if (after === '`') {
          // tree-sitter drops a `$` that directly precedes a backtick and
          // emits only the command_substitution. Skip the `$`; the backtick
          // is handled on the next pass.
          advance(P.L)
          continue scan
        }
        const expansion = parseDollarLike(P)
        if (expansion) pieces.push(expansion)
        continue scan
      }

      case '`': {
        // While parsing inside backticks, a backtick ends the word.
        if (P.inBacktick > 0) break scan
        const backtick = parseBacktick(P)
        if (backtick) pieces.push(backtick)
        continue scan
      }

      case '{': {
        // Brace expression {1..5} / {a..z} — only when it really looks like one.
        const braceExpr = tryParseBraceExpr(P)
        if (braceExpr) {
          pieces.push(braceExpr)
          continue scan
        }
        // SECURITY: a `{` immediately followed by a command terminator
        // (; | & newline or EOF), `)` or a blank is a standalone word. It
        // must not swallow the rest of the line via tryParseBraceLikeCat:
        // `echo {;touch /tmp/evil` has to split on `;` so the security
        // walker sees `touch`.
        if (endsLoneBrace(peek(P.L, 1))) {
          pushCharWord()
          continue scan
        }
        // Otherwise `{` … `}` are split into plain word fragments.
        const split = tryParseBraceLikeCat(P)
        if (split) {
          for (const piece of split) pieces.push(piece)
          continue scan
        }
        // Nothing matched: fall through to the bare-word attempt below.
        break
      }

      // parseBareWord stops at `}`, `[` and `]`, so each becomes its own
      // single-character word here (`echo }foo`; `[:lower:]` → `[` `:lower:`
      // `]`; `{o[k]}` → 6 words).
      case '}':
      case '[':
      case ']':
        pushCharWord()
        continue scan
    }

    // Bare word fragment.
    const fragment = parseBareWord(P)
    if (!fragment) break

    // `NN#${...}` / `NN#$(...)` is ONE number node with the expansion or
    // command substitution as its only child, not a concatenation. Grammar:
    // number can be seq(/-?(0x)?[0-9]+#/, choice(expansion, cmd_sub)); the
    // `NN#` prefix is an anonymous pattern, so it produces no child node.
    const isBasePrefix =
      fragment.type === 'word' &&
      /^-?(0x)?[0-9]+#$/.test(fragment.text) &&
      peek(P.L) === '$' &&
      (peek(P.L, 1) === '{' || peek(P.L, 1) === '(')
    if (isBasePrefix) {
      const operand = parseDollarLike(P)
      if (operand) {
        pieces.push(
          mk(P, 'number', fragment.startIndex, operand.endIndex, [operand]),
        )
        continue
      }
    }
    pieces.push(fragment)
  }

  if (pieces.length === 0) return null
  if (pieces.length === 1) return pieces[0]!
  // Several adjacent fragments: wrap them in a concatenation.
  const head = pieces[0]!
  const tail = pieces[pieces.length - 1]!
  return mk(P, 'concatenation', head.startIndex, tail.endIndex, pieces)
}
2162  
/**
 * Consume an unquoted word fragment starting at the current lexer position.
 *
 * Scanning stops at blanks, line breaks, shell operators, quotes, `$`,
 * backtick, braces and brackets. A backslash escapes the next character,
 * except at end of input or before a line break, where it ends the fragment.
 *
 * @returns null when nothing was consumed; otherwise a `number` node if the
 *          text is an optionally negative run of digits, else a `word` node.
 */
function parseBareWord(P: ParseState): TsNode | null {
  const startByte = P.L.b
  const startIdx = P.L.i
  scan: while (P.L.i < P.L.len) {
    const ch = peek(P.L)
    switch (ch) {
      case '\\': {
        const nextIdx = P.L.i + 1
        // Trailing unpaired `\` at true EOF — tree-sitter emits the word
        // WITHOUT the `\` plus a sibling ERROR node. Stop; the caller emits
        // the ERROR.
        if (nextIdx >= P.L.len) break scan
        const escaped = P.L.src[nextIdx]
        // Line continuation (`\` + \r?\n) BREAKS the word — tree-sitter quirk.
        const isLineContinuation =
          escaped === '\n' ||
          (escaped === '\r' && P.L.src[nextIdx + 1] === '\n')
        if (isLineContinuation) break scan
        // Ordinary escape: take the backslash and the escaped character.
        advance(P.L)
        advance(P.L)
        continue scan
      }
      case ' ':
      case '\t':
      case '\n':
      case '\r':
      case '':
      case '|':
      case '&':
      case ';':
      case '(':
      case ')':
      case '<':
      case '>':
      case '"':
      case "'":
      case '$':
      case '`':
      case '{':
      case '}':
      case '[':
      case ']':
        break scan
      default:
        advance(P.L)
    }
  }
  if (P.L.b === startByte) return null
  const isNumber = /^-?\d+$/.test(P.src.slice(startIdx, P.L.i))
  return mk(P, isNumber ? 'number' : 'word', startByte, P.L.b, [])
}
2214  
/**
 * Try to parse a range brace expression `{N..M}` at the current position.
 *
 * Both endpoints must be runs of digits, or both must be single non-digit
 * characters; any other shape is rejected.
 *
 * @returns a `brace_expression` node with children `{`, endpoint, `..`,
 *          endpoint, `}`; or null, with the lexer restored to where it
 *          started, when the text is not such an expression.
 */
function tryParseBraceExpr(P: ParseState): TsNode | null {
  const checkpoint = saveLex(P.L)
  if (peek(P.L) !== '{') return null

  // Rewind to the checkpoint and report "no match".
  const reject = (): null => {
    restoreLex(P.L, checkpoint)
    return null
  }
  // Consume a run of digit / identifier-start characters and return its span.
  const scanEndpoint = (): [number, number] => {
    const from = P.L.b
    while (isDigit(peek(P.L)) || isIdentStart(peek(P.L))) advance(P.L)
    return [from, P.L.b]
  }

  const openStart = P.L.b
  advance(P.L)
  const openEnd = P.L.b

  const [loStart, loEnd] = scanEndpoint()
  if (loEnd === loStart || peek(P.L) !== '.' || peek(P.L, 1) !== '.') {
    return reject()
  }

  const dotsStart = P.L.b
  advance(P.L)
  advance(P.L)
  const dotsEnd = P.L.b

  const [hiStart, hiEnd] = scanEndpoint()
  if (hiEnd === hiStart || peek(P.L) !== '}') return reject()

  const closeStart = P.L.b
  advance(P.L)
  const closeEnd = P.L.b

  const loText = sliceBytes(P, loStart, loEnd)
  const hiText = sliceBytes(P, hiStart, hiEnd)
  const loIsNum = /^\d+$/.test(loText)
  const hiIsNum = /^\d+$/.test(hiText)
  // Accept number..number or char..char only; mixed endpoints and
  // multi-character non-numeric endpoints are rejected.
  const bothNumbers = loIsNum && hiIsNum
  const bothSingleChars =
    !loIsNum && !hiIsNum && loText.length === 1 && hiText.length === 1
  if (!bothNumbers && !bothSingleChars) return reject()

  const endpointType = bothNumbers ? 'number' : 'word'
  return mk(P, 'brace_expression', openStart, closeEnd, [
    mk(P, '{', openStart, openEnd, []),
    mk(P, endpointType, loStart, loEnd, []),
    mk(P, '..', dotsStart, dotsEnd, []),
    mk(P, endpointType, hiStart, hiEnd, []),
    mk(P, '}', closeStart, closeEnd, []),
  ])
}
2267  
/**
 * Split a brace-looking word such as `{a,b,c}` or `{}` into fragments the way
 * tree-sitter does.
 *
 * The opening `{`, each `[` / `]`, and the closing `}` (when present) become
 * single-character `word` fragments. The runs in between become `number`
 * (optionally negative digits) or `word` fragments.
 *
 * @returns null when the current character is not `{`, otherwise the
 *          fragment list in source order.
 */
function tryParseBraceLikeCat(P: ParseState): TsNode[] | null {
  if (peek(P.L) !== '{') return null

  const fragments: TsNode[] = []
  // Consume one character and record it as a `word` fragment.
  const pushCharWord = (): void => {
    const from = P.L.b
    advance(P.L)
    fragments.push(mk(P, 'word', from, P.L.b, []))
  }
  const isBracket = (ch: string): boolean => ch === '[' || ch === ']'
  // SECURITY: command terminators end the scan so `{foo;rm x` splits
  // correctly; blanks, redirect characters, parens and `}` end it too.
  const isStop = (ch: string): boolean => {
    switch (ch) {
      case '}':
      case '\n':
      case ';':
      case '|':
      case '&':
      case ' ':
      case '\t':
      case '<':
      case '>':
      case '(':
      case ')':
        return true
      default:
        return false
    }
  }

  // The opening brace itself.
  pushCharWord()

  while (P.L.i < P.L.len) {
    const ch = peek(P.L)
    if (isStop(ch)) break
    // `[` and `]` are single-char words: {o[k]} → { o [ k ] }
    if (isBracket(ch)) {
      pushCharWord()
      continue
    }
    // Gather a run up to the next stop character or bracket.
    const runStart = P.L.b
    while (P.L.i < P.L.len) {
      const rc = peek(P.L)
      if (isStop(rc) || isBracket(rc)) break
      advance(P.L)
    }
    const runEnd = P.L.b
    if (runEnd <= runStart) break
    const runText = sliceBytes(P, runStart, runEnd)
    const runType = /^-?\d+$/.test(runText) ? 'number' : 'word'
    fragments.push(mk(P, runType, runStart, runEnd, []))
  }

  // Closing brace, if the scan stopped on one.
  if (peek(P.L) === '}') pushCharWord()
  return fragments
}
2338  
/**
 * Parse a double-quoted string; the lexer must be positioned on the opening
 * `"`.
 *
 * @returns a `string` node whose children are the opening quote, then any
 *          mix of `string_content`, expansion, bare `$` and backtick nodes,
 *          then the closing quote. When the closing quote is missing, that
 *          leaf is zero-width at the position where scanning stopped.
 */
function parseDoubleQuoted(P: ParseState): TsNode {
  const openAt = P.L.b
  advance(P.L)
  const pieces: TsNode[] = [mk(P, '"', openAt, P.L.b, [])]

  // Start of the pending literal segment, as byte offset and string index.
  let segByte = P.L.b
  let segIdx = P.L.i
  const markSegment = (): void => {
    segByte = P.L.b
    segIdx = P.L.i
  }
  const emitSegment = (): void => {
    if (P.L.b <= segByte) return
    // Tree-sitter's extras rule /\s/ has higher precedence than
    // string_content (prec -1), so whitespace-only segments are elided:
    // `" ${x} "` → (string (expansion)), not
    // (string (string_content)(expansion)(string_content)).
    // This intentionally diverges from preserving all content — cc tests
    // relying on whitespace-only string_content need updating (CCReconcile).
    const raw = P.src.slice(segIdx, P.L.i)
    if (/^[ \t]+$/.test(raw)) return
    pieces.push(mk(P, 'string_content', segByte, P.L.b, []))
  }

  while (P.L.i < P.L.len) {
    const ch = peek(P.L)
    if (ch === '"') break

    if (ch === '\\' && P.L.i + 1 < P.L.len) {
      // Escape: the backslash and the next character stay in the segment.
      advance(P.L)
      advance(P.L)
      continue
    }

    if (ch === '\n') {
      // string_content is split at every newline.
      emitSegment()
      advance(P.L)
      markSegment()
      continue
    }

    if (ch === '`') {
      emitSegment()
      const backtick = parseBacktick(P)
      if (backtick) pieces.push(backtick)
      markSegment()
      continue
    }

    if (ch === '$') {
      const after = peek(P.L, 1)
      const startsExpansion =
        after === '(' ||
        after === '{' ||
        isIdentStart(after) ||
        SPECIAL_VARS.has(after) ||
        isDigit(after)
      if (startsExpansion) {
        emitSegment()
        const expansion = parseDollarLike(P)
        if (expansion) pieces.push(expansion)
        markSegment()
        continue
      }
      // Bare `$` not at end-of-string: tree-sitter emits it as an anonymous
      // '$' token, which splits string_content. A `$` immediately before the
      // closing quote is absorbed into the preceding string_content, so it
      // falls through to the plain advance below.
      if (after !== '"' && after !== '') {
        emitSegment()
        const dollarAt = P.L.b
        advance(P.L)
        pieces.push(mk(P, '$', dollarAt, P.L.b, []))
        markSegment()
        continue
      }
    }

    advance(P.L)
  }
  emitSegment()

  // Closing quote: consumed when present, otherwise an empty span.
  const closeAt = P.L.b
  if (peek(P.L) === '"') advance(P.L)
  const closer = mk(P, '"', closeAt, P.L.b, [])
  pieces.push(closer)
  return mk(P, 'string', openAt, closer.endIndex, pieces)
}
2428  
/**
 * Parse the construct introduced by the character at the current lexer
 * position (callers position the lexer on a `$`). The character that follows
 * selects the form:
 *
 *   - `$((` … `))` and legacy `$[` … `]` → `arithmetic_expansion`
 *   - `$(` … `)`                         → `command_substitution`
 *   - `${` … `}`                         → `expansion`
 *   - `$NAME`, `$1`, `$?` etc.           → `simple_expansion`
 *   - anything else                      → a lone `$` leaf
 *
 * A missing closing delimiter yields a zero-width closing leaf at the
 * position where parsing stopped.
 */
function parseDollarLike(P: ParseState): TsNode | null {
  const lookahead = peek(P.L, 1)
  const origin = P.L.b

  // Consume `count` characters.
  const eat = (count: number): void => {
    for (let k = 0; k < count; k++) advance(P.L)
  }
  // Closing-delimiter leaf: consumed when `present`, otherwise zero-width.
  const closer = (text: string, present: boolean): TsNode => {
    const from = P.L.b
    if (present) eat(text.length)
    return mk(P, text, from, P.L.b, [])
  }

  switch (lookahead) {
    case '(': {
      if (peek(P.L, 2) === '(') {
        // $(( arithmetic ))
        eat(3)
        const opener = mk(P, '$((', origin, P.L.b, [])
        const exprs = parseArithCommaList(P, '))', 'var')
        skipBlanks(P.L)
        const end = closer('))', peek(P.L) === ')' && peek(P.L, 1) === ')')
        return mk(P, 'arithmetic_expansion', origin, end.endIndex, [
          opener,
          ...exprs,
          end,
        ])
      }
      // $( command substitution )
      eat(2)
      const opener = mk(P, '$(', origin, P.L.b, [])
      let body = parseStatements(P, ')')
      skipBlanks(P.L)
      const end = closer(')', peek(P.L) === ')')
      // $(< file) shorthand: tree-sitter emits
      // (command_substitution (file_redirect (word))) directly, so unwrap a
      // redirected_statement whose only child is a file_redirect.
      const only = body.length === 1 ? body[0]! : null
      if (
        only !== null &&
        only.type === 'redirected_statement' &&
        only.children.length === 1 &&
        only.children[0]!.type === 'file_redirect'
      ) {
        body = only.children
      }
      return mk(P, 'command_substitution', origin, end.endIndex, [
        opener,
        ...body,
        end,
      ])
    }
    case '[': {
      // $[ arithmetic ] — legacy bash syntax, same as $((...))
      eat(2)
      const opener = mk(P, '$[', origin, P.L.b, [])
      const exprs = parseArithCommaList(P, ']', 'var')
      skipBlanks(P.L)
      const end = closer(']', peek(P.L) === ']')
      return mk(P, 'arithmetic_expansion', origin, end.endIndex, [
        opener,
        ...exprs,
        end,
      ])
    }
    case '{': {
      // ${ parameter expansion }
      eat(2)
      const opener = mk(P, '${', origin, P.L.b, [])
      const inner = parseExpansionBody(P)
      const end = closer('}', peek(P.L) === '}')
      return mk(P, 'expansion', origin, end.endIndex, [opener, ...inner, end])
    }
  }

  // Simple expansion: $VAR, $1, $?, $$, $@ …
  advance(P.L)
  const dollar = mk(P, '$', origin, P.L.b, [])
  const first = peek(P.L)
  const nameStart = P.L.b
  // Wrap the name consumed since `nameStart` into a simple_expansion.
  const finishSimple = (nameType: string): TsNode => {
    const nameNode = mk(P, nameType, nameStart, P.L.b, [])
    return mk(P, 'simple_expansion', origin, P.L.b, [dollar, nameNode])
  }

  // `$_` is a special variable only when no further identifier characters
  // follow; otherwise `_` starts an ordinary name (handled just below).
  if (first === '_' && !isIdentChar(peek(P.L, 1))) {
    advance(P.L)
    return finishSimple('special_variable_name')
  }
  if (isIdentStart(first)) {
    while (isIdentChar(peek(P.L))) advance(P.L)
    return finishSimple('variable_name')
  }
  if (isDigit(first)) {
    // Positional parameter: exactly one digit is taken.
    advance(P.L)
    return finishSimple('variable_name')
  }
  if (SPECIAL_VARS.has(first)) {
    advance(P.L)
    return finishSimple('special_variable_name')
  }
  // Bare `$` — just a `$` leaf (tree-sitter treats a trailing `$` as literal).
  return dollar
}
2554  
2555  function parseExpansionBody(P: ParseState): TsNode[] {
2556    const out: TsNode[] = []
2557    skipBlanks(P.L)
2558    // Bizarre cases: ${#!} ${!#} ${!##} ${!# } ${!## } all emit empty (expansion)
2559    // — both # and ! become anonymous nodes when only combined with each other
2560    // and optional trailing space before }. Note ${!##/} does NOT match (has
2561    // content after), so it parses normally as (special_variable_name)(regex).
2562    {
2563      const c0 = peek(P.L)
2564      const c1 = peek(P.L, 1)
2565      if (c0 === '#' && c1 === '!' && peek(P.L, 2) === '}') {
2566        advance(P.L)
2567        advance(P.L)
2568        return out
2569      }
2570      if (c0 === '!' && c1 === '#') {
2571        // ${!#} ${!##} with optional trailing space then }
2572        let j = 2
2573        if (peek(P.L, j) === '#') j++
2574        if (peek(P.L, j) === ' ') j++
2575        if (peek(P.L, j) === '}') {
2576          while (j-- > 0) advance(P.L)
2577          return out
2578        }
2579      }
2580    }
2581    // Optional # prefix for length
2582    if (peek(P.L) === '#') {
2583      const s = P.L.b
2584      advance(P.L)
2585      out.push(mk(P, '#', s, P.L.b, []))
2586    }
2587    // Optional ! prefix for indirect expansion: ${!varname} ${!prefix*} ${!prefix@}
2588    // Only when followed by an identifier — ${!} alone is special var $!
2589    // Also = ~ prefixes (zsh-style ${=var} ${~var})
2590    const pc = peek(P.L)
2591    if (
2592      (pc === '!' || pc === '=' || pc === '~') &&
2593      (isIdentStart(peek(P.L, 1)) || isDigit(peek(P.L, 1)))
2594    ) {
2595      const s = P.L.b
2596      advance(P.L)
2597      out.push(mk(P, pc, s, P.L.b, []))
2598    }
2599    skipBlanks(P.L)
2600    // Variable name
2601    if (isIdentStart(peek(P.L))) {
2602      const s = P.L.b
2603      while (isIdentChar(peek(P.L))) advance(P.L)
2604      out.push(mk(P, 'variable_name', s, P.L.b, []))
2605    } else if (isDigit(peek(P.L))) {
2606      const s = P.L.b
2607      while (isDigit(peek(P.L))) advance(P.L)
2608      out.push(mk(P, 'variable_name', s, P.L.b, []))
2609    } else if (SPECIAL_VARS.has(peek(P.L))) {
2610      const s = P.L.b
2611      advance(P.L)
2612      out.push(mk(P, 'special_variable_name', s, P.L.b, []))
2613    }
2614    // Optional subscript [idx] — parsed arithmetically
2615    if (peek(P.L) === '[') {
2616      const varNode = out[out.length - 1]
2617      const brOpen = P.L.b
2618      advance(P.L)
2619      const brOpenNode = mk(P, '[', brOpen, P.L.b, [])
2620      const idx = parseSubscriptIndexInline(P)
2621      skipBlanks(P.L)
2622      const brClose = P.L.b
2623      if (peek(P.L) === ']') advance(P.L)
2624      const brCloseNode = mk(P, ']', brClose, P.L.b, [])
2625      if (varNode) {
2626        const kids = idx
2627          ? [varNode, brOpenNode, idx, brCloseNode]
2628          : [varNode, brOpenNode, brCloseNode]
2629        out[out.length - 1] = mk(P, 'subscript', varNode.startIndex, P.L.b, kids)
2630      }
2631    }
2632    skipBlanks(P.L)
2633    // Trailing * or @ for indirect expansion (${!prefix*} ${!prefix@}) or
2634    // @operator for parameter transformation (${var@U} ${var@Q}) — anonymous
2635    const tc = peek(P.L)
2636    if ((tc === '*' || tc === '@') && peek(P.L, 1) === '}') {
2637      const s = P.L.b
2638      advance(P.L)
2639      out.push(mk(P, tc, s, P.L.b, []))
2640      return out
2641    }
2642    if (tc === '@' && isIdentStart(peek(P.L, 1))) {
2643      // ${var@U} transformation — @ is anonymous, consume op char(s)
2644      const s = P.L.b
2645      advance(P.L)
2646      out.push(mk(P, '@', s, P.L.b, []))
2647      while (isIdentChar(peek(P.L))) advance(P.L)
2648      return out
2649    }
2650    // Operator :- := :? :+ - = ? + # ## % %% / // ^ ^^ , ,, etc.
2651    const c = peek(P.L)
2652    // Bare `:` substring operator ${var:off:len} — offset and length parsed
2653    // arithmetically. Must come BEFORE the generic operator handling so `(` after
2654    // `:` goes to parenthesized_expression not the array path. `:-` `:=` `:?`
2655    // `:+` (no space) remain default-value operators; `: -1` (with space before
2656    // -1) is substring with negative offset.
2657    if (c === ':') {
2658      const c1 = peek(P.L, 1)
2659      // `:\n` or `:}` — empty substring expansion, emits nothing (variable_name only)
2660      if (c1 === '\n' || c1 === '}') {
2661        advance(P.L)
2662        while (peek(P.L) === '\n') advance(P.L)
2663        return out
2664      }
2665      if (c1 !== '-' && c1 !== '=' && c1 !== '?' && c1 !== '+') {
2666        advance(P.L)
2667        skipBlanks(P.L)
2668        // Offset — arithmetic. `-N` at top level is a single number node per
2669        // tree-sitter; inside parens it's unary_expression(number).
2670        const offC = peek(P.L)
2671        let off: TsNode | null
2672        if (offC === '-' && isDigit(peek(P.L, 1))) {
2673          const ns = P.L.b
2674          advance(P.L)
2675          while (isDigit(peek(P.L))) advance(P.L)
2676          off = mk(P, 'number', ns, P.L.b, [])
2677        } else {
2678          off = parseArithExpr(P, ':}', 'var')
2679        }
2680        if (off) out.push(off)
2681        skipBlanks(P.L)
2682        if (peek(P.L) === ':') {
2683          advance(P.L)
2684          skipBlanks(P.L)
2685          const lenC = peek(P.L)
2686          let len: TsNode | null
2687          if (lenC === '-' && isDigit(peek(P.L, 1))) {
2688            const ns = P.L.b
2689            advance(P.L)
2690            while (isDigit(peek(P.L))) advance(P.L)
2691            len = mk(P, 'number', ns, P.L.b, [])
2692          } else {
2693            len = parseArithExpr(P, '}', 'var')
2694          }
2695          if (len) out.push(len)
2696        }
2697        return out
2698      }
2699    }
2700    if (
2701      c === ':' ||
2702      c === '#' ||
2703      c === '%' ||
2704      c === '/' ||
2705      c === '^' ||
2706      c === ',' ||
2707      c === '-' ||
2708      c === '=' ||
2709      c === '?' ||
2710      c === '+'
2711    ) {
2712      const s = P.L.b
2713      const c1 = peek(P.L, 1)
2714      let op = c
2715      if (c === ':' && (c1 === '-' || c1 === '=' || c1 === '?' || c1 === '+')) {
2716        advance(P.L)
2717        advance(P.L)
2718        op = c + c1
2719      } else if (
2720        (c === '#' || c === '%' || c === '/' || c === '^' || c === ',') &&
2721        c1 === c
2722      ) {
2723        // Doubled operators: ## %% // ^^ ,,
2724        advance(P.L)
2725        advance(P.L)
2726        op = c + c
2727      } else {
2728        advance(P.L)
2729      }
2730      out.push(mk(P, op, s, P.L.b, []))
2731      // Rest is the default/replacement — parse as word or regex until }
2732      // Pattern-matching operators (# ## % %% / // ^ ^^ , ,,) emit regex;
2733      // value-substitution operators (:- := :? :+ - = ? + :) emit word.
2734      // `/` and `//` split at next `/` into (regex)+(word) for pat/repl.
2735      const isPattern =
2736        op === '#' ||
2737        op === '##' ||
2738        op === '%' ||
2739        op === '%%' ||
2740        op === '/' ||
2741        op === '//' ||
2742        op === '^' ||
2743        op === '^^' ||
2744        op === ',' ||
2745        op === ',,'
2746      if (op === '/' || op === '//') {
2747        // Optional /# or /% anchor prefix — anonymous node
2748        const ac = peek(P.L)
2749        if (ac === '#' || ac === '%') {
2750          const aStart = P.L.b
2751          advance(P.L)
2752          out.push(mk(P, ac, aStart, P.L.b, []))
2753        }
2754        // Pattern: per grammar _expansion_regex_replacement, pattern is
2755        // choice(regex, string, cmd_sub, seq(string, regex)). If it STARTS
2756        // with ", emit (string) and any trailing chars become (regex).
2757        // `${v//"${old}"/}` → (string(expansion)); `${v//"${c}"\//}` →
2758        // (string)(regex).
2759        if (peek(P.L) === '"') {
2760          out.push(parseDoubleQuoted(P))
2761          const tail = parseExpansionRest(P, 'regex', true)
2762          if (tail) out.push(tail)
2763        } else {
2764          const regex = parseExpansionRest(P, 'regex', true)
2765          if (regex) out.push(regex)
2766        }
2767        if (peek(P.L) === '/') {
2768          const sepStart = P.L.b
2769          advance(P.L)
2770          out.push(mk(P, '/', sepStart, P.L.b, []))
2771          // Replacement: per grammar, choice includes `seq(cmd_sub, word)`
2772          // which emits TWO siblings (not concatenation). Also `(` at start
2773          // of replacement is a regular word char, NOT array — unlike `:-`
2774          // default-value context. `${v/(/(Gentoo ${x}, }` replacement
2775          // `(Gentoo ${x}, ` is (concatenation (word)(expansion)(word)).
2776          const repl = parseExpansionRest(P, 'replword', false)
2777          if (repl) {
2778            // seq(cmd_sub, word) special case → siblings. Detected when
2779            // replacement is a concatenation of exactly 2 parts with first
2780            // being command_substitution.
2781            if (
2782              repl.type === 'concatenation' &&
2783              repl.children.length === 2 &&
2784              repl.children[0]!.type === 'command_substitution'
2785            ) {
2786              out.push(repl.children[0]!)
2787              out.push(repl.children[1]!)
2788            } else {
2789              out.push(repl)
2790            }
2791          }
2792        }
2793      } else if (op === '#' || op === '##' || op === '%' || op === '%%') {
2794        // Pattern-removal: per grammar _expansion_regex, pattern is
2795        // repeat(choice(regex, string, raw_string, ')')). Each quote/string
2796        // is a SIBLING, not absorbed into one regex. `${f%'str'*}` →
2797        // (raw_string)(regex); `${f/'str'*}` (slash) stays single regex.
2798        for (const p of parseExpansionRegexSegmented(P)) out.push(p)
2799      } else {
2800        const rest = parseExpansionRest(P, isPattern ? 'regex' : 'word', false)
2801        if (rest) out.push(rest)
2802      }
2803    }
2804    return out
2805  }
2806  
/**
 * Parses the right-hand side of a `${var<op>...}` operator, starting at the
 * current lexer position. The closing `}` is never consumed; trailing
 * newlines before it are.
 *
 * Blanks are NOT skipped up front — in `${var:- }` the space IS the word.
 *
 * Modes, selected by `nodeType`:
 * - `'regex'`: flat scan; the whole range becomes a single (regex) node.
 * - `'word'`: segmenting scan producing word / expansion / string / ... parts,
 *   wrapped in (concatenation) when there is more than one. A leading `(`
 *   parses as an (array) instead.
 * - `'replword'`: same as `'word'`, except a leading `(` is an ordinary
 *   character (replacement side of `/` and `//`).
 *
 * @param P lexer/parser state; its cursor is advanced past the parsed text
 * @param nodeType one of the modes above
 * @param stopAtSlash when true, an unnested `/` also ends the scan (used for
 *   the pat/repl split in `${var/pat/repl}`)
 * @returns the parsed node, or null when nothing was scanned
 *   (`${var:\n}` emits no word)
 */
function parseExpansionRest(
  P: ParseState,
  nodeType: string,
  stopAtSlash: boolean,
): TsNode | null {
  const start = P.L.b
  // Value-substitution RHS starting with `(` parses as array: ${var:-(x)} →
  // (expansion (variable_name) (array (word))). Only for 'word' context (not
  // pattern-matching operators which emit regex, and not 'replword' where `(`
  // is a regular char per grammar `_expansion_regex_replacement`).
  if (nodeType === 'word' && peek(P.L) === '(') {
    advance(P.L)
    const open = mk(P, '(', start, P.L.b, [])
    const elems: TsNode[] = [open]
    while (P.L.i < P.L.len) {
      skipBlanks(P.L)
      const c = peek(P.L)
      if (c === ')' || c === '}' || c === '\n' || c === '') break
      // One array element: a run of chars up to blank, `)`, `}` or newline.
      const wStart = P.L.b
      while (P.L.i < P.L.len) {
        const wc = peek(P.L)
        if (
          wc === ')' ||
          wc === '}' ||
          wc === ' ' ||
          wc === '\t' ||
          wc === '\n' ||
          wc === ''
        ) {
          break
        }
        advance(P.L)
      }
      if (P.L.b > wStart) elems.push(mk(P, 'word', wStart, P.L.b, []))
      else break
    }
    // The closing paren is optional; an unterminated array still yields a node.
    if (peek(P.L) === ')') {
      const cStart = P.L.b
      advance(P.L)
      elems.push(mk(P, ')', cStart, P.L.b, []))
    }
    while (peek(P.L) === '\n') advance(P.L)
    return mk(P, 'array', start, P.L.b, elems)
  }
  // REGEX mode: flat single-span scan. Quotes are opaque (skipped past so
  // `/` inside them doesn't break stopAtSlash), but NOT emitted as separate
  // nodes — the entire range becomes one regex node.
  if (nodeType === 'regex') {
    let braceDepth = 0
    while (P.L.i < P.L.len) {
      const c = peek(P.L)
      if (c === '\n') break
      if (braceDepth === 0) {
        if (c === '}') break
        if (stopAtSlash && c === '/') break
      }
      if (c === '\\' && P.L.i + 1 < P.L.len) {
        advance(P.L)
        advance(P.L)
        continue
      }
      if (c === '"' || c === "'") {
        advance(P.L)
        while (P.L.i < P.L.len && peek(P.L) !== c) {
          // Backslash escapes exist only inside double quotes. In '...' a
          // backslash is literal, so `'\'` closes at the second quote — the
          // same rule the word-mode raw_string scan below applies.
          if (c === '"' && peek(P.L) === '\\' && P.L.i + 1 < P.L.len) {
            advance(P.L)
          }
          advance(P.L)
        }
        if (peek(P.L) === c) advance(P.L)
        continue
      }
      // Skip past nested ${...} and $(...) so their } / don't terminate us.
      // Depth counting is purely textual (quotes inside are not considered).
      if (c === '$') {
        const c1 = peek(P.L, 1)
        if (c1 === '{') {
          let d = 0
          advance(P.L)
          advance(P.L)
          d++
          while (P.L.i < P.L.len && d > 0) {
            const nc = peek(P.L)
            if (nc === '{') d++
            else if (nc === '}') d--
            advance(P.L)
          }
          continue
        }
        if (c1 === '(') {
          let d = 0
          advance(P.L)
          advance(P.L)
          d++
          while (P.L.i < P.L.len && d > 0) {
            const nc = peek(P.L)
            if (nc === '(') d++
            else if (nc === ')') d--
            advance(P.L)
          }
          continue
        }
      }
      if (c === '{') braceDepth++
      else if (c === '}' && braceDepth > 0) braceDepth--
      advance(P.L)
    }
    // The regex ends before any trailing newlines, which are consumed so the
    // caller sees `}` next.
    const end = P.L.b
    while (peek(P.L) === '\n') advance(P.L)
    if (end === start) return null
    return mk(P, 'regex', start, end, [])
  }
  // WORD mode: segmenting parser — recognize nested ${...}, $(...), $[...],
  // $'...', "...", '...', $ident, <(...)/>(...), `...`; bare chars accumulate
  // into word segments. Multiple parts → wrapped in concatenation.
  const parts: TsNode[] = []
  let segStart = P.L.b
  let braceDepth = 0
  // Emits the pending bare-char run [segStart, cursor) as a (word), if any.
  const flushSeg = (): void => {
    if (P.L.b > segStart) {
      parts.push(mk(P, 'word', segStart, P.L.b, []))
    }
  }
  while (P.L.i < P.L.len) {
    const c = peek(P.L)
    if (c === '\n') break
    if (braceDepth === 0) {
      if (c === '}') break
      if (stopAtSlash && c === '/') break
    }
    // Escaped char stays part of the current bare segment.
    if (c === '\\' && P.L.i + 1 < P.L.len) {
      advance(P.L)
      advance(P.L)
      continue
    }
    const c1 = peek(P.L, 1)
    if (c === '$') {
      if (c1 === '{' || c1 === '(' || c1 === '[') {
        flushSeg()
        const exp = parseDollarLike(P)
        if (exp) parts.push(exp)
        segStart = P.L.b
        continue
      }
      if (c1 === "'") {
        // $'...' ANSI-C string (backslash escapes are honoured here)
        flushSeg()
        const aStart = P.L.b
        advance(P.L)
        advance(P.L)
        while (P.L.i < P.L.len && peek(P.L) !== "'") {
          if (peek(P.L) === '\\' && P.L.i + 1 < P.L.len) advance(P.L)
          advance(P.L)
        }
        if (peek(P.L) === "'") advance(P.L)
        parts.push(mk(P, 'ansi_c_string', aStart, P.L.b, []))
        segStart = P.L.b
        continue
      }
      if (isIdentStart(c1) || isDigit(c1) || SPECIAL_VARS.has(c1)) {
        flushSeg()
        const exp = parseDollarLike(P)
        if (exp) parts.push(exp)
        segStart = P.L.b
        continue
      }
      // Any other `$` is an ordinary character of the current segment.
    }
    if (c === '"') {
      flushSeg()
      parts.push(parseDoubleQuoted(P))
      segStart = P.L.b
      continue
    }
    if (c === "'") {
      // '...' raw string: no escapes, runs to the next single quote.
      flushSeg()
      const rStart = P.L.b
      advance(P.L)
      while (P.L.i < P.L.len && peek(P.L) !== "'") advance(P.L)
      if (peek(P.L) === "'") advance(P.L)
      parts.push(mk(P, 'raw_string', rStart, P.L.b, []))
      segStart = P.L.b
      continue
    }
    if ((c === '<' || c === '>') && c1 === '(') {
      flushSeg()
      const ps = parseProcessSub(P)
      if (ps) parts.push(ps)
      segStart = P.L.b
      continue
    }
    if (c === '`') {
      flushSeg()
      const bt = parseBacktick(P)
      if (bt) parts.push(bt)
      segStart = P.L.b
      continue
    }
    // Brace tracking so nested {a,b} brace-expansion chars don't prematurely
    // terminate (rare, but the `?` in `${cond}? (` should be treated as word).
    if (c === '{') braceDepth++
    else if (c === '}' && braceDepth > 0) braceDepth--
    advance(P.L)
  }
  flushSeg()
  // Consume trailing newlines before } so caller sees }
  while (peek(P.L) === '\n') advance(P.L)
  // Tree-sitter skips leading whitespace (extras) in expansion RHS when
  // there's content after: `${2+ ${2}}` → just (expansion). But `${v:- }`
  // (space-only RHS) keeps the space as (word). So drop leading whitespace-
  // only word segment if it's NOT the only part.
  if (
    parts.length > 1 &&
    parts[0]!.type === 'word' &&
    /^[ \t]+$/.test(parts[0]!.text)
  ) {
    parts.shift()
  }
  if (parts.length === 0) return null
  if (parts.length === 1) return parts[0]!
  // Multiple parts: wrap in concatenation. (Regex mode never reaches this
  // point — it returned its single regex node above.)
  const last = parts[parts.length - 1]!
  return mk(P, 'concatenation', parts[0]!.startIndex, last.endIndex, parts)
}
3032  
/**
 * Pattern scanner for the `#` `##` `%` `%%` operators. Per grammar
 * `_expansion_regex` the pattern is a repeat of regex / string / raw_string
 * pieces, so every quoted piece is emitted as its own SIBLING node and the
 * bare text between them becomes (regex) nodes:
 * `${f%'str'*}` → (raw_string)(regex).
 *
 * Scanning stops at an unescaped `}` or newline; the `}` is left for the
 * caller, trailing newlines are consumed.
 */
function parseExpansionRegexSegmented(P: ParseState): TsNode[] {
  const nodes: TsNode[] = []
  let regexFrom = P.L.b

  // Emit the pending bare run [regexFrom, cursor) as a (regex), if non-empty.
  const emitPendingRegex = (): void => {
    if (P.L.b > regexFrom) nodes.push(mk(P, 'regex', regexFrom, P.L.b, []))
  }

  // Opaque skip over `$` + opener ... matching closer, so a nested `}` does
  // not end the pattern. Nesting is counted textually.
  const skipNested = (opener: string, closer: string): void => {
    advance(P.L)
    advance(P.L)
    let depth = 1
    while (P.L.i < P.L.len && depth > 0) {
      const inner = peek(P.L)
      if (inner === opener) depth++
      else if (inner === closer) depth--
      advance(P.L)
    }
  }

  while (P.L.i < P.L.len) {
    const ch = peek(P.L)
    if (ch === '}' || ch === '\n') break

    if (ch === '\\' && P.L.i + 1 < P.L.len) {
      // Escaped char: both bytes stay inside the current regex run.
      advance(P.L)
      advance(P.L)
    } else if (ch === '"') {
      emitPendingRegex()
      nodes.push(parseDoubleQuoted(P))
      regexFrom = P.L.b
    } else if (ch === "'") {
      emitPendingRegex()
      const quoteFrom = P.L.b
      advance(P.L)
      while (P.L.i < P.L.len && peek(P.L) !== "'") advance(P.L)
      if (peek(P.L) === "'") advance(P.L)
      nodes.push(mk(P, 'raw_string', quoteFrom, P.L.b, []))
      regexFrom = P.L.b
    } else {
      const following = ch === '$' ? peek(P.L, 1) : ''
      if (following === '{') skipNested('{', '}')
      else if (following === '(') skipNested('(', ')')
      else advance(P.L)
    }
  }

  emitPendingRegex()
  while (peek(P.L) === '\n') advance(P.L)
  return nodes
}
3100  
/**
 * Parses a backtick command substitution. The cursor must be on the opening
 * backtick. Statements are parsed inline until the closing backtick or end
 * of input; `;` / `&` separators between them are kept as children.
 *
 * @returns a (command_substitution) node, or null when the backticks hold no
 *   statement at all. A missing closing backtick yields a zero-width closer.
 */
function parseBacktick(P: ParseState): TsNode | null {
  const openAt = P.L.b
  advance(P.L)
  const openTick = mk(P, '`', openAt, P.L.b, [])
  const statements: TsNode[] = []

  P.inBacktick++
  for (;;) {
    skipBlanks(P.L)
    if (peek(P.L) === '`' || peek(P.L) === '') break

    // Look at the next token without committing to it: newlines are simply
    // swallowed, EOF / backtick end the body, anything else starts a statement.
    const beforeTok = saveLex(P.L)
    const tok = nextToken(P.L, 'cmd')
    if (tok.type === 'NEWLINE') continue
    restoreLex(P.L, beforeTok)
    if (tok.type === 'EOF' || tok.type === 'BACKTICK') break

    const stmt = parseAndOr(P)
    if (!stmt) break
    statements.push(stmt)

    skipBlanks(P.L)
    if (peek(P.L) === '`') break

    // Optional separator after the statement.
    const beforeSep = saveLex(P.L)
    const sepTok = nextToken(P.L, 'cmd')
    const isListSep =
      sepTok.type === 'OP' && (sepTok.value === ';' || sepTok.value === '&')
    if (isListSep) {
      statements.push(leaf(P, sepTok.value, sepTok))
    } else if (sepTok.type !== 'NEWLINE') {
      restoreLex(P.L, beforeSep)
    }
  }
  P.inBacktick--

  // Closing backtick; zero-width when the input ended without one.
  const closeAt = P.L.b
  if (peek(P.L) === '`') advance(P.L)
  const closeTick = mk(P, '`', closeAt, P.L.b, [])

  // Empty backticks (whitespace/newline only) are elided entirely by
  // tree-sitter — used as a line-continuation hack: "foo"`<newline>`"bar"
  // → (concatenation (string) (string)) with no command_substitution.
  if (statements.length === 0) return null

  const children = [openTick, ...statements, closeTick]
  return mk(P, 'command_substitution', openAt, closeTick.endIndex, children)
}
3151  
/**
 * Parses `if COND then BODY [elif COND then BODY]... [else BODY] fi`.
 * `ifTok` is the already-consumed `if` keyword token. `elif` / `else`
 * branches become (elif_clause) / (else_clause) children; everything else is
 * a direct child of the returned (if_statement).
 */
function parseIf(P: ParseState, ifTok: Token): TsNode {
  const ifKw = leaf(P, 'if', ifTok)
  const kids: TsNode[] = [ifKw]
  kids.push(...parseStatements(P, null))
  consumeKeyword(P, 'then', kids)
  kids.push(...parseStatements(P, null))

  for (;;) {
    const mark = saveLex(P.L)
    const tok = nextToken(P.L, 'cmd')
    const keyword = tok.type === 'WORD' ? tok.value : ''

    if (keyword === 'elif') {
      // elif COND then BODY
      const clause: TsNode[] = [leaf(P, 'elif', tok)]
      clause.push(...parseStatements(P, null))
      consumeKeyword(P, 'then', clause)
      clause.push(...parseStatements(P, null))
      const head = clause[0]!
      const tail = clause[clause.length - 1]!
      kids.push(mk(P, 'elif_clause', head.startIndex, tail.endIndex, clause))
    } else if (keyword === 'else') {
      // else BODY — spans just the keyword when the body is empty
      const clause: TsNode[] = [leaf(P, 'else', tok)]
      clause.push(...parseStatements(P, null))
      const head = clause[0]!
      const tail = clause[clause.length - 1]!
      kids.push(mk(P, 'else_clause', head.startIndex, tail.endIndex, clause))
    } else {
      // Not a branch keyword: un-read the token and finish.
      restoreLex(P.L, mark)
      break
    }
  }

  consumeKeyword(P, 'fi', kids)
  const final = kids[kids.length - 1]!
  return mk(P, 'if_statement', ifKw.startIndex, final.endIndex, kids)
}
3188  
/**
 * Parses the remainder of a loop introduced by `kwTok` (the keyword text is
 * taken from the token itself): condition statements followed by an optional
 * do-group. Always emits a (while_statement) node.
 */
function parseWhile(P: ParseState, kwTok: Token): TsNode {
  const keyword = leaf(P, kwTok.value, kwTok)
  const children: TsNode[] = [keyword]
  children.push(...parseStatements(P, null))

  const doGroup = parseDoGroup(P)
  if (doGroup) children.push(doGroup)

  const tail = children[children.length - 1]!
  return mk(P, 'while_statement', keyword.startIndex, tail.endIndex, children)
}
3199  
/**
 * Parses a loop introduced by `forTok` (its keyword text comes from the
 * token). Two shapes:
 * - C-style `for (( init; cond; update )) [;] do ... done | { ... }` →
 *   (c_style_for_statement). Only taken when the keyword is literally `for`.
 * - `KW VAR [in WORDS...] [;] do ... done` → (for_statement).
 */
function parseFor(P: ParseState, forTok: Token): TsNode {
  const forKw = leaf(P, forTok.value, forTok)
  const kids: TsNode[] = [forKw]

  // Anonymous punctuation node covering the next `width` chars (0 = missing).
  const punct = (type: string, width: number): TsNode => {
    const from = P.L.b
    for (let n = 0; n < width; n++) advance(P.L)
    return mk(P, type, from, P.L.b, [])
  }

  // Optional `;` (kept as a child) or newline (dropped) before the body;
  // any other token is un-read.
  const separator = (): void => {
    const mark = saveLex(P.L)
    const tok = nextToken(P.L, 'cmd')
    if (tok.type === 'OP' && tok.value === ';') {
      kids.push(leaf(P, ';', tok))
    } else if (tok.type !== 'NEWLINE') {
      restoreLex(P.L, mark)
    }
  }

  skipBlanks(P.L)
  const isCStyle =
    forTok.value === 'for' && peek(P.L) === '(' && peek(P.L, 1) === '('

  if (isCStyle) {
    kids.push(punct('((', 2))

    // init; cond; update — all three use 'assign' mode so `c = expr` emits
    // variable_assignment, while bare idents (c in `c<=5`) → word. Each
    // clause may be a comma-separated list.
    const clause = (terminator: ';' | '))'): void => {
      skipBlanks(P.L)
      kids.push(...parseArithCommaList(P, terminator, 'assign'))
    }
    const semicolon = (): void => {
      if (peek(P.L) === ';') kids.push(punct(';', 1))
    }
    clause(';')
    semicolon()
    clause(';')
    semicolon()
    clause('))')

    skipBlanks(P.L)
    if (peek(P.L) === ')' && peek(P.L, 1) === ')') kids.push(punct('))', 2))

    separator()

    const doGroup = parseDoGroup(P)
    if (doGroup) {
      kids.push(doGroup)
    } else {
      // C-style for can also use `{ ... }` body instead of `do ... done`
      skipNewlines(P)
      skipBlanks(P.L)
      if (peek(P.L) === '{') {
        const lbrace = punct('{', 1)
        const stmts = parseStatements(P, '}')
        // Zero-width closer when the `}` is missing.
        const rbrace = punct('}', peek(P.L) === '}' ? 1 : 0)
        kids.push(
          mk(P, 'compound_statement', lbrace.startIndex, rbrace.endIndex, [
            lbrace,
            ...stmts,
            rbrace,
          ]),
        )
      }
    }

    const tail = kids[kids.length - 1]!
    return mk(P, 'c_style_for_statement', forKw.startIndex, tail.endIndex, kids)
  }

  // Regular form: loop variable, then an optional `in` word list.
  const varTok = nextToken(P.L, 'arg')
  kids.push(mk(P, 'variable_name', varTok.start, varTok.end, []))
  skipBlanks(P.L)

  const beforeIn = saveLex(P.L)
  const inTok = nextToken(P.L, 'arg')
  if (inTok.type === 'WORD' && inTok.value === 'in') {
    kids.push(leaf(P, 'in', inTok))
    for (;;) {
      skipBlanks(P.L)
      const ahead = peek(P.L)
      if (ahead === ';' || ahead === '\n' || ahead === '') break
      const item = parseWord(P, 'arg')
      if (!item) break
      kids.push(item)
    }
  } else {
    restoreLex(P.L, beforeIn)
  }

  separator()

  const doGroup = parseDoGroup(P)
  if (doGroup) kids.push(doGroup)

  const tail = kids[kids.length - 1]!
  return mk(P, 'for_statement', forKw.startIndex, tail.endIndex, kids)
}
3305  
/**
 * Parses `do STATEMENTS done` after skipping leading newlines.
 *
 * @returns a (do_group) node, or null — with the lexer rewound to just after
 *   the skipped newlines — when the next token is not the word `do`.
 */
function parseDoGroup(P: ParseState): TsNode | null {
  skipNewlines(P)

  const mark = saveLex(P.L)
  const tok = nextToken(P.L, 'cmd')
  const isDo = tok.type === 'WORD' && tok.value === 'do'
  if (!isDo) {
    restoreLex(P.L, mark)
    return null
  }

  const children: TsNode[] = [leaf(P, 'do', tok)]
  children.push(...parseStatements(P, null))
  consumeKeyword(P, 'done', children)

  const head = children[0]!
  const tail = children[children.length - 1]!
  return mk(P, 'do_group', head.startIndex, tail.endIndex, children)
}
3321  
/**
 * Parses `case WORD in ITEM... esac`. `caseTok` is the already-consumed
 * `case` keyword. Items are read until `esac`, end of input, or an item that
 * fails to parse; the (case_statement) spans up to the last child collected.
 */
function parseCase(P: ParseState, caseTok: Token): TsNode {
  const caseKw = leaf(P, 'case', caseTok)
  const kids: TsNode[] = [caseKw]

  skipBlanks(P.L)
  const subject = parseWord(P, 'arg')
  if (subject) kids.push(subject)

  skipBlanks(P.L)
  consumeKeyword(P, 'in', kids)
  skipNewlines(P)

  for (;;) {
    skipBlanks(P.L)
    skipNewlines(P)

    const mark = saveLex(P.L)
    const tok = nextToken(P.L, 'arg')
    if (tok.type === 'EOF') break
    if (tok.type === 'WORD' && tok.value === 'esac') {
      kids.push(leaf(P, 'esac', tok))
      break
    }

    // Anything else begins a case item: rewind and parse it as one.
    restoreLex(P.L, mark)
    const item = parseCaseItem(P)
    if (!item) break
    kids.push(item)
  }

  const tail = kids[kids.length - 1]!
  return mk(P, 'case_statement', caseKw.startIndex, tail.endIndex, kids)
}
3349  
/**
 * Parses one case item: `[(] PATTERN [| PATTERN]... ) BODY [;; | ;& | ;;&]`.
 *
 * @returns a (case_item) node, or null when nothing at all was collected.
 */
function parseCaseItem(P: ParseState): TsNode | null {
  skipBlanks(P.L)
  const itemStart = P.L.b
  const kids: TsNode[] = []

  // Consume the single punctuation char under the cursor as an anonymous node.
  const takeChar = (type: string): void => {
    const at = P.L.b
    advance(P.L)
    kids.push(mk(P, type, at, P.L.b, []))
  }

  // Eat a `\<newline>` line continuation if the cursor is on one.
  const skipLineContinuation = (): boolean => {
    if (peek(P.L) !== '\\' || peek(P.L, 1) !== '\n') return false
    advance(P.L)
    advance(P.L)
    return true
  }

  // Optional leading '(' before pattern — bash allows (pattern) syntax
  if (peek(P.L) === '(') takeChar('(')

  // Pattern alternatives separated by `|`
  let seenAlternative = false
  for (;;) {
    skipBlanks(P.L)
    const ahead = peek(P.L)
    if (ahead === ')' || ahead === '') break

    const segments = parseCasePattern(P)
    if (segments.length === 0) break

    // tree-sitter quirk: first alternative with quotes is inlined as flat
    // siblings; subsequent alternatives are wrapped in (concatenation) with
    // `word` instead of `extglob_pattern` for bare segments.
    if (seenAlternative && segments.length > 1) {
      const asWords = segments.map(seg =>
        seg.type === 'extglob_pattern'
          ? mk(P, 'word', seg.startIndex, seg.endIndex, [])
          : seg,
      )
      const head = asWords[0]!
      const tail = asWords[asWords.length - 1]!
      kids.push(mk(P, 'concatenation', head.startIndex, tail.endIndex, asWords))
    } else {
      kids.push(...segments)
    }
    seenAlternative = true

    skipBlanks(P.L)
    // \<newline> line continuation between alternatives
    if (skipLineContinuation()) skipBlanks(P.L)

    if (peek(P.L) !== '|') break
    takeChar('|')
    // \<newline> after | is also a line continuation
    skipLineContinuation()
  }

  if (peek(P.L) === ')') takeChar(')')

  const body = parseStatements(P, null)
  kids.push(...body)

  // Optional item terminator.
  const mark = saveLex(P.L)
  const term = nextToken(P.L, 'cmd')
  const isTerminator =
    term.type === 'OP' &&
    (term.value === ';;' || term.value === ';&' || term.value === ';;&')
  if (isTerminator) {
    kids.push(leaf(P, term.value, term))
  } else {
    restoreLex(P.L, mark)
  }

  if (kids.length === 0) return null

  // tree-sitter quirk: case_item with EMPTY body and a single pattern matching
  // extglob-operator-char-prefix (no actual glob metachars) downgrades to word.
  // `-o) owner=$2 ;;` (has body) → extglob_pattern; `-g) ;;` (empty) → word.
  if (body.length === 0) {
    kids.forEach((kid, idx) => {
      if (kid.type !== 'extglob_pattern') return
      const text = sliceBytes(P, kid.startIndex, kid.endIndex)
      if (/^[-+?*@!][a-zA-Z]/.test(text) && !/[*?(]/.test(text)) {
        kids[idx] = mk(P, 'word', kid.startIndex, kid.endIndex, [])
      }
    })
  }

  const final = kids[kids.length - 1]!
  return mk(P, 'case_item', itemStart, final.endIndex, kids)
}
3440  
/**
 * Parses ONE alternative of a case-item pattern (the text up to an unnested
 * `)`, `|`, blank or newline).
 *
 * Works in two steps: a look-ahead scan finds the extent of the pattern and
 * records what it contains, then the result is produced one of three ways:
 * - quotes present (and no extglob paren) → rewind and re-scan with
 *   `parseCasePatternSegmented`, giving one node per quoted/bare segment;
 * - `$` or `[` present (and no extglob paren) → rewind and parse as a
 *   regular word via `parseWord`;
 * - otherwise → a single (extglob_pattern) or (word) covering the scan.
 *
 * @returns the node(s) for this alternative; empty when nothing was scanned
 *   or `parseWord` produced nothing.
 */
function parseCasePattern(P: ParseState): TsNode[] {
  skipBlanks(P.L)
  const save = saveLex(P.L)
  const start = P.L.b
  const startI = P.L.i
  let parenDepth = 0
  let hasDollar = false
  let hasBracketOutsideParen = false
  let hasQuote = false
  while (P.L.i < P.L.len) {
    const c = peek(P.L)
    if (c === '\\' && P.L.i + 1 < P.L.len) {
      // Escaped char — consume both (handles `bar\ baz` as single pattern)
      // \<newline> is a line continuation; eat it but stay in pattern.
      advance(P.L)
      advance(P.L)
      continue
    }
    if (c === '"' || c === "'") {
      hasQuote = true
      // Skip past the quoted segment so its content (spaces, |, etc.) doesn't
      // break the peek-ahead scan.
      advance(P.L)
      while (P.L.i < P.L.len && peek(P.L) !== c) {
        // Backslash escapes exist only inside double quotes. In '...' a
        // backslash is literal, so `'\'` ends at the second quote; treating
        // it as an escape would run the scan past the real pattern end.
        if (c === '"' && peek(P.L) === '\\' && P.L.i + 1 < P.L.len) {
          advance(P.L)
        }
        advance(P.L)
      }
      if (peek(P.L) === c) advance(P.L)
      continue
    }
    // Paren counting: any ( inside pattern opens a scope; don't break at ) or |
    // until balanced. Handles extglob *(a|b) and nested shapes *([0-9])([0-9]).
    if (c === '(') {
      parenDepth++
      advance(P.L)
      continue
    }
    if (parenDepth > 0) {
      if (c === ')') {
        parenDepth--
        advance(P.L)
        continue
      }
      // An unbalanced paren never extends the pattern past the line.
      if (c === '\n') break
      advance(P.L)
      continue
    }
    if (c === ')' || c === '|' || c === ' ' || c === '\t' || c === '\n') break
    if (c === '$') hasDollar = true
    if (c === '[') hasBracketOutsideParen = true
    advance(P.L)
  }
  if (P.L.b === start) return []
  const text = P.src.slice(startI, P.L.i)
  const hasExtglobParen = /[*?+@!]\(/.test(text)
  // Quoted segments in pattern: tree-sitter splits at quote boundaries into
  // multiple sibling nodes. `*"foo"*` → (extglob_pattern)(string)(extglob_pattern).
  // Re-scan with a segmenting pass.
  if (hasQuote && !hasExtglobParen) {
    restoreLex(P.L, save)
    return parseCasePatternSegmented(P)
  }
  // tree-sitter splits patterns with [ or $ into concatenation via word parsing
  // UNLESS pattern has extglob parens (those override and emit extglob_pattern).
  // `*.[1357]` → concat(word word number word); `${PN}.pot` → concat(expansion word);
  // but `*([0-9])` → extglob_pattern (has extglob paren).
  if (!hasExtglobParen && (hasDollar || hasBracketOutsideParen)) {
    restoreLex(P.L, save)
    const w = parseWord(P, 'arg')
    return w ? [w] : []
  }
  // Patterns starting with extglob operator chars (+ - ? * @ !) followed by
  // identifier chars are extglob_pattern per tree-sitter, even without parens
  // or glob metachars. `-o)` → extglob_pattern; plain `foo)` → word.
  const type =
    hasExtglobParen || /[*?]/.test(text) || /^[-+?*@!][a-zA-Z]/.test(text)
      ? 'extglob_pattern'
      : 'word'
  return [mk(P, type, start, P.L.b, [])]
}
3521  
/**
 * Segmenting scan for a case-pattern alternative that contains quotes:
 * `*"foo"*` → [extglob_pattern, string, extglob_pattern]. Each quoted piece
 * becomes its own node; a bare run between them is (extglob_pattern) when it
 * contains `*` or `?`, otherwise (word). Scanning stops at an unquoted,
 * unescaped `)`, `|`, space, tab or newline, which is left unconsumed.
 */
function parseCasePatternSegmented(P: ParseState): TsNode[] {
  const nodes: TsNode[] = []
  // Start of the pending bare run, as byte offset and as string index.
  let bareFromByte = P.L.b
  let bareFromIdx = P.L.i

  const emitBare = (): void => {
    if (P.L.i <= bareFromIdx) return
    const bare = P.src.slice(bareFromIdx, P.L.i)
    const type = /[*?]/.test(bare) ? 'extglob_pattern' : 'word'
    nodes.push(mk(P, type, bareFromByte, P.L.b, []))
  }
  const restartBare = (): void => {
    bareFromByte = P.L.b
    bareFromIdx = P.L.i
  }

  while (P.L.i < P.L.len) {
    const ch = peek(P.L)

    if (ch === '\\' && P.L.i + 1 < P.L.len) {
      // Escaped char belongs to the current bare run.
      advance(P.L)
      advance(P.L)
      continue
    }

    if (ch === '"' || ch === "'") {
      emitBare()
      if (ch === '"') {
        nodes.push(parseDoubleQuoted(P))
      } else {
        const quoted = nextToken(P.L, 'arg')
        nodes.push(leaf(P, 'raw_string', quoted))
      }
      restartBare()
      continue
    }

    const endsPattern =
      ch === ' ' || ch === '\t' || ch === '\n' || ch === ')' || ch === '|'
    if (endsPattern) break

    advance(P.L)
  }

  emitBare()
  return nodes
}
3564  
/**
 * Parses `function NAME [()] BODY`. `fnTok` is the already-consumed
 * `function` keyword. The `()` pair is only recognised when the two
 * characters are adjacent. Returns a (function_definition) node.
 */
function parseFunction(P: ParseState, fnTok: Token): TsNode {
  const fnKw = leaf(P, 'function', fnTok)

  skipBlanks(P.L)
  const nameTok = nextToken(P.L, 'arg')
  const kids: TsNode[] = [fnKw, mk(P, 'word', nameTok.start, nameTok.end, [])]

  skipBlanks(P.L)
  const hasParens = peek(P.L) === '(' && peek(P.L, 1) === ')'
  if (hasParens) {
    const openTok = nextToken(P.L, 'cmd')
    const closeTok = nextToken(P.L, 'cmd')
    kids.push(leaf(P, '(', openTok), leaf(P, ')', closeTok))
  }

  skipBlanks(P.L)
  skipNewlines(P)

  const body = parseCommand(P)
  if (body) {
    // Hoist redirects from redirected_statement(compound_statement, ...) to
    // function_definition level per tree-sitter grammar
    const hoistChildren =
      body.type === 'redirected_statement' &&
      body.children.length >= 2 &&
      body.children[0]!.type === 'compound_statement'
    kids.push(...(hoistChildren ? body.children : [body]))
  }

  const tail = kids[kids.length - 1]!
  return mk(P, 'function_definition', fnKw.startIndex, tail.endIndex, kids)
}
3597  
/**
 * Parses the arguments of a declaration keyword (`kwTok`, already consumed;
 * its text is taken from the token) into a (declaration_command) node.
 * Each argument is tried, in order, as an assignment, a quoted/`$` word,
 * then a plain token (flag, variable name, or other word). Parsing stops at
 * end of input, newline, `;`, `&`, `|`, `)`, `<` or `>`.
 */
function parseDeclaration(P: ParseState, kwTok: Token): TsNode {
  const kw = leaf(P, kwTok.value, kwTok)
  const kids: TsNode[] = [kw]

  // True for the characters (or end of input) that end the argument list.
  const endsArguments = (ch: string): boolean => {
    switch (ch) {
      case '':
      case '\n':
      case ';':
      case '&':
      case '|':
      case ')':
      case '<':
      case '>':
        return true
      default:
        return false
    }
  }

  for (;;) {
    skipBlanks(P.L)
    const ahead = peek(P.L)
    if (endsArguments(ahead)) break

    const assignment = tryParseAssignment(P)
    if (assignment) {
      kids.push(assignment)
      continue
    }

    // Quoted string or concatenation: `export "FOO=bar"`, `export 'X'`
    if (ahead === '"' || ahead === "'" || ahead === '$') {
      const quotedWord = parseWord(P, 'arg')
      if (!quotedWord) break
      kids.push(quotedWord)
      continue
    }

    // Flag like -a or bare variable name
    const mark = saveLex(P.L)
    const tok = nextToken(P.L, 'arg')
    if (tok.type !== 'WORD' && tok.type !== 'NUMBER') {
      restoreLex(P.L, mark)
      break
    }
    const isFlag = tok.value.startsWith('-')
    if (!isFlag && isIdentStart(tok.value[0] ?? '')) {
      kids.push(mk(P, 'variable_name', tok.start, tok.end, []))
    } else {
      kids.push(leaf(P, 'word', tok))
    }
  }

  const tail = kids[kids.length - 1]!
  return mk(P, 'declaration_command', kw.startIndex, tail.endIndex, kids)
}
3649  
/**
 * Build an `unset_command` node from the `unset` keyword token and the
 * arguments that follow it.
 *
 * Arguments are collected until end of input, a newline, or one of
 * `; & | ) < >`. A plain `word` argument becomes `variable_name` unless it
 * starts with `-` (a flag); every other node type is kept unchanged.
 *
 * SECURITY: each argument goes through parseWord rather than a raw
 * nextToken, so quoted arguments such as `unset 'a[$(id)]'` surface as a
 * raw_string child that ast.ts can reject. Dropping non-WORD arguments would
 * hide the arithmetic-subscript code-exec vector from the security walker.
 *
 * @param P     Parser state.
 * @param kwTok Token for the `unset` keyword.
 * @returns The `unset_command` node.
 */
function parseUnset(P: ParseState, kwTok: Token): TsNode {
  const keyword = leaf(P, 'unset', kwTok)
  const children: TsNode[] = [keyword]
  args: for (;;) {
    skipBlanks(P.L)
    switch (peek(P.L)) {
      case '':
      case '\n':
      case ';':
      case '&':
      case '|':
      case ')':
      case '<':
      case '>':
        break args
    }
    const operand = parseWord(P, 'arg')
    if (!operand) break args
    const isBareName =
      operand.type === 'word' && !operand.text.startsWith('-')
    children.push(
      isBareName
        ? mk(P, 'variable_name', operand.startIndex, operand.endIndex, [])
        : operand,
    )
  }
  const tail = children[children.length - 1]!
  return mk(P, 'unset_command', keyword.startIndex, tail.endIndex, children)
}
3687  
/**
 * Optionally consume the keyword `name`, skipping any newlines before it.
 *
 * When the next 'cmd'-mode token is a WORD equal to `name`, a leaf of type
 * `name` is appended to `kids`. Otherwise the lexer is rewound to just after
 * the skipped newlines and `kids` is left untouched.
 *
 * @param P    Parser state.
 * @param name Keyword text expected next; also used as the leaf's node type.
 * @param kids Child list that receives the keyword leaf on a match.
 */
function consumeKeyword(P: ParseState, name: string, kids: TsNode[]): void {
  skipNewlines(P)
  const checkpoint = saveLex(P.L)
  const tok = nextToken(P.L, 'cmd')
  if (tok.type !== 'WORD' || tok.value !== name) {
    restoreLex(P.L, checkpoint)
    return
  }
  kids.push(leaf(P, name, tok))
}
3698  
3699  // ───────────────────── Test & Arithmetic Expressions ─────────────────────
3700  
/**
 * Entry point for a test expression. Delegates to the lowest-precedence
 * level (`||`).
 *
 * @param P      Parser state.
 * @param closer Closing delimiter of the enclosing test (`]` or `]]`).
 * @returns The parsed expression, or null when nothing could be parsed.
 */
function parseTestExpr(P: ParseState, closer: string): TsNode | null {
  const expr = parseTestOr(P, closer)
  return expr
}
3704  
/**
 * Parse a left-associative chain of `||` inside a test expression.
 *
 * If an `||` is found but no right-hand operand follows, the lexer is rewound
 * to just before the operator and the expression parsed so far is returned.
 *
 * @param P      Parser state.
 * @param closer Closing delimiter of the enclosing test (`]` or `]]`).
 * @returns A `binary_expression` chain, a single operand, or null.
 */
function parseTestOr(P: ParseState, closer: string): TsNode | null {
  let acc = parseTestAnd(P, closer)
  if (!acc) return null
  for (;;) {
    skipBlanks(P.L)
    const checkpoint = saveLex(P.L)
    if (peek(P.L) !== '|' || peek(P.L, 1) !== '|') return acc
    const opStart = P.L.b
    advance(P.L)
    advance(P.L)
    const operator = mk(P, '||', opStart, P.L.b, [])
    const rhs = parseTestAnd(P, closer)
    if (!rhs) {
      // Dangling `||`: put it back for the caller.
      restoreLex(P.L, checkpoint)
      return acc
    }
    acc = mk(P, 'binary_expression', acc.startIndex, rhs.endIndex, [
      acc,
      operator,
      rhs,
    ])
  }
}
3732  
/**
 * Parse a left-associative chain of `&&` inside a test expression.
 *
 * If an `&&` is found but no right-hand operand follows, the operator stays
 * consumed and the expression parsed so far is returned.
 * NOTE(review): parseTestOr rewinds the lexer in the equivalent `||`
 * situation; confirm whether this asymmetry is intentional.
 *
 * @param P      Parser state.
 * @param closer Closing delimiter of the enclosing test (`]` or `]]`).
 * @returns A `binary_expression` chain, a single operand, or null.
 */
function parseTestAnd(P: ParseState, closer: string): TsNode | null {
  let acc = parseTestUnary(P, closer)
  if (!acc) return null
  for (;;) {
    skipBlanks(P.L)
    if (peek(P.L) !== '&' || peek(P.L, 1) !== '&') return acc
    const opStart = P.L.b
    advance(P.L)
    advance(P.L)
    const operator = mk(P, '&&', opStart, P.L.b, [])
    const rhs = parseTestUnary(P, closer)
    if (!rhs) return acc
    acc = mk(P, 'binary_expression', acc.startIndex, rhs.endIndex, [
      acc,
      operator,
      rhs,
    ])
  }
}
3756  
/**
 * Parse a parenthesized test expression, or fall through to the
 * binary-comparison level when the next character is not `(`.
 *
 * A missing `)` is tolerated: a zero-width `)` node is emitted at the current
 * position so the `parenthesized_expression` always has both delimiters.
 *
 * @param P      Parser state.
 * @param closer Closing delimiter of the enclosing test (`]` or `]]`).
 * @returns The parsed node, or null when nothing could be parsed.
 */
function parseTestUnary(P: ParseState, closer: string): TsNode | null {
  skipBlanks(P.L)
  if (peek(P.L) !== '(') return parseTestBinary(P, closer)

  const openStart = P.L.b
  advance(P.L)
  const lparen = mk(P, '(', openStart, P.L.b, [])
  const body = parseTestOr(P, closer)
  skipBlanks(P.L)
  // Consume `)` when present; otherwise the node below ends up zero-width.
  const closeStart = P.L.b
  if (peek(P.L) === ')') advance(P.L)
  const rparen = mk(P, ')', closeStart, P.L.b, [])
  return mk(
    P,
    'parenthesized_expression',
    lparen.startIndex,
    rparen.endIndex,
    body ? [lparen, body, rparen] : [lparen, rparen],
  )
}
3785  
/**
 * Parse a primary that may be prefixed by `!` or by a unary test operator
 * such as `-f`. A binary comparison is NOT parsed here. This is the LHS of
 * binary_expression, so in `! x =~ y` the `!` applies to `x` alone rather
 * than to the whole comparison.
 *
 * When the prefix has no operand after it, the bare prefix node (`!` or
 * `test_operator`) is returned on its own.
 *
 * @param P      Parser state.
 * @param closer Closing delimiter of the enclosing test (`]` or `]]`).
 * @returns The parsed node, or null when nothing could be parsed.
 */
function parseTestNegatablePrimary(
  P: ParseState,
  closer: string,
): TsNode | null {
  skipBlanks(P.L)
  const ch = peek(P.L)
  const at = P.L.b

  if (ch === '!') {
    advance(P.L)
    const bang = mk(P, '!', at, P.L.b, [])
    const operand = parseTestNegatablePrimary(P, closer)
    return operand
      ? mk(P, 'unary_expression', bang.startIndex, operand.endIndex, [
          bang,
          operand,
        ])
      : bang
  }

  if (ch === '-' && isIdentStart(peek(P.L, 1))) {
    // `-` followed by identifier characters, e.g. `-f`, `-eq`.
    advance(P.L)
    while (isIdentChar(peek(P.L))) advance(P.L)
    const flag = mk(P, 'test_operator', at, P.L.b, [])
    skipBlanks(P.L)
    const operand = parseTestPrimary(P, closer)
    return operand
      ? mk(P, 'unary_expression', flag.startIndex, operand.endIndex, [
          flag,
          operand,
        ])
      : flag
  }

  return parseTestPrimary(P, closer)
}
3820  
/**
 * Parse an optional binary comparison in a test expression:
 * `LHS (== | != | =~ | = | < | > | -op) RHS`.
 *
 * The LHS comes from parseTestNegatablePrimary, so `!` binds tighter than the
 * comparison:
 *   `[[ ! "x" =~ y ]]` → (binary_expression (unary_expression (string)) (regex))
 *   `[[ ! -f x ]]`     → (unary_expression ! (unary_expression (test_operator) (word)))
 *
 * Inside `[[ ]]` the RHS gets operator-specific treatment:
 *   - `=~`: a RHS that is entirely one quoted string stays string/raw_string;
 *     anything else is scanned as a single (regex).
 *   - `=`: scanned as (regex).
 *   - `==` / `!=`: scanned into extglob parts, which become flat children of
 *     the binary_expression.
 * In every other case the RHS is an ordinary test primary.
 *
 * If an operator is consumed but no RHS follows, the LHS alone is returned.
 *
 * @param P      Parser state.
 * @param closer Closing delimiter of the enclosing test (`]` or `]]`).
 * @returns The comparison node, the bare LHS, or null when there is no LHS.
 */
function parseTestBinary(P: ParseState, closer: string): TsNode | null {
  skipBlanks(P.L)
  const lhs = parseTestNegatablePrimary(P, closer)
  if (!lhs) return null
  skipBlanks(P.L)

  // Classify the operator from its first two characters.
  const ch = peek(P.L)
  const next = peek(P.L, 1)
  const opStart = P.L.b
  let opType = ''
  if (ch === '=') {
    opType = next === '=' ? '==' : next === '~' ? '=~' : '='
  } else if (ch === '!' && next === '=') {
    opType = '!='
  } else if ((ch === '<' && next !== '<') || (ch === '>' && next !== '>')) {
    opType = ch
  } else if (ch === '-' && isIdentStart(next)) {
    opType = 'test_operator'
  }
  if (opType === '') return lhs

  // Consume the operator text.
  if (opType === 'test_operator') {
    advance(P.L)
    while (isIdentChar(peek(P.L))) advance(P.L)
  } else {
    for (let k = 0; k < opType.length; k++) advance(P.L)
  }
  const op = mk(P, opType, opStart, P.L.b, [])
  skipBlanks(P.L)

  // Assemble the final node from one or more RHS children.
  const join = (...rhs: TsNode[]): TsNode =>
    mk(
      P,
      'binary_expression',
      lhs.startIndex,
      rhs[rhs.length - 1]!.endIndex,
      [lhs, op, ...rhs],
    )

  if (closer === ']]') {
    const kind = op.type
    if (kind === '=~') {
      skipBlanks(P.L)
      const quote = peek(P.L)
      let rhs: TsNode | null = null
      if (quote === '"' || quote === "'") {
        // Speculatively parse the quoted string, then look past trailing
        // blanks: only if the RHS ends right there (`]]`, `&&`, `||`,
        // newline, or end of input) is the quoted node kept. Otherwise
        // rewind so that e.g. `' boop '(.*)$` is scanned as one regex.
        const checkpoint = saveLex(P.L)
        const quoted =
          quote === '"'
            ? parseDoubleQuoted(P)
            : leaf(P, 'raw_string', nextToken(P.L, 'arg'))
        let j = P.L.i
        while (j < P.L.len && (P.src[j] === ' ' || P.src[j] === '\t')) j++
        const after = P.src[j] ?? ''
        const after1 = P.src[j + 1] ?? ''
        const rhsEndsHere =
          (after === ']' && after1 === ']') ||
          (after === '&' && after1 === '&') ||
          (after === '|' && after1 === '|') ||
          after === '\n' ||
          after === ''
        if (rhsEndsHere) {
          rhs = quoted
        } else {
          restoreLex(P.L, checkpoint)
        }
      }
      if (!rhs) rhs = parseTestRegexRhs(P)
      return rhs ? join(rhs) : lhs
    }
    // Single `=` emits (regex) per tree-sitter.
    if (kind === '=') {
      const rhs = parseTestRegexRhs(P)
      return rhs ? join(rhs) : lhs
    }
    // `==` and `!=` emit extglob_pattern parts.
    if (kind === '==' || kind === '!=') {
      const parts = parseTestExtglobRhs(P)
      return parts.length === 0 ? lhs : join(...parts)
    }
  }

  const rhs = parseTestPrimary(P, closer)
  return rhs ? join(rhs) : lhs
}
3934  
/**
 * Scan the right-hand side of `=~` (or single `=`) inside `[[ ]]` as one
 * `regex` node.
 *
 * Parens and brackets are depth-counted so `|`, `(`, `)` and `]` inside the
 * pattern do not end it. At depth zero the scan stops at `]]`, or at blanks
 * that are followed by `]]`, `&&` or `||`. A newline always stops the scan. A
 * backslash consumes the following character unconditionally.
 *
 * @param P Parser state.
 * @returns The `regex` node, or null when no characters were consumed.
 */
function parseTestRegexRhs(P: ParseState): TsNode | null {
  skipBlanks(P.L)
  const begin = P.L.b
  let parens = 0
  let brackets = 0
  while (P.L.i < P.L.len) {
    const ch = peek(P.L)
    if (ch === '\\' && P.L.i + 1 < P.L.len) {
      advance(P.L)
      advance(P.L)
      continue
    }
    if (ch === '\n') break

    const atTopLevel = parens === 0 && brackets === 0
    if (atTopLevel && ch === ']' && peek(P.L, 1) === ']') break
    if (atTopLevel && (ch === ' ' || ch === '\t')) {
      // Look past the run of blanks to see whether the regex ends here.
      let k = P.L.i
      while (k < P.L.len && (P.L.src[k] === ' ' || P.L.src[k] === '\t')) k++
      const pair = (P.L.src[k] ?? '') + (P.L.src[k + 1] ?? '')
      if (pair === ']]' || pair === '&&' || pair === '||') break
      advance(P.L)
      continue
    }

    switch (ch) {
      case '(':
        parens++
        break
      case ')':
        if (parens > 0) parens--
        break
      case '[':
        brackets++
        break
      case ']':
        if (brackets > 0) brackets--
        break
    }
    advance(P.L)
  }
  return P.L.b === begin ? null : mk(P, 'regex', begin, P.L.b, [])
}
3978  
/**
 * Scan the right-hand side of `==` / `!=` inside `[[ ]]` into a flat list of
 * parts, which the caller attaches directly to the binary_expression.
 *
 * - Bare text becomes `extglob_pattern` (or `number` when it is all digits).
 *   Parens are depth-counted so `@(a|b)` is not split on `|`.
 * - `$(...)`, `${...}`, `$name`, special variables and quoted strings are
 *   parsed into their own node types, even inside extglob parens.
 *
 * At paren depth zero the scan stops at `]]`, or at blanks followed by `]]`,
 * `&&` or `||`. A newline always stops it. A backslash consumes the next
 * character unconditionally.
 *
 * @param P Parser state.
 * @returns The parts in source order; empty when nothing was consumed.
 */
function parseTestExtglobRhs(P: ParseState): TsNode[] {
  skipBlanks(P.L)
  const parts: TsNode[] = []
  let segStart = P.L.b
  let segStartI = P.L.i
  let parenDepth = 0
  // Emit the pending bare-text segment (if any) ending at the current position.
  const flushSeg = () => {
    if (P.L.i > segStartI) {
      const text = P.src.slice(segStartI, P.L.i)
      // Pure number stays number; everything else is extglob_pattern
      const type = /^\d+$/.test(text) ? 'number' : 'extglob_pattern'
      parts.push(mk(P, type, segStart, P.L.b, []))
    }
  }
  // Start a new bare-text segment at the current position.
  const restartSeg = () => {
    segStart = P.L.b
    segStartI = P.L.i
  }
  while (P.L.i < P.L.len) {
    const c = peek(P.L)
    if (c === '\\' && P.L.i + 1 < P.L.len) {
      advance(P.L)
      advance(P.L)
      continue
    }
    if (c === '\n') break
    if (parenDepth === 0) {
      if (c === ']' && peek(P.L, 1) === ']') break
      if (c === ' ' || c === '\t') {
        // Look past the run of blanks to see whether the RHS ends here.
        let j = P.L.i
        while (j < P.L.len && (P.L.src[j] === ' ' || P.L.src[j] === '\t')) j++
        const nc = P.L.src[j] ?? ''
        const nc1 = P.L.src[j + 1] ?? ''
        if (
          (nc === ']' && nc1 === ']') ||
          (nc === '&' && nc1 === '&') ||
          (nc === '|' && nc1 === '|')
        ) {
          break
        }
        advance(P.L)
        continue
      }
    }
    // $ " ' must be parsed even inside @( ) extglob parens — parseDollarLike
    // consumes matching ) so parenDepth stays consistent.
    if (c === '$') {
      const c1 = peek(P.L, 1)
      if (
        c1 === '(' ||
        c1 === '{' ||
        isIdentStart(c1) ||
        SPECIAL_VARS.has(c1)
      ) {
        flushSeg()
        const before = P.L.i
        const exp = parseDollarLike(P)
        if (exp) {
          parts.push(exp)
        } else if (P.L.i === before) {
          // parseDollarLike produced nothing and consumed nothing. Looping
          // again would revisit the same `$` forever, so stop scanning and
          // leave the unparsed expansion in the input for the caller rather
          // than folding it into literal pattern text.
          restartSeg()
          break
        }
        restartSeg()
        continue
      }
    }
    if (c === '"') {
      flushSeg()
      parts.push(parseDoubleQuoted(P))
      restartSeg()
      continue
    }
    if (c === "'") {
      flushSeg()
      const tok = nextToken(P.L, 'arg')
      parts.push(leaf(P, 'raw_string', tok))
      restartSeg()
      continue
    }
    if (c === '(') parenDepth++
    else if (c === ')' && parenDepth > 0) parenDepth--
    advance(P.L)
  }
  flushSeg()
  return parts
}
4062  
/**
 * Parse one operand of a test expression as an 'arg'-mode word.
 *
 * @param P      Parser state.
 * @param closer Closing delimiter of the enclosing test (`]` or `]]`).
 * @returns The word node, or null when the lexer is positioned at `closer`
 *          (or when parseWord itself yields nothing).
 */
function parseTestPrimary(P: ParseState, closer: string): TsNode | null {
  skipBlanks(P.L)
  if (peek(P.L) === ']') {
    // Never read past the end of the test construct.
    if (closer === ']') return null
    if (closer === ']]' && peek(P.L, 1) === ']') return null
  }
  return parseWord(P, 'arg')
}
4070  
/** How bare identifiers are emitted while parsing an arithmetic context. */
type ArithMode =
  /** Bare identifiers → variable_name. The default; used in $((..)) and ((..)). */
  | 'var'
  /** Bare identifiers → word. C-style `for` head condition/update clauses. */
  | 'word'
  /** Identifiers followed by `=` → variable_assignment. C-style `for` init clause. */
  | 'assign'
4078  
/**
 * Binding strength of each arithmetic binary operator. A larger number binds
 * tighter. Declared tier by tier, loosest first, and flattened into a lookup
 * table keyed by operator text.
 */
const ARITH_PREC: Record<string, number> = (() => {
  const tiers: ReadonlyArray<readonly [number, readonly string[]]> = [
    [2, ['=', '+=', '-=', '*=', '/=', '%=', '<<=', '>>=', '&=', '^=', '|=']],
    [4, ['||']],
    [5, ['&&']],
    [6, ['|']],
    [7, ['^']],
    [8, ['&']],
    [9, ['==', '!=']],
    [10, ['<', '>', '<=', '>=']],
    [11, ['<<', '>>']],
    [12, ['+', '-']],
    [13, ['*', '/', '%']],
    [14, ['**']],
  ]
  const table: Record<string, number> = {}
  for (const [prec, ops] of tiers) {
    for (const op of ops) table[op] = prec
  }
  return table
})()
4112  
/**
 * Operators that associate to the right: every assignment form, plus
 * exponentiation (`**`).
 */
const ARITH_RIGHT_ASSOC = new Set(
  '= += -= *= /= %= <<= >>= &= ^= |= **'.split(' '),
)
4128  
/**
 * Parse a single arithmetic expression (no top-level comma list), starting at
 * the ternary level.
 *
 * @param P    Parser state.
 * @param stop Terminator understood by isArithStop (e.g. `))`, `]`, `;`).
 * @param mode Identifier handling mode; defaults to 'var'.
 * @returns The expression node, or null when nothing could be parsed.
 */
function parseArithExpr(
  P: ParseState,
  stop: string,
  mode: ArithMode = 'var',
): TsNode | null {
  const expr = parseArithTernary(P, stop, mode)
  return expr
}
4136  
/**
 * Parse a comma-separated list of arithmetic expressions.
 *
 * The commas themselves are consumed but not emitted as nodes; empty slots
 * between commas contribute nothing to the result.
 *
 * @param P    Parser state.
 * @param stop Terminator understood by isArithStop.
 * @param mode Identifier handling mode; defaults to 'var'.
 * @returns The parsed expressions in source order (possibly empty).
 */
function parseArithCommaList(
  P: ParseState,
  stop: string,
  mode: ArithMode = 'var',
): TsNode[] {
  const exprs: TsNode[] = []
  for (;;) {
    const expr = parseArithTernary(P, stop, mode)
    if (expr) exprs.push(expr)
    skipBlanks(P.L)
    if (peek(P.L) !== ',' || isArithStop(P, stop)) return exprs
    advance(P.L)
  }
}
4156  
/**
 * Parse `cond ? consequence : alternative`, or just `cond` when no `?`
 * follows.
 *
 * Both branches are parsed at ternary level, so nested conditionals work in
 * either position (`a ? b ? c : d : e` and `a ? b : c ? d : e`). The
 * consequence is parsed with `:` as its stop so it cannot run past the
 * separator that belongs to this ternary.
 *
 * Malformed input is tolerated: a missing `:` yields a zero-width `:` node,
 * and missing branches are simply left out of the children.
 *
 * @param P    Parser state.
 * @param stop Terminator understood by isArithStop.
 * @param mode Identifier handling mode.
 * @returns A `ternary_expression`, the bare condition, or null.
 */
function parseArithTernary(
  P: ParseState,
  stop: string,
  mode: ArithMode,
): TsNode | null {
  const cond = parseArithBinary(P, stop, 0, mode)
  if (!cond) return null
  skipBlanks(P.L)
  if (peek(P.L) === '?') {
    const qs = P.L.b
    advance(P.L)
    const q = mk(P, '?', qs, P.L.b, [])
    // Ternary level (not binary) so a nested `x ? y : z` is accepted as the
    // consequence; for non-nested input this parses exactly the same text.
    const t = parseArithTernary(P, ':', mode)
    skipBlanks(P.L)
    let colon: TsNode
    if (peek(P.L) === ':') {
      const cs = P.L.b
      advance(P.L)
      colon = mk(P, ':', cs, P.L.b, [])
    } else {
      // Missing separator: emit a zero-width placeholder.
      colon = mk(P, ':', P.L.b, P.L.b, [])
    }
    const f = parseArithTernary(P, stop, mode)
    const last = f ?? colon
    const kids: TsNode[] = [cond, q]
    if (t) kids.push(t)
    kids.push(colon)
    if (f) kids.push(f)
    return mk(P, 'ternary_expression', cond.startIndex, last.endIndex, kids)
  }
  return cond
}
4189  
/**
 * Identify the arithmetic binary operator at the current position without
 * consuming it. The longest match wins (`<<=` over `<<` over `<`).
 *
 * `++` and `--` are never reported here because they are prefix/postfix
 * operators. A lone `!` is not a binary operator either.
 *
 * @param P Parser state.
 * @returns `[operatorText, length]`, or null when no binary operator starts
 *          at the current position.
 */
function scanArithOp(P: ParseState): [string, number] | null {
  const c = peek(P.L)
  const c1 = peek(P.L, 1)
  const c2 = peek(P.L, 2)
  switch (c) {
    case '<':
    case '>':
      // Shift, shift-assign, relational-or-equal, or plain relational.
      if (c1 === c) return c2 === '=' ? [c + c + '=', 3] : [c + c, 2]
      return c1 === '=' ? [c + '=', 2] : [c, 1]
    case '*':
      if (c1 === '*') return ['**', 2]
      return c1 === '=' ? ['*=', 2] : ['*', 1]
    case '=':
      return c1 === '=' ? ['==', 2] : ['=', 1]
    case '!':
      return c1 === '=' ? ['!=', 2] : null
    case '&':
    case '|':
      // Doubled → logical; with `=` → compound assignment; else bitwise.
      if (c1 === c) return [c + c, 2]
      return c1 === '=' ? [c + '=', 2] : [c, 1]
    case '+':
    case '-':
      if (c1 === '=') return [c + '=', 2]
      // `++` / `--` belong to the unary/postfix parsers.
      return c1 === c ? null : [c, 1]
    case '/':
    case '%':
    case '^':
      return c1 === '=' ? [c + '=', 2] : [c, 1]
    default:
      return null
  }
}
4230  
/**
 * Precedence-climbing parser for arithmetic binary expressions.
 *
 * Operators are folded onto the left operand for as long as their precedence
 * is at least `minPrec`. Right-associative operators recurse with the same
 * precedence; all others recurse with precedence + 1. The loop ends at the
 * stop terminator, at a comma, or at anything that is not a known operator.
 *
 * If an operator is consumed but no right operand follows, the expression
 * built so far is returned.
 *
 * @param P       Parser state.
 * @param stop    Terminator understood by isArithStop.
 * @param minPrec Lowest operator precedence this call may consume.
 * @param mode    Identifier handling mode.
 * @returns The expression node, or null when there is no left operand.
 */
function parseArithBinary(
  P: ParseState,
  stop: string,
  minPrec: number,
  mode: ArithMode,
): TsNode | null {
  let acc = parseArithUnary(P, stop, mode)
  if (!acc) return null
  for (;;) {
    skipBlanks(P.L)
    if (isArithStop(P, stop) || peek(P.L) === ',') break
    const scanned = scanArithOp(P)
    if (!scanned) break
    const [symbol, width] = scanned
    const prec = ARITH_PREC[symbol]
    if (prec === undefined || prec < minPrec) break

    const opStart = P.L.b
    let remaining = width
    while (remaining-- > 0) advance(P.L)
    const operator = mk(P, symbol, opStart, P.L.b, [])

    const rhsMinPrec = ARITH_RIGHT_ASSOC.has(symbol) ? prec : prec + 1
    const rhs = parseArithBinary(P, stop, rhsMinPrec, mode)
    if (!rhs) break
    acc = mk(P, 'binary_expression', acc.startIndex, rhs.endIndex, [
      acc,
      operator,
      rhs,
    ])
  }
  return acc
}
4263  
/**
 * Parse prefix operators in an arithmetic expression: `++`, `--`, `-`, `+`,
 * `!` and `~`. Anything else is handed to the postfix level.
 *
 * In 'word' and 'assign' modes (c-style `for` head), `-` immediately followed
 * by a digit is emitted as a single `number` literal per tree-sitter; in
 * 'var' mode it is a unary_expression. A prefix operator with no operand
 * after it is returned as the bare operator node.
 *
 * @param P    Parser state.
 * @param stop Terminator understood by isArithStop.
 * @param mode Identifier handling mode.
 * @returns The parsed node, or null at the stop terminator.
 */
function parseArithUnary(
  P: ParseState,
  stop: string,
  mode: ArithMode,
): TsNode | null {
  skipBlanks(P.L)
  if (isArithStop(P, stop)) return null
  const ch = peek(P.L)
  const next = peek(P.L, 1)

  const isIncDec = (ch === '+' || ch === '-') && next === ch
  const isSign = ch === '-' || ch === '+' || ch === '!' || ch === '~'
  if (!isIncDec && !isSign) return parseArithPostfix(P, stop, mode)

  const opStart = P.L.b
  if (!isIncDec && mode !== 'var' && ch === '-' && isDigit(next)) {
    // Negative literal: consume `-` and the digit run as one number.
    advance(P.L)
    while (isDigit(peek(P.L))) advance(P.L)
    return mk(P, 'number', opStart, P.L.b, [])
  }

  advance(P.L)
  if (isIncDec) advance(P.L)
  const op = mk(P, isIncDec ? ch + next : ch, opStart, P.L.b, [])
  const operand = parseArithUnary(P, stop, mode)
  return operand
    ? mk(P, 'unary_expression', op.startIndex, operand.endIndex, [op, operand])
    : op
}
4301  
/**
 * Parse a primary followed by an optional postfix `++` or `--`.
 *
 * The operator is only recognised at the position where the primary parser
 * stopped; no blanks are skipped here first.
 *
 * @param P    Parser state.
 * @param stop Terminator understood by isArithStop.
 * @param mode Identifier handling mode.
 * @returns A `postfix_expression`, the bare primary, or null.
 */
function parseArithPostfix(
  P: ParseState,
  stop: string,
  mode: ArithMode,
): TsNode | null {
  const operand = parseArithPrimary(P, stop, mode)
  if (!operand) return null
  const ch = peek(P.L)
  const next = peek(P.L, 1)
  const isIncDec = (ch === '+' && next === '+') || (ch === '-' && next === '-')
  if (!isIncDec) return operand

  const opStart = P.L.b
  advance(P.L)
  advance(P.L)
  const op = mk(P, ch + next, opStart, P.L.b, [])
  return mk(P, 'postfix_expression', operand.startIndex, op.endIndex, [
    operand,
    op,
  ])
}
4320  
/**
 * Parse one arithmetic primary:
 *   - `( expr, expr, ... )` → parenthesized_expression (a missing `)` yields a
 *     zero-width `)` node),
 *   - `"..."`               → double-quoted string,
 *   - `$...`                → whatever parseDollarLike produces,
 *   - a number              → decimal, `0x` hex, or `BASE#DIGITS`,
 *   - an identifier         → variable_assignment (only in 'assign' mode when
 *     `=` follows), subscript (`name[...]`), or a bare variable_name / word
 *     depending on `mode`.
 *
 * Identifier nodes always end at the last identifier character. In 'assign'
 * mode blanks after the name are skipped while looking for `=`, and those
 * blanks must not become part of the variable_name / word span.
 *
 * @param P    Parser state.
 * @param stop Terminator understood by isArithStop.
 * @param mode Identifier handling mode.
 * @returns The primary node, or null at the stop terminator or on a
 *          character that cannot start a primary.
 */
function parseArithPrimary(
  P: ParseState,
  stop: string,
  mode: ArithMode,
): TsNode | null {
  skipBlanks(P.L)
  if (isArithStop(P, stop)) return null
  const c = peek(P.L)
  if (c === '(') {
    const s = P.L.b
    advance(P.L)
    const open = mk(P, '(', s, P.L.b, [])
    // Parenthesized expression may contain comma-separated exprs
    const inners = parseArithCommaList(P, ')', mode)
    skipBlanks(P.L)
    let close: TsNode
    if (peek(P.L) === ')') {
      const cs = P.L.b
      advance(P.L)
      close = mk(P, ')', cs, P.L.b, [])
    } else {
      // Unterminated group: emit a zero-width `)` at the current position.
      close = mk(P, ')', P.L.b, P.L.b, [])
    }
    return mk(P, 'parenthesized_expression', open.startIndex, close.endIndex, [
      open,
      ...inners,
      close,
    ])
  }
  if (c === '"') {
    return parseDoubleQuoted(P)
  }
  if (c === '$') {
    return parseDollarLike(P)
  }
  if (isDigit(c)) {
    const s = P.L.b
    while (isDigit(peek(P.L))) advance(P.L)
    // Hex: 0x1f — only when the digit run so far is exactly a single `0`.
    if (
      P.L.b - s === 1 &&
      c === '0' &&
      (peek(P.L) === 'x' || peek(P.L) === 'X')
    ) {
      advance(P.L)
      while (isHexDigit(peek(P.L))) advance(P.L)
    }
    // Base notation: BASE#DIGITS e.g. 2#1010, 16#ff
    else if (peek(P.L) === '#') {
      advance(P.L)
      while (isBaseDigit(peek(P.L))) advance(P.L)
    }
    return mk(P, 'number', s, P.L.b, [])
  }
  if (isIdentStart(c)) {
    const s = P.L.b
    while (isIdentChar(peek(P.L))) advance(P.L)
    // Remember where the name ends BEFORE any blanks are skipped below, so
    // node spans never include trailing whitespace.
    const identEnd = P.L.b
    const nc = peek(P.L)
    // Assignment in 'assign' mode (c-style for init): emit variable_assignment
    // so chained `a = b = c = 1` nests correctly. Other modes treat `=` as a
    // binary_expression operator via the precedence table.
    if (mode === 'assign') {
      skipBlanks(P.L)
      const ac = peek(P.L)
      const ac1 = peek(P.L, 1)
      if (ac === '=' && ac1 !== '=') {
        const vn = mk(P, 'variable_name', s, identEnd, [])
        const es = P.L.b
        advance(P.L)
        const eq = mk(P, '=', es, P.L.b, [])
        // RHS may itself be another assignment (chained)
        const val = parseArithTernary(P, stop, mode)
        const end = val ? val.endIndex : eq.endIndex
        const kids = val ? [vn, eq, val] : [vn, eq]
        return mk(P, 'variable_assignment', s, end, kids)
      }
    }
    // Subscript. `nc` is the character directly after the name, so when it is
    // `[` no blanks were skipped and the lexer still sits at identEnd.
    if (nc === '[') {
      const vn = mk(P, 'variable_name', s, identEnd, [])
      const brS = P.L.b
      advance(P.L)
      const brOpen = mk(P, '[', brS, P.L.b, [])
      const idx = parseArithTernary(P, ']', 'var') ?? parseDollarLike(P)
      skipBlanks(P.L)
      let brClose: TsNode
      if (peek(P.L) === ']') {
        const cs = P.L.b
        advance(P.L)
        brClose = mk(P, ']', cs, P.L.b, [])
      } else {
        // Unterminated subscript: emit a zero-width `]`.
        brClose = mk(P, ']', P.L.b, P.L.b, [])
      }
      const kids = idx ? [vn, brOpen, idx, brClose] : [vn, brOpen, brClose]
      return mk(P, 'subscript', s, brClose.endIndex, kids)
    }
    // Bare identifier: variable_name in 'var' mode, word in 'word'/'assign' mode.
    // 'assign' mode falls through to word when no `=` follows (c-style for
    // cond/update clauses: `c<=5` → binary_expression(word, number)).
    const identType = mode === 'var' ? 'variable_name' : 'word'
    return mk(P, identType, s, identEnd, [])
  }
  return null
}
4425  
/**
 * Report whether the lexer is positioned at the terminator named by `stop`.
 *
 * Recognised values are `))`, `)`, `;`, `:`, `]`, `}` and `:}` (either `:` or
 * `}`). Any other value means "stop at end of input or at a newline".
 *
 * @param P    Parser state.
 * @param stop Terminator name.
 * @returns True when the current character(s) match the terminator.
 */
function isArithStop(P: ParseState, stop: string): boolean {
  const c = peek(P.L)
  switch (stop) {
    case '))':
      return c === ')' && peek(P.L, 1) === ')'
    case ')':
    case ';':
    case ':':
    case ']':
    case '}':
      // Single-character terminators match themselves.
      return c === stop
    case ':}':
      return c === ':' || c === '}'
    default:
      return c === '' || c === '\n'
  }
}