/ utils / bash / ParsedCommand.ts
ParsedCommand.ts
  1  import memoize from 'lodash-es/memoize.js'
  2  import {
  3    extractOutputRedirections,
  4    splitCommandWithOperators,
  5  } from './commands.js'
  6  import type { Node } from './parser.js'
  7  import {
  8    analyzeCommand,
  9    type TreeSitterAnalysis,
 10  } from './treeSitterAnalysis.js'
 11  
 12  export type OutputRedirection = {
 13    target: string
 14    operator: '>' | '>>'
 15  }
 16  
 17  /**
 18   * Interface for parsed command implementations.
 19   * Both tree-sitter and regex fallback implementations conform to this.
 20   */
 21  export interface IParsedCommand {
 22    readonly originalCommand: string
 23    toString(): string
 24    getPipeSegments(): string[]
 25    withoutOutputRedirections(): string
 26    getOutputRedirections(): OutputRedirection[]
 27    /**
 28     * Returns tree-sitter analysis data if available.
 29     * Returns null for the regex fallback implementation.
 30     */
 31    getTreeSitterAnalysis(): TreeSitterAnalysis | null
 32  }
 33  
 34  /**
 35   * @deprecated Legacy regex/shell-quote path. Only used when tree-sitter is
 36   * unavailable. The primary gate is parseForSecurity (ast.ts).
 37   *
 38   * Regex-based fallback implementation using shell-quote parser.
 39   * Used when tree-sitter is not available.
 40   * Exported for testing purposes.
 41   */
 42  export class RegexParsedCommand_DEPRECATED implements IParsedCommand {
 43    readonly originalCommand: string
 44  
 45    constructor(command: string) {
 46      this.originalCommand = command
 47    }
 48  
 49    toString(): string {
 50      return this.originalCommand
 51    }
 52  
 53    getPipeSegments(): string[] {
 54      try {
 55        const parts = splitCommandWithOperators(this.originalCommand)
 56        const segments: string[] = []
 57        let currentSegment: string[] = []
 58  
 59        for (const part of parts) {
 60          if (part === '|') {
 61            if (currentSegment.length > 0) {
 62              segments.push(currentSegment.join(' '))
 63              currentSegment = []
 64            }
 65          } else {
 66            currentSegment.push(part)
 67          }
 68        }
 69  
 70        if (currentSegment.length > 0) {
 71          segments.push(currentSegment.join(' '))
 72        }
 73  
 74        return segments.length > 0 ? segments : [this.originalCommand]
 75      } catch {
 76        return [this.originalCommand]
 77      }
 78    }
 79  
 80    withoutOutputRedirections(): string {
 81      if (!this.originalCommand.includes('>')) {
 82        return this.originalCommand
 83      }
 84      const { commandWithoutRedirections, redirections } =
 85        extractOutputRedirections(this.originalCommand)
 86      return redirections.length > 0
 87        ? commandWithoutRedirections
 88        : this.originalCommand
 89    }
 90  
 91    getOutputRedirections(): OutputRedirection[] {
 92      const { redirections } = extractOutputRedirections(this.originalCommand)
 93      return redirections
 94    }
 95  
 96    getTreeSitterAnalysis(): TreeSitterAnalysis | null {
 97      return null
 98    }
 99  }
100  
/**
 * An output redirection together with the span of the syntax node it came
 * from. `startIndex` and `endIndex` are the node's offsets into the command;
 * tree-sitter reports them as UTF-8 byte offsets.
 */
type RedirectionNode = OutputRedirection &
  Record<'startIndex' | 'endIndex', number>
105  
/**
 * Walks the tree rooted at `node` depth-first, calling `visitor` on each node
 * before any of that node's children (pre-order).
 *
 * @param node - root of the subtree to walk
 * @param visitor - callback invoked once per node
 */
function visitNodes(node: Node, visitor: (node: Node) => void): void {
  visitor(node)
  node.children.forEach(child => visitNodes(child, visitor))
}
112  
/**
 * Collects the start offset of every `|` token that is a direct child of a
 * `pipeline` node anywhere under `rootNode`.
 *
 * @param rootNode - root of the parsed command tree
 * @returns the offsets in ascending order
 */
function extractPipePositions(rootNode: Node): number[] {
  const offsets: number[] = []
  visitNodes(rootNode, current => {
    if (current.type !== 'pipeline') return
    offsets.push(
      ...current.children
        .filter(child => child.type === '|')
        .map(child => child.startIndex),
    )
  })
  // The depth-first walk does not always yield pipes in source order (the
  // original author's example is `a | b && c | d`). getPipeSegments slices
  // left-to-right over these offsets, so put them in ascending order.
  offsets.sort((left, right) => left - right)
  return offsets
}
130  
/**
 * Finds every `file_redirect` node under `rootNode` that has both a `>` or
 * `>>` operator child and a `word` child, and records its operator, target
 * text, and node span.
 *
 * @param rootNode - root of the parsed command tree
 * @returns one entry per matching redirect, in traversal order
 */
function extractRedirectionNodes(rootNode: Node): RedirectionNode[] {
  const found: RedirectionNode[] = []
  visitNodes(rootNode, candidate => {
    if (candidate.type !== 'file_redirect') return

    const operatorNode = candidate.children.find(
      child => child.type === '>' || child.type === '>>',
    )
    // NOTE(review): only a direct `word` child counts as a target, and only
    // `>` / `>>` count as operators. A redirect with any other operator or
    // target node type is skipped entirely — confirm that is intended.
    const targetNode = candidate.children.find(child => child.type === 'word')
    if (!operatorNode || !targetNode) return

    found.push({
      startIndex: candidate.startIndex,
      endIndex: candidate.endIndex,
      target: targetNode.text,
      operator: operatorNode.type as '>' | '>>',
    })
  })
  return found
}
150  
/**
 * IParsedCommand implementation backed by data extracted from a tree-sitter
 * parse: pipe offsets, redirection spans, and the analysis result.
 */
class TreeSitterParsedCommand implements IParsedCommand {
  readonly originalCommand: string
  // Tree-sitter's startIndex/endIndex are UTF-8 byte offsets, but JS
  // String.slice() uses UTF-16 code-unit indices. For ASCII they coincide;
  // for multi-byte code points (e.g. `—` U+2014: 3 UTF-8 bytes, 1 code unit)
  // they diverge and slicing the string directly lands mid-token. Slicing
  // the UTF-8 Buffer with tree-sitter's byte offsets and decoding back to
  // string is correct regardless of code-point width.
  private readonly commandBytes: Buffer
  private readonly pipePositions: number[]
  private readonly redirectionNodes: RedirectionNode[]
  private readonly treeSitterAnalysis: TreeSitterAnalysis

  /**
   * @param command - the command text the offsets refer to
   * @param pipePositions - byte offsets of the `|` tokens; getPipeSegments
   *   walks them in array order, so they are expected in ascending order
   * @param redirectionNodes - output redirections with their byte spans
   * @param treeSitterAnalysis - analysis result handed back unchanged by
   *   getTreeSitterAnalysis()
   */
  constructor(
    command: string,
    pipePositions: number[],
    redirectionNodes: RedirectionNode[],
    treeSitterAnalysis: TreeSitterAnalysis,
  ) {
    this.originalCommand = command
    this.commandBytes = Buffer.from(command, 'utf8')
    this.pipePositions = pipePositions
    this.redirectionNodes = redirectionNodes
    this.treeSitterAnalysis = treeSitterAnalysis
  }

  /** Returns the command exactly as it was passed to the constructor. */
  toString(): string {
    return this.originalCommand
  }

  /**
   * Splits the command at the recorded pipe offsets.
   *
   * Each segment is trimmed and empty segments are dropped.
   *
   * @returns the segments, or a one-element array holding the original
   *   command when no pipe offsets were recorded.
   */
  getPipeSegments(): string[] {
    if (this.pipePositions.length === 0) {
      return [this.originalCommand]
    }

    const segments: string[] = []
    let segmentStart = 0

    for (const pipePos of this.pipePositions) {
      const segment = this.decodeBytes(segmentStart, pipePos).trim()
      if (segment) {
        segments.push(segment)
      }
      // `|` is a single byte, so the next segment begins right after it.
      segmentStart = pipePos + 1
    }

    const lastSegment = this.decodeBytes(segmentStart).trim()
    if (lastSegment) {
      segments.push(lastSegment)
    }

    return segments
  }

  /**
   * Returns the command with every recorded redirection span cut out.
   *
   * The result is trimmed and each whitespace run is collapsed to one space.
   * Spans are processed in one left-to-right pass over the original bytes,
   * so nested or overlapping spans are removed once rather than shifting the
   * offsets of later cuts.
   *
   * @returns the redirection-free command, or the original command unchanged
   *   when there are no redirections.
   */
  withoutOutputRedirections(): string {
    if (this.redirectionNodes.length === 0) return this.originalCommand

    const ascending = [...this.redirectionNodes].sort(
      (a, b) => a.startIndex - b.startIndex,
    )

    const kept: Buffer[] = []
    let cursor = 0 // first byte not yet either kept or cut
    for (const { startIndex, endIndex } of ascending) {
      if (startIndex > cursor) {
        kept.push(this.commandBytes.subarray(cursor, startIndex))
      }
      // Never move backwards: a span inside an already-cut span is a no-op.
      cursor = Math.max(cursor, startIndex, endIndex)
    }
    kept.push(this.commandBytes.subarray(cursor))

    return Buffer.concat(kept).toString('utf8').trim().replace(/\s+/g, ' ')
  }

  /** Returns the recorded redirections without their span information. */
  getOutputRedirections(): OutputRedirection[] {
    return this.redirectionNodes.map(({ target, operator }) => ({
      target,
      operator,
    }))
  }

  /** Returns the analysis object passed to the constructor. */
  getTreeSitterAnalysis(): TreeSitterAnalysis {
    return this.treeSitterAnalysis
  }

  /** Decodes the UTF-8 bytes in [start, end) of the command to a string. */
  private decodeBytes(start: number, end?: number): string {
    return this.commandBytes.subarray(start, end).toString('utf8')
  }
}
239  
/**
 * Probes whether the tree-sitter parser can be loaded and can parse a
 * trivial command. Any failure while importing or parsing counts as
 * "unavailable". The function is memoized and always called without
 * arguments, so the first outcome (true or false) is reused by later calls.
 */
const getTreeSitterAvailable = memoize(async (): Promise<boolean> => {
  try {
    const parser = await import('./parser.js')
    const probe = await parser.parseCommand('echo test')
    return probe !== null
  } catch {
    return false
  }
})
249  
/**
 * Wraps an already-parsed AST root in a TreeSitterParsedCommand, so callers
 * that hold the tree can skip the redundant native.parse that
 * ParsedCommand.parse would perform.
 *
 * @param command - the command text; offsets taken from `root` are applied
 *   to it
 * @param root - root node of the parsed tree
 * @returns a parsed command carrying the pipe offsets, redirection spans and
 *   analysis extracted from `root`
 */
export function buildParsedCommandFromRoot(
  command: string,
  root: Node,
): IParsedCommand {
  return new TreeSitterParsedCommand(
    command,
    extractPipePositions(root),
    extractRedirectionNodes(root),
    analyzeCommand(root, command),
  )
}
269  
/**
 * Parses `command`, preferring tree-sitter and falling back to the regex
 * implementation.
 *
 * @param command - the command string to parse
 * @returns null for an empty command; a tree-sitter-backed result when the
 *   parser is available and yields data; otherwise a
 *   RegexParsedCommand_DEPRECATED.
 */
async function doParse(command: string): Promise<IParsedCommand | null> {
  if (!command) return null

  if (await getTreeSitterAvailable()) {
    try {
      const parser = await import('./parser.js')
      const parsed = await parser.parseCommand(command)
      if (parsed) {
        // Native NAPI parser returns plain JS objects (no WASM handles);
        // nothing to free — extract directly.
        return buildParsedCommandFromRoot(command, parsed.rootNode)
      }
    } catch {
      // Any tree-sitter failure is non-fatal: use the regex fallback below.
    }
  }

  return new RegexParsedCommand_DEPRECATED(command)
}
291  
// One-slot cache for ParsedCommand.parse. Legacy callers
// (bashCommandIsSafeAsync, buildSegmentWithoutRedirections) may ask for the
// same command string several times in a row, and each parse costs roughly
// one native.parse plus ~6 tree walks. Remembering only the most recent
// command avoids that repeated work while keeping at most one
// TreeSitterParsedCommand alive through the cache.
let lastCmd: string | undefined
let lastResult: Promise<IParsedCommand | null> | undefined

/**
 * ParsedCommand provides methods for working with shell commands.
 * Tree-sitter is used when available for quote-aware parsing; otherwise the
 * regex-based parser is used.
 */
export const ParsedCommand = {
  /**
   * Parses a command string.
   *
   * Calling this again with the same string as the previous call returns the
   * very same promise instead of parsing again.
   *
   * @param command - the command string to parse
   * @returns a promise for the parsed command, or for null if parsing fails
   *   completely
   */
  parse(command: string): Promise<IParsedCommand | null> {
    const cached = command === lastCmd ? lastResult : undefined
    if (cached !== undefined) {
      return cached
    }
    lastCmd = command
    const pending = doParse(command)
    lastResult = pending
    return pending
  },
}
318  }