/ utils / bash / bashPipeCommand.ts
bashPipeCommand.ts
  1  import {
  2    hasMalformedTokens,
  3    hasShellQuoteSingleQuoteBug,
  4    type ParseEntry,
  5    quote,
  6    tryParseShellCommand,
  7  } from './shellQuote.js'
  8  
  9  /**
 10   * Rearranges a command with pipes to place stdin redirect after the first command.
 11   * This fixes an issue where eval treats the entire piped command as a single unit,
 12   * causing the stdin redirect to apply to eval itself rather than the first command.
 13   */
 14  export function rearrangePipeCommand(command: string): string {
 15    // Skip if command has backticks - shell-quote doesn't handle them well
 16    if (command.includes('`')) {
 17      return quoteWithEvalStdinRedirect(command)
 18    }
 19  
 20    // Skip if command has command substitution - shell-quote parses $() incorrectly,
 21    // treating ( and ) as separate operators instead of recognizing command substitution
 22    if (command.includes('$(')) {
 23      return quoteWithEvalStdinRedirect(command)
 24    }
 25  
 26    // Skip if command references shell variables ($VAR, ${VAR}). shell-quote's parse()
 27    // expands these to empty string when no env is passed, silently dropping the
 28    // reference. Even if we preserved the token via an env function, quote() would
 29    // then escape the $ during rebuild, preventing runtime expansion. See #9732.
 30    if (/\$[A-Za-z_{]/.test(command)) {
 31      return quoteWithEvalStdinRedirect(command)
 32    }
 33  
 34    // Skip if command contains bash control structures (for/while/until/if/case/select)
 35    // shell-quote cannot parse these correctly and will incorrectly find pipes inside
 36    // the control structure body, breaking the command when rearranged
 37    if (containsControlStructure(command)) {
 38      return quoteWithEvalStdinRedirect(command)
 39    }
 40  
 41    // Join continuation lines before parsing: shell-quote doesn't handle \<newline>
 42    // and produces empty string tokens for each occurrence, causing spurious empty
 43    // arguments in the reconstructed command
 44    const joined = joinContinuationLines(command)
 45  
 46    // shell-quote treats bare newlines as whitespace, not command separators.
 47    // Parsing+rebuilding 'cmd1 | head\ncmd2 | grep' yields 'cmd1 | head cmd2 | grep',
 48    // silently merging pipelines. Line-continuation (\<newline>) is already stripped
 49    // above; any remaining newline is a real separator. Bail to the eval fallback,
 50    // which preserves the newline inside a single-quoted arg. See #32515.
 51    if (joined.includes('\n')) {
 52      return quoteWithEvalStdinRedirect(command)
 53    }
 54  
 55    // SECURITY: shell-quote treats \' inside single quotes as an escape, but
 56    // bash treats it as literal \ followed by a closing quote. The pattern
 57    // '\' <payload> '\' makes shell-quote merge <payload> into the quoted
 58    // string, hiding operators like ; from the token stream. Rebuilding from
 59    // that merged token can expose the operators when bash re-parses.
 60    if (hasShellQuoteSingleQuoteBug(joined)) {
 61      return quoteWithEvalStdinRedirect(command)
 62    }
 63  
 64    const parseResult = tryParseShellCommand(joined)
 65  
 66    // If parsing fails (malformed syntax), fall back to quoting the whole command
 67    if (!parseResult.success) {
 68      return quoteWithEvalStdinRedirect(command)
 69    }
 70  
 71    const parsed = parseResult.tokens
 72  
 73    // SECURITY: shell-quote tokenizes differently from bash. Input like
 74    // `echo {"hi":\"hi;calc.exe"}` is a bash syntax error (unbalanced quote),
 75    // but shell-quote parses it into tokens with `;` as an operator and
 76    // `calc.exe` as a separate word. Rebuilding from those tokens produces
 77    // valid bash that executes `calc.exe` — turning a syntax error into an
 78    // injection. Unbalanced delimiters in a string token signal this
 79    // misparsing; fall back to whole-command quoting, which preserves the
 80    // original (bash then rejects it with the same syntax error it would have
 81    // raised without us).
 82    if (hasMalformedTokens(joined, parsed)) {
 83      return quoteWithEvalStdinRedirect(command)
 84    }
 85  
 86    const firstPipeIndex = findFirstPipeOperator(parsed)
 87  
 88    if (firstPipeIndex <= 0) {
 89      return quoteWithEvalStdinRedirect(command)
 90    }
 91  
 92    // Rebuild: first_command < /dev/null | rest_of_pipeline
 93    const parts = [
 94      ...buildCommandParts(parsed, 0, firstPipeIndex),
 95      '< /dev/null',
 96      ...buildCommandParts(parsed, firstPipeIndex, parsed.length),
 97    ]
 98  
 99    return singleQuoteForEval(parts.join(' '))
100  }
101  
/**
 * Locates the first `|` operator token in a parsed shell command.
 *
 * @param parsed - Token list produced by the shell parser.
 * @returns The position of the first pipe operator, or -1 when there is none.
 */
function findFirstPipeOperator(parsed: ParseEntry[]): number {
  return parsed.findIndex(token => isOperator(token, '|'))
}
114  
/**
 * Recognizes a three-token file descriptor redirection starting at `at`
 * (fd, operator, target) and returns it glued into one word, e.g. `2>&1` or
 * `2>/dev/null`. Returns undefined when the tokens at `at` are not one of the
 * supported forms or when the triple would run past `end`.
 */
function collapseFdRedirect(
  parsed: ParseEntry[],
  at: number,
  end: number,
): string | undefined {
  const fd = parsed[at]
  if (typeof fd !== 'string' || !/^[012]$/.test(fd)) {
    return undefined
  }
  // All three tokens must lie inside [at, end)
  if (at + 2 >= end) {
    return undefined
  }
  const operator = parsed[at + 1]
  if (!isOperator(operator)) {
    return undefined
  }
  const target = parsed[at + 2]

  if (operator.op === '>&') {
    // 2>&1 style: duplicate one standard descriptor onto another
    if (typeof target === 'string' && /^[012]$/.test(target)) {
      return `${fd}>&${target}`
    }
    return undefined
  }

  if (operator.op === '>') {
    // 2>/dev/null style
    if (target === '/dev/null') {
      return `${fd}>/dev/null`
    }
    // 2> &1 style (space between > and &1)
    if (typeof target === 'string' && target.startsWith('&')) {
      const duplicated = target.slice(1)
      if (/^[012]$/.test(duplicated)) {
        return `${fd}>&${duplicated}`
      }
    }
  }

  return undefined
}

/**
 * Turns the parsed entries in the half-open range [start, end) back into shell
 * words, ready to be joined with spaces.
 *
 * - File descriptor redirections (`2>&1`, `2>/dev/null`, `2> &1`) are emitted as
 *   a single word.
 * - Leading `NAME=value` words of a command keep their bare `NAME=` and only the
 *   value is quoted; every other string is quoted as a whole.
 * - Glob operators contribute their raw pattern; other operators contribute
 *   their operator text.
 * - Entries that are neither strings nor operators are skipped.
 *
 * @param parsed - Token list produced by the shell parser.
 * @param start - Index of the first entry to emit (inclusive).
 * @param end - Index at which to stop (exclusive).
 * @returns The rebuilt words, in order.
 */
function buildCommandParts(
  parsed: ParseEntry[],
  start: number,
  end: number,
): string[] {
  const words: string[] = []
  // True while only env-var assignments (or nothing) have been seen for the
  // current command; assignments are only honoured in that leading position.
  let assignmentsAllowed = true

  let cursor = start
  while (cursor < end) {
    const redirect = collapseFdRedirect(parsed, cursor, end)
    if (redirect !== undefined) {
      words.push(redirect)
      cursor += 3 // fd, operator and target were all consumed
      continue
    }

    const token = parsed[cursor]
    if (typeof token === 'string') {
      if (assignmentsAllowed && isEnvironmentVariableAssignment(token)) {
        // Keep NAME= verbatim and quote only the value, so spaces and special
        // characters in the value survive
        const eq = token.indexOf('=')
        words.push(`${token.slice(0, eq)}=${quote([token.slice(eq + 1)])}`)
      } else {
        // First ordinary word: everything after it is an argument
        assignmentsAllowed = false
        words.push(quote([token]))
      }
    } else if (isOperator(token)) {
      if (token.op === 'glob' && 'pattern' in token) {
        // Glob patterns stay unquoted so the shell can still expand them
        words.push(token.pattern as string)
      } else {
        words.push(token.op)
        // A command separator starts a new command, which may have its own
        // leading env-var assignments
        if (isCommandSeparator(token.op)) {
          assignmentsAllowed = true
        }
      }
    }
    cursor += 1
  }

  return words
}
213  
// A valid variable name (letter or underscore, then letters/digits/underscores)
// immediately followed by `=`, anchored at the start of the word.
const ENV_ASSIGNMENT_PREFIX = /^[A-Za-z_][A-Za-z0-9_]*=/

/**
 * Tells whether a word has the shape of an environment variable assignment
 * (`NAME=value`). Only the `NAME=` prefix is inspected; the value may be empty
 * or contain anything.
 *
 * @param str - The word to inspect.
 * @returns True when the word begins with a valid variable name followed by `=`.
 */
function isEnvironmentVariableAssignment(str: string): boolean {
  // The pattern is anchored, so a hit can only be at offset 0
  return str.search(ENV_ASSIGNMENT_PREFIX) === 0
}
222  
/**
 * Checks if an operator is a command separator that starts a new command context.
 * After these operators, environment variable assignments are valid again.
 *
 * Covers the list operators (`&&`, `||`, `;`, `&`) and the pipeline operators
 * (`|`, `|&`): in `a | b | FOO=1 c` the word after the second pipe is in
 * command position, so `FOO=1` is an assignment, not an argument of `b`.
 * Redirection operators (`>`, `>&`, ...) do not start a new command.
 *
 * @param op - The operator text of a parsed operator token.
 * @returns True when the next word begins a new command.
 */
function isCommandSeparator(op: string): boolean {
  switch (op) {
    case '&&':
    case '||':
    case ';':
    case '&':
    case '|':
    case '|&':
      return true
    default:
      return false
  }
}
230  
/**
 * Type guard for operator tokens: non-null objects that carry an `op` property.
 *
 * @param entry - Any parsed entry (or arbitrary value).
 * @param op - Optional operator text to match. When omitted (or empty), any
 *   operator is accepted.
 * @returns True when `entry` is an operator and, if `op` was given, its `op`
 *   equals it.
 */
function isOperator(entry: unknown, op?: string): entry is { op: string } {
  if (typeof entry !== 'object' || entry === null) {
    return false
  }
  if (!('op' in entry)) {
    return false
  }
  // No specific operator requested: being an operator at all is enough
  return !op || entry.op === op
}
240  
// A control-structure keyword at a word boundary, followed by one whitespace
// character.
const CONTROL_STRUCTURE_KEYWORD = /\b(for|while|until|if|case|select)\s/

/**
 * Detects bash control structures (for/while/until/if/case/select) in a command.
 * The keyword must be followed by whitespace, which cuts down on false
 * positives from commands or arguments that merely contain one of these words
 * as part of a longer token.
 *
 * @param command - The raw shell command text.
 * @returns True when one of the keywords, followed by whitespace, is present.
 */
function containsControlStructure(command: string): boolean {
  return command.search(CONTROL_STRUCTURE_KEYWORD) !== -1
}
250  
/**
 * Fallback quoting for commands whose pipe boundary cannot be located reliably
 * (e.g. commands with $(), backticks, or control structures): the whole command
 * is single-quoted and `< /dev/null` is appended as a real shell redirect
 * rather than as another eval argument.
 *
 * Result shape: eval 'cmd' < /dev/null
 *   → stdin of eval is /dev/null, eval evaluates 'cmd', pipes inside work correctly
 *
 * An earlier approach, `quote([cmd, '<', '/dev/null'])`, yielded
 * eval 'cmd' \< /dev/null
 *   → eval joins its args into 'cmd < /dev/null', so the redirect ends up on the
 *     LAST command of the pipeline
 *
 * @param command - The raw shell command text.
 * @returns The single-quoted command followed by ` < /dev/null`.
 */
function quoteWithEvalStdinRedirect(command: string): string {
  const quotedCommand = singleQuoteForEval(command)
  return `${quotedCommand} < /dev/null`
}
265  
/**
 * Wraps a string in single quotes so it can be passed as one eval argument.
 * Each embedded single quote becomes '"'"' (close the single-quoted run, emit a
 * double-quoted literal ', reopen the single-quoted run).
 *
 * This replaces shell-quote's quote(), which switches to double-quote mode when
 * the input contains single quotes and then escapes ! -> \!, corrupting jq/awk
 * filters like `select(.x != .y)` into `select(.x \!= .y)`.
 *
 * @param s - The text to quote.
 * @returns The single-quoted text.
 */
function singleQuoteForEval(s: string): string {
  const escapedQuote = `'"'"'`
  const body = s.split("'").join(escapedQuote)
  return `'${body}'`
}
276  
/**
 * Removes shell line continuations (backslash immediately followed by a
 * newline), joining the continued lines together.
 *
 * A run of backslashes before a newline is a continuation only when its length
 * is odd: the backslashes pair up as escaped backslashes and the unpaired last
 * one escapes the newline. For an even-length run every backslash is paired, so
 * the newline is a real separator and the text is left untouched.
 *
 * @param command - The raw shell command text.
 * @returns The command with continuation backslash-newline pairs removed.
 */
function joinContinuationLines(command: string): string {
  return command.replace(/\\+\n/g, run => {
    // `run` is one or more backslashes plus the trailing newline
    const slashCount = run.length - 1
    const isContinuation = slashCount % 2 !== 0
    // Drop the escaping backslash and the newline; keep the paired backslashes
    return isContinuation ? run.slice(0, slashCount - 1) : run
  })
}