/ tools / BashTool / sedValidation.ts
sedValidation.ts
  1  import type { ToolPermissionContext } from '../../Tool.js'
  2  import { splitCommand_DEPRECATED } from '../../utils/bash/commands.js'
  3  import { tryParseShellCommand } from '../../utils/bash/shellQuote.js'
  4  import type { PermissionResult } from '../../utils/permissions/PermissionResult.js'
  5  
  6  /**
  7   * Helper: Validate flags against an allowlist
  8   * Handles both single flags and combined flags (e.g., -nE)
  9   * @param flags Array of flags to validate
 10   * @param allowedFlags Array of allowed single-character and long flags
 11   * @returns true if all flags are valid, false otherwise
 12   */
 13  function validateFlagsAgainstAllowlist(
 14    flags: string[],
 15    allowedFlags: string[],
 16  ): boolean {
 17    for (const flag of flags) {
 18      // Handle combined flags like -nE or -Er
 19      if (flag.startsWith('-') && !flag.startsWith('--') && flag.length > 2) {
 20        // Check each character in combined flag
 21        for (let i = 1; i < flag.length; i++) {
 22          const singleFlag = '-' + flag[i]
 23          if (!allowedFlags.includes(singleFlag)) {
 24            return false
 25          }
 26        }
 27      } else {
 28        // Single flag or long flag
 29        if (!allowedFlags.includes(flag)) {
 30          return false
 31        }
 32      }
 33    }
 34    return true
 35  }
 36  
 37  /**
 38   * Pattern 1: Check if this is a line printing command with -n flag
 39   * Allows: sed -n 'N' | sed -n 'N,M' with optional -E, -r, -z flags
 40   * Allows semicolon-separated print commands like: sed -n '1p;2p;3p'
 41   * File arguments are ALLOWED for this pattern
 42   * @internal Exported for testing
 43   */
 44  export function isLinePrintingCommand(
 45    command: string,
 46    expressions: string[],
 47  ): boolean {
 48    const sedMatch = command.match(/^\s*sed\s+/)
 49    if (!sedMatch) return false
 50  
 51    const withoutSed = command.slice(sedMatch[0].length)
 52    const parseResult = tryParseShellCommand(withoutSed)
 53    if (!parseResult.success) return false
 54    const parsed = parseResult.tokens
 55  
 56    // Extract all flags
 57    const flags: string[] = []
 58    for (const arg of parsed) {
 59      if (typeof arg === 'string' && arg.startsWith('-') && arg !== '--') {
 60        flags.push(arg)
 61      }
 62    }
 63  
 64    // Validate flags - only allow -n, -E, -r, -z and their long forms
 65    const allowedFlags = [
 66      '-n',
 67      '--quiet',
 68      '--silent',
 69      '-E',
 70      '--regexp-extended',
 71      '-r',
 72      '-z',
 73      '--zero-terminated',
 74      '--posix',
 75    ]
 76  
 77    if (!validateFlagsAgainstAllowlist(flags, allowedFlags)) {
 78      return false
 79    }
 80  
 81    // Check if -n flag is present (required for Pattern 1)
 82    let hasNFlag = false
 83    for (const flag of flags) {
 84      if (flag === '-n' || flag === '--quiet' || flag === '--silent') {
 85        hasNFlag = true
 86        break
 87      }
 88      // Check in combined flags
 89      if (flag.startsWith('-') && !flag.startsWith('--') && flag.includes('n')) {
 90        hasNFlag = true
 91        break
 92      }
 93    }
 94  
 95    // Must have -n flag for Pattern 1
 96    if (!hasNFlag) {
 97      return false
 98    }
 99  
100    // Must have at least one expression
101    if (expressions.length === 0) {
102      return false
103    }
104  
105    // All expressions must be print commands (strict allowlist)
106    // Allow semicolon-separated commands
107    for (const expr of expressions) {
108      const commands = expr.split(';')
109      for (const cmd of commands) {
110        if (!isPrintCommand(cmd.trim())) {
111          return false
112        }
113      }
114    }
115  
116    return true
117  }
118  
/**
 * Strict shape of an allowed print command: an optional numeric address
 * (`N` or `N,M`) immediately followed by `p`, with nothing else around it.
 */
const PRINT_COMMAND_PATTERN = /^(?:\d+|\d+,\d+)?p$/

/**
 * Helper: check whether a single sed command is an allowed print command.
 *
 * STRICT ALLOWLIST — only these exact forms pass:
 * - `p`     (print every line)
 * - `Np`    (print line N, N being decimal digits)
 * - `N,Mp`  (print lines N through M)
 *
 * Everything else is rejected, including the empty string, surrounding
 * whitespace, `$` addresses and the w/W/e/E commands.
 *
 * @param cmd One sed command, already trimmed by the caller
 * @returns true when `cmd` is one of the forms above
 * @internal Exported for testing
 */
export function isPrintCommand(cmd: string): boolean {
  return Boolean(cmd) && PRINT_COMMAND_PATTERN.test(cmd)
}
134  
/**
 * Pattern 2: decide whether a sed invocation is a single, plain substitution.
 *
 * Accepted shape: `sed 's/pattern/replacement/flags'` where
 * - the delimiter is `/` (no alternate delimiters),
 * - the substitution flags are drawn from g, p, i, I, m, M plus at most one
 *   digit 1-9,
 * - the command-line flags are limited to -E, --regexp-extended, -r, --posix.
 *
 * With `allowFileWrites` set, -i / --in-place and file arguments are also
 * accepted; otherwise (the default) any file argument rejects the command so
 * that the result goes to stdout only.
 *
 * @param command Full sed command line, starting with `sed`
 * @param expressions sed scripts already extracted from the command
 * @param hasFileArguments Whether the command line names input files
 * @param options.allowFileWrites Permit in-place editing (default false)
 * @returns true only when the command matches the shape above
 */
function isSubstitutionCommand(
  command: string,
  expressions: string[],
  hasFileArguments: boolean,
  options?: { allowFileWrites?: boolean },
): boolean {
  const writesPermitted = options?.allowFileWrites ?? false

  // Read-only mode: no file operands at all
  if (hasFileArguments && !writesPermitted) {
    return false
  }

  const prefix = command.match(/^\s*sed\s+/)
  if (!prefix) return false

  const parsedArgs = tryParseShellCommand(command.slice(prefix[0].length))
  if (!parsedArgs.success) return false

  // Every dash-prefixed string token counts as a flag; a bare `--` does not
  const flags: string[] = parsedArgs.tokens.flatMap(token =>
    typeof token === 'string' && token.startsWith('-') && token !== '--'
      ? [token]
      : [],
  )

  // In-place flags are only added to the allowlist when writes are permitted
  const permitted = [
    '-E',
    '--regexp-extended',
    '-r',
    '--posix',
    ...(writesPermitted ? ['-i', '--in-place'] : []),
  ]
  if (!validateFlagsAgainstAllowlist(flags, permitted)) {
    return false
  }

  // Exactly one script is allowed
  if (expressions.length !== 1) {
    return false
  }
  const script = expressions[0]!.trim()

  // STRICT ALLOWLIST: must begin with `s`, which rules out standalone
  // commands such as `e` or `w file`
  if (!script.startsWith('s')) {
    return false
  }

  // Only `/` is accepted as the delimiter
  const slashForm = script.match(/^s\/(.*?)$/)
  if (!slashForm) {
    return false
  }
  const afterOpeningSlash = slashForm[1]!

  // Count unescaped `/` delimiters and remember where the final one sits
  let slashCount = 0
  let finalSlashAt = -1
  for (let pos = 0; pos < afterOpeningSlash.length; pos++) {
    const ch = afterOpeningSlash[pos]
    if (ch === '\\') {
      // Skip the escaped character (the loop increment supplies the second step)
      pos++
      continue
    }
    if (ch === '/') {
      slashCount++
      finalSlashAt = pos
    }
  }

  // After the opening slash there must be exactly two more: one closing the
  // pattern and one closing the replacement
  if (slashCount !== 2) {
    return false
  }

  // Whatever trails the last delimiter is the substitution's flag string:
  // only g, p, i, I, m, M and at most ONE digit 1-9 are acceptable
  const trailingFlags = afterOpeningSlash.slice(finalSlashAt + 1)
  return /^[gpimIM]*[1-9]?[gpimIM]*$/.test(trailingFlags)
}
239  
/**
 * Decide whether a sed command is covered by the allowlist.
 *
 * A command passes when it matches one of the allowlisted shapes
 * (line printing or a single substitution) and none of its expressions trips
 * the denylist in containsDangerousOperations, which runs as defense in depth.
 *
 * @param command The sed command to check
 * @param options.allowFileWrites When true, only the substitution shape is
 *   considered, and it may use -i and file arguments
 * @returns true if the command is allowed, false otherwise (including when
 *   its expressions cannot be extracted)
 */
export function sedCommandIsAllowedByAllowlist(
  command: string,
  options?: { allowFileWrites?: boolean },
): boolean {
  const writesPermitted = options?.allowFileWrites ?? false

  // Pull out the sed scripts; any extraction failure means "not allowed"
  let expressions: string[]
  try {
    expressions = extractSedExpressions(command)
  } catch {
    return false
  }

  const hasFileArguments = hasFileArgs(command)

  // Line printing never needs file writes, so it is only tried in the
  // standard read-only mode
  const matchesLinePrinting =
    !writesPermitted && isLinePrintingCommand(command, expressions)
  const matchesSubstitution = isSubstitutionCommand(
    command,
    expressions,
    hasFileArguments,
    { allowFileWrites: writesPermitted },
  )

  if (!matchesLinePrinting && !matchesSubstitution) {
    return false
  }

  // Semicolons separate print commands in Pattern 1 but are never allowed
  // in a substitution script
  if (matchesSubstitution && expressions.some(expr => expr.includes(';'))) {
    return false
  }

  // Defense in depth: an allowlist match is still vetoed by the denylist
  return !expressions.some(expr => containsDangerousOperations(expr))
}
302  
/**
 * Check if a sed command has file arguments (not just stdin).
 *
 * Counting rules:
 * - A glob token (e.g. `*.log`) is always a file argument.
 * - `-e <expr>` / `--expression <expr>` consume the following token;
 *   `--expression=<expr>` and `-e=<expr>` carry their value inline.
 * - Other dash-prefixed tokens are flags and are skipped — but only until a
 *   bare `--`. After `--` every string token is a positional operand, even if
 *   it starts with `-` (e.g. a file literally named `-E`).
 * - With an expression flag present, any positional token is a file. Without
 *   one, the first positional token is the sed script and any further one is
 *   a file.
 *
 * Fails closed: unparseable shell syntax is reported as "has file arguments".
 *
 * @param command Full sed command line, starting with `sed`
 * @returns true if the command names (or may name) input files; false if it
 *   does not, or if `command` does not start with `sed`
 * @internal Exported for testing
 */
export function hasFileArgs(command: string): boolean {
  const sedMatch = command.match(/^\s*sed\s+/)
  if (!sedMatch) return false

  const withoutSed = command.slice(sedMatch[0].length)
  const parseResult = tryParseShellCommand(withoutSed)
  // Malformed shell syntax: assume the worst
  if (!parseResult.success) return true
  const parsed = parseResult.tokens

  try {
    let positionalCount = 0
    let hasEFlag = false
    // Set once a bare `--` is seen; option parsing stops from then on
    let endOfOptions = false

    for (let i = 0; i < parsed.length; i++) {
      const arg = parsed[i]

      // A glob pattern (like *.log) always counts as a file argument
      if (
        typeof arg === 'object' &&
        arg !== null &&
        'op' in arg &&
        arg.op === 'glob'
      ) {
        return true
      }

      // Other non-string tokens (e.g. control operators) are ignored
      if (typeof arg !== 'string') continue

      if (!endOfOptions) {
        // `--` ends option parsing; what follows is positional even when it
        // begins with a dash
        if (arg === '--') {
          endOfOptions = true
          continue
        }

        // -e / --expression followed by its script
        if ((arg === '-e' || arg === '--expression') && i + 1 < parsed.length) {
          hasEFlag = true
          i++ // Skip the next argument since it's the expression
          continue
        }

        // --expression=value, and -e=value (non-standard but defense in depth)
        if (arg.startsWith('--expression=') || arg.startsWith('-e=')) {
          hasEFlag = true
          continue
        }

        // Any other dash-prefixed token is a flag
        if (arg.startsWith('-')) continue
      }

      positionalCount++

      // With -e in play every positional token is a file; without it the
      // first positional token is the script and the second is a file
      if (hasEFlag || positionalCount > 1) {
        return true
      }
    }

    return false
  } catch (_error) {
    return true // Assume dangerous if inspecting the tokens fails
  }
}
380  
/**
 * Extract the sed scripts from a command line, ignoring flags and filenames.
 *
 * Scripts are taken from `-e <expr>`, `--expression <expr>`,
 * `--expression=<expr>` and `-e=<expr>`. When none of those has been seen,
 * the first non-flag argument is taken as the script. Scanning stops at the
 * first remaining non-flag argument, which is treated as a filename.
 *
 * @param command Full sed command
 * @returns The extracted scripts; empty when `command` does not start with `sed`
 * @throws Error if the arguments contain a `-e`/`-w` sequence immediately
 *   followed by a dangerous letter, or if the shell syntax cannot be parsed
 * @internal Exported for testing
 */
export function extractSedExpressions(command: string): string[] {
  const prefix = command.match(/^\s*sed\s+/)
  if (!prefix) return []

  // Everything after the leading `sed` and its trailing whitespace
  const argString = command.slice(prefix[0].length)

  // Refuse clustered forms like -ew, -eW, -ee, -we, -wE anywhere in the raw
  // argument text (-e/-w fused with a dangerous command letter)
  const hasDangerousCluster =
    /-e[wWe]/.test(argString) || /-w[eE]/.test(argString)
  if (hasDangerousCluster) {
    throw new Error('Dangerous flag combination detected')
  }

  const parseResult = tryParseShellCommand(argString)
  if (!parseResult.success) {
    // Malformed shell syntax - surfaced to the caller as an error
    throw new Error(`Malformed shell syntax: ${parseResult.error}`)
  }
  const tokens = parseResult.tokens

  const collected: string[] = []
  try {
    let sawExpressionFlag = false
    let sawBareScript = false
    let cursor = 0

    while (cursor < tokens.length) {
      const token = tokens[cursor++]

      // Non-string tokens (like control operators) carry no script
      if (typeof token !== 'string') continue

      // -e / --expression with a following token: that token is the script
      // when it is a plain string; otherwise it is left for the next pass
      const isExpressionFlag = token === '-e' || token === '--expression'
      if (isExpressionFlag && cursor < tokens.length) {
        sawExpressionFlag = true
        const value = tokens[cursor]
        if (typeof value === 'string') {
          collected.push(value)
          cursor++
        }
        continue
      }

      // --expression=value, and -e=value (non-standard but defense in depth)
      const inlinePrefix = ['--expression=', '-e='].find(candidate =>
        token.startsWith(candidate),
      )
      if (inlinePrefix !== undefined) {
        sawExpressionFlag = true
        collected.push(token.slice(inlinePrefix.length))
        continue
      }

      // Any other dash-prefixed token is a flag
      if (token.startsWith('-')) continue

      // A non-flag token after an expression flag or after the bare script
      // is a filename: stop scanning
      if (sawExpressionFlag || sawBareScript) break

      // Otherwise this first non-flag token is the sed script itself
      collected.push(token)
      sawBareScript = true
    }
  } catch (error) {
    // Any failure while walking the tokens marks the command as unsafe
    throw new Error(
      `Failed to parse sed command: ${error instanceof Error ? error.message : 'Unknown error'}`,
    )
  }

  return collected
}
467  
/**
 * Denylist regexes: a match of ANY single entry marks a sed expression as
 * dangerous. None uses the `g` or `y` flag, so `.test()` keeps no state and
 * the instances are safe to share between calls.
 */
const SED_DENYLIST_PATTERNS: readonly RegExp[] = [
  // Anything outside 0x01-0x7F: non-ASCII look-alikes, combining marks, NUL
  // eslint-disable-next-line no-control-regex
  /[^\x01-\x7F]/,

  // Negation: leading `!`, or `!` right after an address (/re/!, 10!, $!)
  /^!/,
  /[/\d$]!/,

  // GNU step addresses: digit~digit, ,~digit, $~digit (whitespace tolerated)
  /\d\s*~\s*\d|,\s*~\s*\d|\$\s*~\s*\d/,

  // Leading bare comma (shorthand for the 1,$ range)
  /^,/,

  // GNU offset addresses: a comma followed by + or -
  /,\s*[+-]/,

  // Backslash tricks: `s\` (backslash as delimiter) and \| \# \% \@
  /s\\/,
  /\\[|#%@]/,

  // An escaped slash with w/W anywhere after it (/\/path\/to\/file/w)
  /\\\/.*[wW]/,

  // Slash, non-slash run, whitespace, then w/W/e/E (/pattern w file, ...)
  /\/[^/]*\s+[wWeE]/,

  // Write commands (w/W + filename), optionally preceded by an address
  /^[wW]\s*\S+/, // w file
  /^\d+\s*[wW]\s*\S+/, // 1w file, 1 w file
  /^\$\s*[wW]\s*\S+/, // $w file
  /^\/[^/]*\/[IMim]*\s*[wW]\s*\S+/, // /pattern/w file
  /^\d+,\d+\s*[wW]\s*\S+/, // 1,10w file
  /^\d+,\$\s*[wW]\s*\S+/, // 1,$w file
  /^\/[^/]*\/[IMim]*,\/[^/]*\/[IMim]*\s*[wW]\s*\S+/, // /s/,/e/w file

  // Execute command (e), optionally preceded by an address
  /^e/, // e cmd
  /^\d+\s*e/, // 1e, 1 e
  /^\$\s*e/, // $e
  /^\/[^/]*\/[IMim]*\s*e/, // /pattern/e
  /^\d+,\d+\s*e/, // 1,10e
  /^\d+,\$\s*e/, // 1,$e
  /^\/[^/]*\/[IMim]*,\/[^/]*\/[IMim]*\s*e/, // /s/,/e/e
]

/**
 * Check if a sed expression contains dangerous operations (denylist).
 *
 * Deliberately conservative: when a construct is not clearly understood it is
 * reported as dangerous. The checks are independent of one another and the
 * expression is dangerous as soon as any one of them fires.
 *
 * @param expression Single sed expression (without quotes)
 * @returns true if dangerous, false if safe (an empty or whitespace-only
 *   expression is reported as safe)
 */
function containsDangerousOperations(expression: string): boolean {
  const cmd = expression.trim()
  if (!cmd) return false

  // Unconditional regex denylist
  if (SED_DENYLIST_PATTERNS.some(pattern => pattern.test(cmd))) {
    return true
  }

  // Blocks ({ }) and multi-line scripts are too complex to vet
  if (['{', '}', '\n'].some(fragment => cmd.includes(fragment))) {
    return true
  }

  // `#` starts a comment unless the first one directly follows an `s`, where
  // it acts as the substitution delimiter (s#pattern#replacement#)
  const firstHash = cmd.indexOf('#')
  const hashIsDelimiter = firstHash > 0 && cmd[firstHash - 1] === 's'
  if (firstHash !== -1 && !hashIsDelimiter) {
    return true
  }

  // A slash-delimited substitution must have exactly the s/a/b/flags layout:
  // missing or extra delimiters (s/foo/bar//w) are rejected
  if (/^s\//.test(cmd) && !/^s\/[^/]*\/[^/]*\/[^/]*$/.test(cmd)) {
    return true
  }

  // PARANOID: something starting with `s` and ending in w/W/e/E must still
  // parse as a well-formed substitution (any delimiter) whose flag section
  // contains none of those letters
  if (
    /^s./.test(cmd) &&
    /[wWeE]$/.test(cmd) &&
    !/^s([^\\\n]).*?\1.*?\1[^wWeE]*$/.test(cmd)
  ) {
    return true
  }

  // Substitution with any delimiter: reject write (w/W) or execute (e/E)
  // letters in the flag section that follows the third delimiter
  const substitution = cmd.match(/s([^\\\n]).*?\1.*?\1(.*?)$/)
  if (substitution && /[wWeE]/.test(substitution[2] || '')) {
    return true
  }

  // PARANOID: a `y` followed by a possible delimiter, combined with any
  // w/W/e/E anywhere in the expression, is treated as suspicious
  return /y([^\\\n])/.test(cmd) && /[wWeE]/.test(cmd)
}
630  
/**
 * Cross-cutting validation step for sed commands.
 *
 * Splits the input into individual commands and checks every one whose first
 * word is `sed` against the sed allowlist. Non-sed commands are ignored.
 * When the permission mode is 'acceptEdits', the allowlist is evaluated with
 * file writes enabled (in-place substitution); in any other mode it is
 * evaluated read-only.
 *
 * @param input - Object containing the command string
 * @param toolPermissionContext - Context containing mode and permissions
 * @returns
 * - 'ask' as soon as one sed command is not covered by the allowlist
 * - 'passthrough' if there are no sed commands or all of them are allowed
 */
export function checkSedConstraints(
  input: { command: string },
  toolPermissionContext: ToolPermissionContext,
): PermissionResult {
  for (const segment of splitCommand_DEPRECATED(input.command)) {
    const sedCommand = segment.trim()

    // Only commands whose first word is exactly `sed` are inspected
    const [executable] = sedCommand.split(/\s+/)
    if (executable !== 'sed') {
      continue
    }

    // acceptEdits permits in-place edits (-i); dangerous operations are
    // still rejected by the allowlist itself
    const permitted = sedCommandIsAllowedByAllowlist(sedCommand, {
      allowFileWrites: toolPermissionContext.mode === 'acceptEdits',
    })
    if (permitted) {
      continue
    }

    return {
      behavior: 'ask',
      message:
        'sed command requires approval (contains potentially dangerous operations)',
      decisionReason: {
        type: 'other',
        reason:
          'sed command contains operations that require explicit approval (e.g., write commands, execute commands)',
      },
    }
  }

  // Nothing needed approval (or there were no sed commands at all)
  return {
    behavior: 'passthrough',
    message: 'No dangerous sed operations detected',
  }
}