/ tools / BashTool / sedEditParser.ts
sedEditParser.ts
  1  /**
  2   * Parser for sed edit commands (-i flag substitutions)
  3   * Extracts file paths and substitution patterns to enable file-edit-style rendering
  4   */
  5  
  6  import { randomBytes } from 'crypto'
  7  import { tryParseShellCommand } from '../../utils/bash/shellQuote.js'
  8  
  9  // BRE→ERE conversion placeholders (null-byte sentinels, never appear in user input)
 10  const BACKSLASH_PLACEHOLDER = '\x00BACKSLASH\x00'
 11  const PLUS_PLACEHOLDER = '\x00PLUS\x00'
 12  const QUESTION_PLACEHOLDER = '\x00QUESTION\x00'
 13  const PIPE_PLACEHOLDER = '\x00PIPE\x00'
 14  const LPAREN_PLACEHOLDER = '\x00LPAREN\x00'
 15  const RPAREN_PLACEHOLDER = '\x00RPAREN\x00'
 16  const BACKSLASH_PLACEHOLDER_RE = new RegExp(BACKSLASH_PLACEHOLDER, 'g')
 17  const PLUS_PLACEHOLDER_RE = new RegExp(PLUS_PLACEHOLDER, 'g')
 18  const QUESTION_PLACEHOLDER_RE = new RegExp(QUESTION_PLACEHOLDER, 'g')
 19  const PIPE_PLACEHOLDER_RE = new RegExp(PIPE_PLACEHOLDER, 'g')
 20  const LPAREN_PLACEHOLDER_RE = new RegExp(LPAREN_PLACEHOLDER, 'g')
 21  const RPAREN_PLACEHOLDER_RE = new RegExp(RPAREN_PLACEHOLDER, 'g')
 22  
 23  export type SedEditInfo = {
 24    /** The file path being edited */
 25    filePath: string
 26    /** The search pattern (regex) */
 27    pattern: string
 28    /** The replacement string */
 29    replacement: string
 30    /** Substitution flags (g, i, etc.) */
 31    flags: string
 32    /** Whether to use extended regex (-E or -r flag) */
 33    extendedRegex: boolean
 34  }
 35  
 36  /**
 37   * Check if a command is a sed in-place edit command
 38   * Returns true only for simple sed -i 's/pattern/replacement/flags' file commands
 39   */
 40  export function isSedInPlaceEdit(command: string): boolean {
 41    const info = parseSedEditCommand(command)
 42    return info !== null
 43  }
 44  
 45  /**
 46   * Parse a sed edit command and extract the edit information
 47   * Returns null if the command is not a valid sed in-place edit
 48   */
 49  export function parseSedEditCommand(command: string): SedEditInfo | null {
 50    const trimmed = command.trim()
 51  
 52    // Must start with sed
 53    const sedMatch = trimmed.match(/^\s*sed\s+/)
 54    if (!sedMatch) return null
 55  
 56    const withoutSed = trimmed.slice(sedMatch[0].length)
 57    const parseResult = tryParseShellCommand(withoutSed)
 58    if (!parseResult.success) return null
 59    const tokens = parseResult.tokens
 60  
 61    // Extract string tokens only
 62    const args: string[] = []
 63    for (const token of tokens) {
 64      if (typeof token === 'string') {
 65        args.push(token)
 66      } else if (
 67        typeof token === 'object' &&
 68        token !== null &&
 69        'op' in token &&
 70        token.op === 'glob'
 71      ) {
 72        // Glob patterns are too complex for this simple parser
 73        return null
 74      }
 75    }
 76  
 77    // Parse flags and arguments
 78    let hasInPlaceFlag = false
 79    let extendedRegex = false
 80    let expression: string | null = null
 81    let filePath: string | null = null
 82  
 83    let i = 0
 84    while (i < args.length) {
 85      const arg = args[i]!
 86  
 87      // Handle -i flag (with or without backup suffix)
 88      if (arg === '-i' || arg === '--in-place') {
 89        hasInPlaceFlag = true
 90        i++
 91        // On macOS, -i requires a suffix argument (even if empty string)
 92        // Check if next arg looks like a backup suffix (empty, or starts with dot)
 93        // Don't consume flags (-E, -r) or sed expressions (starting with s, y, d)
 94        if (i < args.length) {
 95          const nextArg = args[i]
 96          // If next arg is empty string or starts with dot, it's a backup suffix
 97          if (
 98            typeof nextArg === 'string' &&
 99            !nextArg.startsWith('-') &&
100            (nextArg === '' || nextArg.startsWith('.'))
101          ) {
102            i++ // Skip the backup suffix
103          }
104        }
105        continue
106      }
107      if (arg.startsWith('-i')) {
108        // -i.bak or similar (inline suffix)
109        hasInPlaceFlag = true
110        i++
111        continue
112      }
113  
114      // Handle extended regex flags
115      if (arg === '-E' || arg === '-r' || arg === '--regexp-extended') {
116        extendedRegex = true
117        i++
118        continue
119      }
120  
121      // Handle -e flag with expression
122      if (arg === '-e' || arg === '--expression') {
123        if (i + 1 < args.length && typeof args[i + 1] === 'string') {
124          // Only support single expression
125          if (expression !== null) return null
126          expression = args[i + 1]!
127          i += 2
128          continue
129        }
130        return null
131      }
132      if (arg.startsWith('--expression=')) {
133        if (expression !== null) return null
134        expression = arg.slice('--expression='.length)
135        i++
136        continue
137      }
138  
139      // Skip other flags we don't understand
140      if (arg.startsWith('-')) {
141        // Unknown flag - not safe to parse
142        return null
143      }
144  
145      // Non-flag argument
146      if (expression === null) {
147        // First non-flag arg is the expression
148        expression = arg
149      } else if (filePath === null) {
150        // Second non-flag arg is the file path
151        filePath = arg
152      } else {
153        // More than one file - not supported for simple rendering
154        return null
155      }
156  
157      i++
158    }
159  
160    // Must have -i flag, expression, and file path
161    if (!hasInPlaceFlag || !expression || !filePath) {
162      return null
163    }
164  
165    // Parse the substitution expression: s/pattern/replacement/flags
166    // Only support / as delimiter for simplicity
167    const substMatch = expression.match(/^s\//)
168    if (!substMatch) {
169      return null
170    }
171  
172    const rest = expression.slice(2) // Skip 's/'
173  
174    // Find pattern and replacement by tracking escaped characters
175    let pattern = ''
176    let replacement = ''
177    let flags = ''
178    let state: 'pattern' | 'replacement' | 'flags' = 'pattern'
179    let j = 0
180  
181    while (j < rest.length) {
182      const char = rest[j]!
183  
184      if (char === '\\' && j + 1 < rest.length) {
185        // Escaped character
186        if (state === 'pattern') {
187          pattern += char + rest[j + 1]
188        } else if (state === 'replacement') {
189          replacement += char + rest[j + 1]
190        } else {
191          flags += char + rest[j + 1]
192        }
193        j += 2
194        continue
195      }
196  
197      if (char === '/') {
198        if (state === 'pattern') {
199          state = 'replacement'
200        } else if (state === 'replacement') {
201          state = 'flags'
202        } else {
203          // Extra delimiter in flags - unexpected
204          return null
205        }
206        j++
207        continue
208      }
209  
210      if (state === 'pattern') {
211        pattern += char
212      } else if (state === 'replacement') {
213        replacement += char
214      } else {
215        flags += char
216      }
217      j++
218    }
219  
220    // Must have found all three parts (pattern, replacement delimiter, and optional flags)
221    if (state !== 'flags') {
222      return null
223    }
224  
225    // Validate flags - only allow safe substitution flags
226    const validFlags = /^[gpimIM1-9]*$/
227    if (!validFlags.test(flags)) {
228      return null
229    }
230  
231    return {
232      filePath,
233      pattern,
234      replacement,
235      flags,
236      extendedRegex,
237    }
238  }
239  
/**
 * Apply a sed substitution to file content.
 *
 * Mirrors how sed runs an `s` command: the content is processed one line at
 * a time, so without the `g` flag the first match on every line is replaced
 * and `^` / `$` anchor at each line. A numeric flag N selects the Nth match
 * on each line (together with `g`: the Nth and all later matches).
 *
 * In the replacement, `&` is the whole match, `\1`..`\9` are capture groups,
 * `\n` is a newline (GNU sed behaviour), and a backslash before any other
 * character yields that character literally (`\&`, `\/`, `\\`). Every other
 * character, including `$`, is copied as-is.
 *
 * @param content - Full text of the file
 * @param sedInfo - The parsed substitution to apply
 * @returns The content after substitution. The content is returned unchanged
 *   when the pattern is not a valid regex or the replacement refers to a
 *   capture group the pattern does not define.
 */
export function applySedSubstitution(
  content: string,
  sedInfo: SedEditInfo,
): string {
  // sed reads no lines from an empty input, so nothing can be substituted
  if (content === '') return content

  // Always match globally: the replacer below counts matches per line and
  // decides which occurrences are actually rewritten.
  let regexFlags = 'g'

  // Handle case-insensitive flag (i or I in sed)
  if (sedInfo.flags.includes('i') || sedInfo.flags.includes('I')) {
    regexFlags += 'i'
  }

  // The sed m/M flag only changes how ^ and $ treat newlines inside the
  // text being matched. Lines are matched individually here and never
  // contain a newline, so the flag needs no JS counterpart.

  // Convert sed pattern to JavaScript regex pattern
  let jsPattern = sedInfo.pattern
    // Unescape \/ to /
    .replace(/\\\//g, '/')

  // In BRE mode (no -E flag), metacharacters have opposite escaping:
  // BRE: \+ means "one or more", + is literal
  // ERE/JS: + means "one or more", \+ is literal
  // We need to convert BRE escaping to ERE for JavaScript regex
  if (!sedInfo.extendedRegex) {
    jsPattern = jsPattern
      // Step 1: Protect literal backslashes (\\) first - in both BRE and ERE, \\ is literal backslash
      .replace(/\\\\/g, BACKSLASH_PLACEHOLDER)
      // Step 2: Replace escaped metacharacters with placeholders (these should become unescaped in JS)
      .replace(/\\\+/g, PLUS_PLACEHOLDER)
      .replace(/\\\?/g, QUESTION_PLACEHOLDER)
      .replace(/\\\|/g, PIPE_PLACEHOLDER)
      .replace(/\\\(/g, LPAREN_PLACEHOLDER)
      .replace(/\\\)/g, RPAREN_PLACEHOLDER)
      // Step 3: Escape unescaped metacharacters (these are literal in BRE)
      .replace(/\+/g, '\\+')
      .replace(/\?/g, '\\?')
      .replace(/\|/g, '\\|')
      .replace(/\(/g, '\\(')
      .replace(/\)/g, '\\)')
      // Step 4: Replace placeholders with their JS equivalents
      .replace(BACKSLASH_PLACEHOLDER_RE, '\\\\')
      .replace(PLUS_PLACEHOLDER_RE, '+')
      .replace(QUESTION_PLACEHOLDER_RE, '?')
      .replace(PIPE_PLACEHOLDER_RE, '|')
      .replace(LPAREN_PLACEHOLDER_RE, '(')
      .replace(RPAREN_PLACEHOLDER_RE, ')')
  }

  // Pre-parse the sed replacement into a template: strings are literal text,
  // numbers are group references (0 = whole match). Expanding this in a
  // replacer function keeps JS from giving `$` sequences a special meaning.
  const template: Array<string | number> = []
  let highestGroupRef = 0
  const rawReplacement = sedInfo.replacement
  for (let k = 0; k < rawReplacement.length; k++) {
    const char = rawReplacement[k]!
    if (char === '&') {
      template.push(0)
    } else if (char === '\\' && k + 1 < rawReplacement.length) {
      const escaped = rawReplacement[++k]!
      if (escaped >= '1' && escaped <= '9') {
        const group = Number(escaped)
        highestGroupRef = Math.max(highestGroupRef, group)
        template.push(group)
      } else {
        template.push(escaped === 'n' ? '\n' : escaped)
      }
    } else {
      template.push(char)
    }
  }

  // Compile the pattern. Yields null when it is not a valid JS regex or when
  // the replacement references a group the pattern lacks (sed rejects that).
  const compilePattern = (): RegExp | null => {
    try {
      // `pattern|` always matches the empty string, so the length of the
      // match array reveals how many capture groups the pattern defines.
      const groupCount =
        (new RegExp(`${jsPattern}|`).exec('')?.length ?? 1) - 1
      if (highestGroupRef > groupCount) return null
      return new RegExp(jsPattern, regexFlags)
    } catch {
      return null
    }
  }
  const regex = compilePattern()
  if (regex === null) return content

  // Occurrence selection: a numeric flag picks the Nth match on each line
  // (default 1); `g` extends the selection to every later match as well.
  const occurrenceFlag = /[1-9]+/.exec(sedInfo.flags)
  const firstOccurrence = occurrenceFlag ? Number(occurrenceFlag[0]) : 1
  const replaceLaterMatches = sedInfo.flags.includes('g')

  const expand = (match: string, captures: unknown[]): string =>
    template
      .map(part => {
        if (typeof part === 'string') return part
        if (part === 0) return match
        // Groups that did not participate in the match expand to nothing
        const captured = captures[part - 1]
        return typeof captured === 'string' ? captured : ''
      })
      .join('')

  // A trailing newline terminates the last line; it does not start another
  // (empty) line that patterns such as `$` could match.
  const endsWithNewline = content.endsWith('\n')
  const body = endsWithNewline ? content.slice(0, -1) : content

  const rewritten = body
    .split('\n')
    .map(line => {
      let occurrence = 0
      return line.replace(regex, (match: string, ...captures: unknown[]) => {
        occurrence++
        const selected =
          occurrence === firstOccurrence ||
          (replaceLaterMatches && occurrence > firstOccurrence)
        return selected ? expand(match, captures) : match
      })
    })
    .join('\n')

  return endsWithNewline ? rewritten + '\n' : rewritten
}