/ tools / FileEditTool / utils.ts
utils.ts
  1  import { type StructuredPatchHunk, structuredPatch } from 'diff'
  2  import { logError } from 'src/utils/log.js'
  3  import { expandPath } from 'src/utils/path.js'
  4  import { countCharInString } from 'src/utils/stringUtils.js'
  5  import {
  6    DIFF_TIMEOUT_MS,
  7    getPatchForDisplay,
  8    getPatchFromContents,
  9  } from '../../utils/diff.js'
 10  import { errorMessage, isENOENT } from '../../utils/errors.js'
 11  import {
 12    addLineNumbers,
 13    convertLeadingTabsToSpaces,
 14    readFileSyncCached,
 15  } from '../../utils/file.js'
 16  import type { EditInput, FileEdit } from './types.js'
 17  
 18  // Claude can't output curly quotes, so we define them as constants here for Claude to use
 19  // in the code. We do this because we normalize curly quotes to straight quotes
 20  // when applying edits.
 21  export const LEFT_SINGLE_CURLY_QUOTE = '‘'
 22  export const RIGHT_SINGLE_CURLY_QUOTE = '’'
 23  export const LEFT_DOUBLE_CURLY_QUOTE = '“'
 24  export const RIGHT_DOUBLE_CURLY_QUOTE = '”'
 25  
 26  /**
 27   * Normalizes quotes in a string by converting curly quotes to straight quotes
 28   * @param str The string to normalize
 29   * @returns The string with all curly quotes replaced by straight quotes
 30   */
 31  export function normalizeQuotes(str: string): string {
 32    return str
 33      .replaceAll(LEFT_SINGLE_CURLY_QUOTE, "'")
 34      .replaceAll(RIGHT_SINGLE_CURLY_QUOTE, "'")
 35      .replaceAll(LEFT_DOUBLE_CURLY_QUOTE, '"')
 36      .replaceAll(RIGHT_DOUBLE_CURLY_QUOTE, '"')
 37  }
 38  
 39  /**
 40   * Strips trailing whitespace from each line in a string while preserving line endings
 41   * @param str The string to process
 42   * @returns The string with trailing whitespace removed from each line
 43   */
 44  export function stripTrailingWhitespace(str: string): string {
 45    // Handle different line endings: CRLF, LF, CR
 46    // Use a regex that matches line endings and captures them
 47    const lines = str.split(/(\r\n|\n|\r)/)
 48  
 49    let result = ''
 50    for (let i = 0; i < lines.length; i++) {
 51      const part = lines[i]
 52      if (part !== undefined) {
 53        if (i % 2 === 0) {
 54          // Even indices are line content
 55          result += part.replace(/\s+$/, '')
 56        } else {
 57          // Odd indices are line endings
 58          result += part
 59        }
 60      }
 61    }
 62  
 63    return result
 64  }
 65  
 66  /**
 67   * Finds the actual string in the file content that matches the search string,
 68   * accounting for quote normalization
 69   * @param fileContent The file content to search in
 70   * @param searchString The string to search for
 71   * @returns The actual string found in the file, or null if not found
 72   */
 73  export function findActualString(
 74    fileContent: string,
 75    searchString: string,
 76  ): string | null {
 77    // First try exact match
 78    if (fileContent.includes(searchString)) {
 79      return searchString
 80    }
 81  
 82    // Try with normalized quotes
 83    const normalizedSearch = normalizeQuotes(searchString)
 84    const normalizedFile = normalizeQuotes(fileContent)
 85  
 86    const searchIndex = normalizedFile.indexOf(normalizedSearch)
 87    if (searchIndex !== -1) {
 88      // Find the actual string in the file that matches
 89      return fileContent.substring(searchIndex, searchIndex + searchString.length)
 90    }
 91  
 92    return null
 93  }
 94  
 95  /**
 96   * When old_string matched via quote normalization (curly quotes in file,
 97   * straight quotes from model), apply the same curly quote style to new_string
 98   * so the edit preserves the file's typography.
 99   *
100   * Uses a simple open/close heuristic: a quote character preceded by whitespace,
101   * start of string, or opening punctuation is treated as an opening quote;
102   * otherwise it's a closing quote.
103   */
104  export function preserveQuoteStyle(
105    oldString: string,
106    actualOldString: string,
107    newString: string,
108  ): string {
109    // If they're the same, no normalization happened
110    if (oldString === actualOldString) {
111      return newString
112    }
113  
114    // Detect which curly quote types were in the file
115    const hasDoubleQuotes =
116      actualOldString.includes(LEFT_DOUBLE_CURLY_QUOTE) ||
117      actualOldString.includes(RIGHT_DOUBLE_CURLY_QUOTE)
118    const hasSingleQuotes =
119      actualOldString.includes(LEFT_SINGLE_CURLY_QUOTE) ||
120      actualOldString.includes(RIGHT_SINGLE_CURLY_QUOTE)
121  
122    if (!hasDoubleQuotes && !hasSingleQuotes) {
123      return newString
124    }
125  
126    let result = newString
127  
128    if (hasDoubleQuotes) {
129      result = applyCurlyDoubleQuotes(result)
130    }
131    if (hasSingleQuotes) {
132      result = applyCurlySingleQuotes(result)
133    }
134  
135    return result
136  }
137  
/**
 * Reports whether a quote at `chars[index]` sits in an opening position.
 * @param chars The characters of the string being converted
 * @param index Position of the quote character within `chars`
 * @returns true when the quote is the first character, or when the preceding
 *   character is a space, tab, newline, carriage return, opening bracket
 *   (`(`, `[`, `{`), em dash, or en dash; false otherwise
 */
function isOpeningContext(chars: string[], index: number): boolean {
  // Nothing precedes the first character, so a quote there opens
  if (index === 0) {
    return true
  }

  switch (chars[index - 1]) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
    case '(':
    case '[':
    case '{':
    case '\u2014': // em dash
    case '\u2013': // en dash
      return true
    default:
      return false
  }
}
155  
/**
 * Converts every straight double quote in a string to a curly double quote.
 * @param str The text to convert
 * @returns The text with each `"` replaced by a left curly quote when
 *   isOpeningContext reports an opening position, and by a right curly quote
 *   otherwise
 */
function applyCurlyDoubleQuotes(str: string): string {
  // Spread iterates by code point, so surrogate pairs stay together
  const chars = [...str]
  return chars
    .map((ch, position) => {
      if (ch !== '"') {
        return ch
      }
      return isOpeningContext(chars, position)
        ? LEFT_DOUBLE_CURLY_QUOTE
        : RIGHT_DOUBLE_CURLY_QUOTE
    })
    .join('')
}
172  
/**
 * Converts every straight single quote in a string to a curly single quote.
 *
 * An apostrophe between two letters (e.g., "don't", "it's") is a contraction
 * and always becomes a right curly quote. Any other `'` becomes a left curly
 * quote when isOpeningContext reports an opening position, and a right curly
 * quote otherwise.
 *
 * @param str The text to convert
 * @returns The text with straight single quotes replaced by curly ones
 */
function applyCurlySingleQuotes(str: string): string {
  const chars = [...str]
  // Out-of-range neighbours read as undefined and therefore count as "not a letter"
  const isLetter = (ch: string | undefined): boolean =>
    ch !== undefined && /\p{L}/u.test(ch)

  return chars
    .map((ch, position) => {
      if (ch !== "'") {
        return ch
      }
      // Letter on both sides: this is an apostrophe inside a word
      if (isLetter(chars[position - 1]) && isLetter(chars[position + 1])) {
        return RIGHT_SINGLE_CURLY_QUOTE
      }
      return isOpeningContext(chars, position)
        ? LEFT_SINGLE_CURLY_QUOTE
        : RIGHT_SINGLE_CURLY_QUOTE
    })
    .join('')
}
200  
/**
 * Replaces `oldString` with `newString` in `originalContent` and returns the
 * resulting text. The replacement text is always inserted literally.
 *
 * Deletions (`newString === ''`) get special handling: when `oldString` does
 * not end in a newline and the content contains `oldString` directly followed
 * by `'\n'`, that newline is removed together with the match so no blank line
 * is left behind.
 *
 * @param originalContent The text to edit
 * @param oldString The text to search for
 * @param newString The replacement text; an empty string deletes the match
 * @param replaceAll When true every occurrence is replaced; otherwise only
 *   the first one (default false)
 * @returns The edited text, or the original text if nothing matched
 */
export function applyEditToFile(
  originalContent: string,
  oldString: string,
  newString: string,
  replaceAll: boolean = false,
): string {
  // A replacer callback (instead of a replacement string) keeps sequences such
  // as `$&` or `$1` in the new text from being treated as substitution patterns.
  const substitute = (search: string): string =>
    replaceAll
      ? originalContent.replaceAll(search, () => newString)
      : originalContent.replace(search, () => newString)

  if (newString !== '') {
    return substitute(oldString)
  }

  // Deleting: decide whether a newline that follows the match should go too
  const stripTrailingNewline =
    !oldString.endsWith('\n') && originalContent.includes(oldString + '\n')
  if (!stripTrailingNewline) {
    return substitute(oldString)
  }

  // Single deletion (and the degenerate empty search string) removes
  // `oldString + '\n'` as one unit.
  if (!replaceAll || oldString === '') {
    return substitute(oldString + '\n')
  }

  // Delete-all: remove EVERY occurrence of oldString in one left-to-right
  // pass, swallowing a newline that directly follows an occurrence. Searching
  // only for `oldString + '\n'` would leave behind occurrences that are not
  // followed by a newline.
  const keptPieces: string[] = []
  let cursor = 0
  for (
    let hit = originalContent.indexOf(oldString);
    hit !== -1;
    hit = originalContent.indexOf(oldString, cursor)
  ) {
    keptPieces.push(originalContent.slice(cursor, hit))
    cursor = hit + oldString.length
    if (originalContent[cursor] === '\n') {
      cursor += 1
    }
  }
  keptPieces.push(originalContent.slice(cursor))
  return keptPieces.join('')
}
229  
/**
 * Applies a single edit to a file and returns the patch and updated file.
 * Does not write the file to disk.
 *
 * Wraps the edit in a one-element list and delegates to getPatchForEdits.
 *
 * @param filePath Path of the file being edited
 * @param fileContents The file content before the edit
 * @param oldString The text to replace
 * @param newString The text to replace it with
 * @param replaceAll Whether to replace every occurrence (default false)
 * @returns The patch hunks and the edited content
 */
export function getPatchForEdit({
  filePath,
  fileContents,
  oldString,
  newString,
  replaceAll = false,
}: {
  filePath: string
  fileContents: string
  oldString: string
  newString: string
  replaceAll?: boolean
}): { patch: StructuredPatchHunk[]; updatedFile: string } {
  const singleEdit: FileEdit = {
    old_string: oldString,
    new_string: newString,
    replace_all: replaceAll,
  }
  return getPatchForEdits({ filePath, fileContents, edits: [singleEdit] })
}
255  
/**
 * Applies a list of edits to a file, in order, and returns the patch and the
 * updated file. Does not write the file to disk.
 *
 * NOTE: The returned patch is to be used for display purposes only - it has spaces instead of tabs
 *
 * @param filePath Path forwarded to the patch helpers
 * @param fileContents The file content before any edit is applied
 * @param edits The edits to apply sequentially; an edit whose old_string is
 *   empty replaces the whole content with its new_string
 * @returns The patch between the original and edited content, and the edited
 *   content itself
 * @throws Error when an edit's old_string (ignoring trailing newlines) occurs
 *   inside the new_string of an earlier edit, when an edit leaves the content
 *   unchanged, or when the final content equals the original
 */
export function getPatchForEdits({
  filePath,
  fileContents,
  edits,
}: {
  filePath: string
  fileContents: string
  edits: FileEdit[]
}): { patch: StructuredPatchHunk[]; updatedFile: string } {
  let updatedFile = fileContents

  // Special case for empty files: a lone edit from '' to '' on empty content
  // is answered directly instead of being rejected as a no-op below.
  const [firstEdit] = edits
  if (
    !fileContents &&
    edits.length === 1 &&
    firstEdit &&
    firstEdit.old_string === '' &&
    firstEdit.new_string === ''
  ) {
    return {
      patch: getPatchForDisplay({
        filePath,
        fileContents,
        edits: [
          {
            old_string: fileContents,
            new_string: updatedFile,
            replace_all: false,
          },
        ],
      }),
      updatedFile: '',
    }
  }

  // new_string of every edit applied so far, in application order
  const newStringsSoFar: string[] = []

  for (const { old_string, new_string, replace_all } of edits) {
    // Trailing newlines are ignored for the overlap check
    const trimmedOldString = old_string.replace(/\n+$/, '')

    // Refuse an edit whose target text appears in an earlier replacement
    if (
      trimmedOldString !== '' &&
      newStringsSoFar.some(earlier => earlier.includes(trimmedOldString))
    ) {
      throw new Error(
        'Cannot edit file: old_string is a substring of a new_string from a previous edit.',
      )
    }

    const contentBeforeEdit = updatedFile
    if (old_string === '') {
      // An empty old_string swaps in new_string as the entire content
      updatedFile = new_string
    } else {
      updatedFile = applyEditToFile(
        updatedFile,
        old_string,
        new_string,
        replace_all,
      )
    }

    // An edit that changes nothing is reported as a failed match
    if (updatedFile === contentBeforeEdit) {
      throw new Error('String not found in file. Failed to apply edit.')
    }

    newStringsSoFar.push(new_string)
  }

  if (updatedFile === fileContents) {
    throw new Error(
      'Original and edited file match exactly. Failed to apply edit.',
    )
  }

  // Before and after content are both at hand, so getPatchFromContents is
  // called directly. Going through getPatchForDisplay with
  // edits=[{old:fileContents,new:updatedFile}] transformed fileContents twice
  // and ran a no-op full-content .replace(); skipping that saves ~20% on large
  // files.
  return {
    patch: getPatchFromContents({
      filePath,
      oldContent: convertLeadingTabsToSpaces(fileContents),
      newContent: convertLeadingTabsToSpaces(updatedFile),
    }),
    updatedFile,
  }
}
351  
// Cap on edited_text_file attachment snippets. Format-on-save of a large file
// previously injected the entire file per turn (observed max 16.1KB, ~14K
// tokens/session). 8KB preserves meaningful context while bounding worst case.
// NOTE: the cap is compared against String.length (UTF-16 code units), so it
// is an exact byte count only for ASCII content.
const DIFF_SNIPPET_MAX_BYTES = 8192

/**
 * Used for attachments, to show snippets when files change.
 *
 * Diffs the two contents with 8 lines of context and renders each hunk as
 * line-numbered text of the NEW content (deleted lines and `\` diff metadata
 * lines are dropped), with hunks separated by a `...` line. Output longer than
 * DIFF_SNIPPET_MAX_BYTES is cut and suffixed with a truncation marker.
 *
 * TODO: Unify this with the other snippet logic.
 *
 * @param fileAContents The content before the change
 * @param fileBContents The content after the change
 * @returns The rendered snippet, or '' when structuredPatch yields no patch
 */
export function getSnippetForTwoFileDiff(
  fileAContents: string,
  fileBContents: string,
): string {
  const patch = structuredPatch(
    'file.txt',
    'file.txt',
    fileAContents,
    fileBContents,
    undefined,
    undefined,
    {
      context: 8,
      timeout: DIFF_TIMEOUT_MS,
    },
  )

  if (!patch) {
    return ''
  }

  const full = patch.hunks
    .map(hunk => ({
      // The rendered lines come from the new file, so they are numbered from
      // the hunk's position in the new file. Numbering from oldStart would
      // drift for every hunk that follows an insertion or deletion.
      startLine: hunk.newStart,
      content: hunk.lines
        // Filter out deleted lines AND diff metadata lines
        .filter(line => !line.startsWith('-') && !line.startsWith('\\'))
        // Drop the one-character diff prefix
        .map(line => line.slice(1))
        .join('\n'),
    }))
    .map(addLineNumbers)
    .join('\n...\n')

  if (full.length <= DIFF_SNIPPET_MAX_BYTES) {
    return full
  }

  // Truncate at the last line boundary that fits within the cap.
  // Marker format matches BashTool/utils.ts.
  const cutoff = full.lastIndexOf('\n', DIFF_SNIPPET_MAX_BYTES)
  // Without a usable line boundary, fall back to a hard cut at the cap
  const kept =
    cutoff > 0 ? full.slice(0, cutoff) : full.slice(0, DIFF_SNIPPET_MAX_BYTES)
  // NOTE(review): when the cut lands on a newline, that newline sits at index
  // kept.length. If countCharInString counts from that index inclusively, the
  // `+ 1` overstates the truncated line count by one - confirm against the helper.
  const remaining = countCharInString(full, '\n', kept.length) + 1
  return `${kept}\n\n... [${remaining} lines truncated] ...`
}
407  
// Number of extra lines shown before the first and after the last hunk
const CONTEXT_LINES = 4

/**
 * Gets a snippet from the edited file spanning every hunk of a patch, plus
 * CONTEXT_LINES lines on each side, with line numbers.
 * @param patch The diff hunks to use for determining snippet location
 * @param newFile The file content after applying the patch
 * @returns The snippet text with line numbers and the 1-based line number of
 *   its first line; an empty snippet with startLine 1 when the patch has no hunks
 */
export function getSnippetForPatch(
  patch: StructuredPatchHunk[],
  newFile: string,
): { formattedSnippet: string; startLine: number } {
  if (patch.length === 0) {
    // No changes, return empty snippet
    return { formattedSnippet: '', startLine: 1 }
  }

  // Find the first and last changed lines across all hunks. The snippet is
  // cut from the new file, so both bounds use new-file coordinates
  // (newStart/newLines). Old-file coordinates drift away from the new file
  // after any hunk that adds or removes lines.
  let minLine = Infinity
  let maxLine = -Infinity

  for (const hunk of patch) {
    if (hunk.newStart < minLine) {
      minLine = hunk.newStart
    }
    const hunkEnd = hunk.newStart + (hunk.newLines || 0) - 1
    if (hunkEnd > maxLine) {
      maxLine = hunkEnd
    }
  }

  // Calculate the range with context (startLine is clamped to the first line)
  const startLine = Math.max(1, minLine - CONTEXT_LINES)
  const endLine = maxLine + CONTEXT_LINES

  // Split the new file into lines and get the snippet; slice() clamps endLine
  // to the end of the file
  const snippet = newFile
    .split(/\r?\n/)
    .slice(startLine - 1, endLine)
    .join('\n')

  // Add line numbers
  const formattedSnippet = addLineNumbers({
    content: snippet,
    startLine,
  })

  return { formattedSnippet, startLine }
}
458  
/**
 * Gets a snippet from a file showing the context around a single edit.
 * This is a convenience function that uses the original algorithm from
 * FileEditTool.tsx.
 * @param originalFile The original file content
 * @param oldString The text to replace
 * @param newString The text to replace it with
 * @param contextLines The number of lines to show before and after the change
 * @returns The snippet (taken from the edited content, without line numbers)
 *   and the 1-based line number at which it starts
 */
export function getSnippet(
  originalFile: string,
  oldString: string,
  newString: string,
  contextLines: number = 4,
): { snippet: string; startLine: number } {
  const lineBreak = /\r?\n/

  // Everything before the first occurrence of oldString tells us the
  // zero-based index of the line where the replacement begins
  const [textBeforeEdit = ''] = originalFile.split(oldString)
  const editLineIndex = textBeforeEdit.split(lineBreak).length - 1

  // Zero-based window into the edited file: contextLines before the edit, the
  // lines of newString, and contextLines after
  const windowStart = Math.max(0, editLineIndex - contextLines)
  const windowEnd =
    editLineIndex + contextLines + newString.split(lineBreak).length

  const snippet = applyEditToFile(originalFile, oldString, newString)
    .split(lineBreak)
    .slice(windowStart, windowEnd)
    .join('\n')

  return { snippet, startLine: windowStart + 1 }
}
494  
/**
 * Converts each hunk of a patch into an equivalent single-occurrence edit.
 *
 * For every hunk, old_string is the hunk's context and deleted lines joined
 * with '\n', and new_string is its context and added lines joined with '\n'.
 *
 * @param patch The diff hunks to convert
 * @returns One edit per hunk, in hunk order, each with replace_all false
 */
export function getEditsForPatch(patch: StructuredPatchHunk[]): FileEdit[] {
  return patch.map(hunk => {
    const oldLines: string[] = []
    const newLines: string[] = []

    // Sort each line into the old and/or new side by its one-character prefix
    for (const line of hunk.lines) {
      const text = line.slice(1)
      switch (line[0]) {
        case ' ':
          // Context line - appears in both versions
          oldLines.push(text)
          newLines.push(text)
          break
        case '-':
          // Deleted line - only in old version
          oldLines.push(text)
          break
        case '+':
          // Added line - only in new version
          newLines.push(text)
          break
        default:
          // Any other prefix (such as `\` diff metadata) contributes nothing
          break
      }
    }

    return {
      old_string: oldLines.join('\n'),
      new_string: newLines.join('\n'),
      replace_all: false,
    }
  })
}
525  
/**
 * Contains replacements to de-sanitize strings from Claude
 * Since Claude can't see any of these strings (sanitized in the API)
 * It'll output the sanitized versions in the edit response
 *
 * Keys are the sanitized forms; values are the original text they stand for.
 */
const DESANITIZATIONS: Record<string, string> = {
  '<fnr>': '<function_results>',
  '<n>': '<name>',
  '</n>': '</name>',
  '<o>': '<output>',
  '</o>': '</output>',
  '<e>': '<error>',
  '</e>': '</error>',
  '<s>': '<system>',
  '</s>': '</system>',
  '<r>': '<result>',
  '</r>': '</result>',
  '< META_START >': '<META_START>',
  '< META_END >': '<META_END>',
  '< EOT >': '<EOT>',
  '< META >': '<META>',
  '< SOS >': '<SOS>',
  '\n\nH:': '\n\nHuman:',
  '\n\nA:': '\n\nAssistant:',
}

/**
 * Rewrites a match string by expanding every sanitized token listed in
 * DESANITIZATIONS. This helps when an exact match fails because the model
 * produced the sanitized form of text that is in the file.
 * @param matchString The string to de-sanitize
 * @returns The rewritten string, and the replacements (in table order) that
 *   actually changed it
 */
function desanitizeMatchString(matchString: string): {
  result: string
  appliedReplacements: Array<{ from: string; to: string }>
} {
  const appliedReplacements: Array<{ from: string; to: string }> = []

  // Replacements run one after another in table order, each on the output of
  // the previous one
  const result = Object.entries(DESANITIZATIONS).reduce(
    (text, [from, to]) => {
      const expanded = text.replaceAll(from, to)
      if (expanded !== text) {
        appliedReplacements.push({ from, to })
      }
      return expanded
    },
    matchString,
  )

  return { result, appliedReplacements }
}
575  
/**
 * Normalize the input for the FileEditTool.
 *
 * For every edit, trailing whitespace is stripped from each line of
 * new_string (except for Markdown files). If old_string is not found in the
 * file, a de-sanitized version is tried; when that version is found, the same
 * replacements are applied to new_string as well.
 *
 * @param file_path Path of the file being edited
 * @param edits The edits as supplied
 * @returns The normalized input, or the original input when there are no
 *   edits or the file could not be read
 */
export function normalizeFileEditInput({
  file_path,
  edits,
}: {
  file_path: string
  edits: EditInput[]
}): {
  file_path: string
  edits: EditInput[]
} {
  if (edits.length === 0) {
    return { file_path, edits }
  }

  // Markdown uses two trailing spaces as a hard line break — stripping would
  // silently change semantics. Skip stripTrailingWhitespace for .md/.mdx.
  const isMarkdown = /\.(md|mdx)$/i.test(file_path)

  try {
    // Use cached file read to avoid redundant I/O operations.
    // If the file doesn't exist, readFileSyncCached throws ENOENT which the
    // catch below handles by returning the original input (no TOCTOU pre-check).
    const fileContent = readFileSyncCached(expandPath(file_path))

    const normalizeEdit = ({
      old_string,
      new_string,
      replace_all,
    }: EditInput): EditInput => {
      const cleanedNewString = isMarkdown
        ? new_string
        : stripTrailingWhitespace(new_string)

      // Only when the exact string is missing do we try the de-sanitized form
      if (!fileContent.includes(old_string)) {
        const { result: desanitizedOldString, appliedReplacements } =
          desanitizeMatchString(old_string)

        if (fileContent.includes(desanitizedOldString)) {
          // Apply the same exact replacements to new_string
          const desanitizedNewString = appliedReplacements.reduce(
            (text, { from, to }) => text.replaceAll(from, to),
            cleanedNewString,
          )
          return {
            old_string: desanitizedOldString,
            new_string: desanitizedNewString,
            replace_all,
          }
        }
      }

      // Exact match, or no variant matched: keep old_string as supplied
      return { old_string, new_string: cleanedNewString, replace_all }
    }

    return {
      file_path,
      edits: edits.map(edit => normalizeEdit(edit)),
    }
  } catch (error) {
    // If there's any error reading the file, just return original input.
    // ENOENT is expected when the file doesn't exist yet (e.g., new file).
    if (!isENOENT(error)) {
      logError(error)
    }
  }

  return { file_path, edits }
}
658  
/**
 * Compare two sets of edits to determine if they are equivalent
 * by applying both sets to the original content and comparing results.
 * This handles cases where edits might be different but produce the same outcome.
 *
 * @param edits1 The first list of edits
 * @param edits2 The second list of edits
 * @param originalContent The content both lists are applied to
 * @returns true when the lists are literally identical, when both apply
 *   cleanly and yield the same content, or when both fail with the same error
 *   message; false otherwise
 */
export function areFileEditsEquivalent(
  edits1: FileEdit[],
  edits2: FileEdit[],
  originalContent: string,
): boolean {
  // Fast path: check if edits are literally identical
  const literallyIdentical =
    edits1.length === edits2.length &&
    edits1.every((left, index) => {
      const right = edits2[index]
      return (
        right !== undefined &&
        left.old_string === right.old_string &&
        left.new_string === right.new_string &&
        left.replace_all === right.replace_all
      )
    })
  if (literallyIdentical) {
    return true
  }

  // Result of applying one edit list: either the edited content or the
  // message of the error that getPatchForEdits threw
  type Outcome =
    | { ok: true; updatedFile: string }
    | { ok: false; message: string }

  const applyEdits = (edits: FileEdit[]): Outcome => {
    try {
      const { updatedFile } = getPatchForEdits({
        filePath: 'temp',
        fileContents: originalContent,
        edits,
      })
      return { ok: true, updatedFile }
    } catch (e) {
      return { ok: false, message: errorMessage(e) }
    }
  }

  const first = applyEdits(edits1)
  const second = applyEdits(edits2)

  // If both threw errors, they're equal only if the errors are the same
  if (!first.ok && !second.ok) {
    return first.message === second.message
  }

  // If one threw an error and the other didn't, they're not equal
  if (!first.ok || !second.ok) {
    return false
  }

  // Both succeeded - compare the results
  return first.updatedFile === second.updatedFile
}
727  
/**
 * Unified function to check if two file edit inputs are equivalent.
 * Handles file edits (FileEditTool).
 *
 * @param input1 The first input (file path and edits)
 * @param input2 The second input (file path and edits)
 * @returns false when the inputs target different paths; true when their
 *   edits are literally identical; otherwise the result of
 *   areFileEditsEquivalent against the current content of the file
 * @throws Any error from reading the file other than ENOENT
 */
export function areFileEditsInputsEquivalent(
  input1: {
    file_path: string
    edits: FileEdit[]
  },
  input2: {
    file_path: string
    edits: FileEdit[]
  },
): boolean {
  // Fast path: different files
  if (input1.file_path !== input2.file_path) {
    return false
  }

  // Fast path: literal equality
  const isSameEdit = (left: FileEdit, right: FileEdit | undefined): boolean =>
    right !== undefined &&
    left.old_string === right.old_string &&
    left.new_string === right.new_string &&
    left.replace_all === right.replace_all

  if (
    input1.edits.length === input2.edits.length &&
    input1.edits.every((edit, index) => isSameEdit(edit, input2.edits[index]))
  ) {
    return true
  }

  // Semantic comparison (requires file read). If the file doesn't exist,
  // compare against empty content (no TOCTOU pre-check).
  const readContentOrEmpty = (): string => {
    try {
      return readFileSyncCached(input1.file_path)
    } catch (error) {
      if (isENOENT(error)) {
        return ''
      }
      throw error
    }
  }

  return areFileEditsEquivalent(
    input1.edits,
    input2.edits,
    readContentOrEmpty(),
  )
}