/ utils / diff.ts
diff.ts
  1  import { type StructuredPatchHunk, structuredPatch } from 'diff'
  2  import { logEvent } from 'src/services/analytics/index.js'
  3  import { getLocCounter } from '../bootstrap/state.js'
  4  import { addToTotalLinesChanged } from '../cost-tracker.js'
  5  import type { FileEdit } from '../tools/FileEditTool/types.js'
  6  import { count } from './array.js'
  7  import { convertLeadingTabsToSpaces } from './file.js'
  8  
/**
 * Number of context lines requested around each change. Passed as the
 * `context` option to `structuredPatch` whenever a single-hunk patch was not
 * asked for.
 */
export const CONTEXT_LINES = 3
/**
 * Value passed as the `timeout` option to every `structuredPatch` call in this
 * module. Milliseconds, going by the `_MS` suffix.
 */
export const DIFF_TIMEOUT_MS = 5_000
 11  
 12  /**
 13   * Shifts hunk line numbers by offset. Use when getPatchForDisplay received
 14   * a slice of the file (e.g. readEditContext) rather than the whole file —
 15   * callers pass `ctx.lineOffset - 1` to convert slice-relative to file-relative.
 16   */
 17  export function adjustHunkLineNumbers(
 18    hunks: StructuredPatchHunk[],
 19    offset: number,
 20  ): StructuredPatchHunk[] {
 21    if (offset === 0) return hunks
 22    return hunks.map(h => ({
 23      ...h,
 24      oldStart: h.oldStart + offset,
 25      newStart: h.newStart + offset,
 26    }))
 27  }
 28  
 29  // For some reason, & confuses the diff library, so we replace it with a token,
 30  // then substitute it back in after the diff is computed.
 31  const AMPERSAND_TOKEN = '<<:AMPERSAND_TOKEN:>>'
 32  
 33  const DOLLAR_TOKEN = '<<:DOLLAR_TOKEN:>>'
 34  
 35  function escapeForDiff(s: string): string {
 36    return s.replaceAll('&', AMPERSAND_TOKEN).replaceAll('$', DOLLAR_TOKEN)
 37  }
 38  
 39  function unescapeFromDiff(s: string): string {
 40    return s.replaceAll(AMPERSAND_TOKEN, '&').replaceAll(DOLLAR_TOKEN, '$')
 41  }
 42  
 43  /**
 44   * Count lines added and removed in a patch and update the total
 45   * For new files, pass the content string as the second parameter
 46   * @param patch Array of diff hunks
 47   * @param newFileContent Optional content string for new files
 48   */
 49  export function countLinesChanged(
 50    patch: StructuredPatchHunk[],
 51    newFileContent?: string,
 52  ): void {
 53    let numAdditions = 0
 54    let numRemovals = 0
 55  
 56    if (patch.length === 0 && newFileContent) {
 57      // For new files, count all lines as additions
 58      numAdditions = newFileContent.split(/\r?\n/).length
 59    } else {
 60      numAdditions = patch.reduce(
 61        (acc, hunk) => acc + count(hunk.lines, _ => _.startsWith('+')),
 62        0,
 63      )
 64      numRemovals = patch.reduce(
 65        (acc, hunk) => acc + count(hunk.lines, _ => _.startsWith('-')),
 66        0,
 67      )
 68    }
 69  
 70    addToTotalLinesChanged(numAdditions, numRemovals)
 71  
 72    getLocCounter()?.add(numAdditions, { type: 'added' })
 73    getLocCounter()?.add(numRemovals, { type: 'removed' })
 74  
 75    logEvent('tengu_file_changed', {
 76      lines_added: numAdditions,
 77      lines_removed: numRemovals,
 78    })
 79  }
 80  
 81  export function getPatchFromContents({
 82    filePath,
 83    oldContent,
 84    newContent,
 85    ignoreWhitespace = false,
 86    singleHunk = false,
 87  }: {
 88    filePath: string
 89    oldContent: string
 90    newContent: string
 91    ignoreWhitespace?: boolean
 92    singleHunk?: boolean
 93  }): StructuredPatchHunk[] {
 94    const result = structuredPatch(
 95      filePath,
 96      filePath,
 97      escapeForDiff(oldContent),
 98      escapeForDiff(newContent),
 99      undefined,
100      undefined,
101      {
102        ignoreWhitespace,
103        context: singleHunk ? 100_000 : CONTEXT_LINES,
104        timeout: DIFF_TIMEOUT_MS,
105      },
106    )
107    if (!result) {
108      return []
109    }
110    return result.hunks.map(_ => ({
111      ..._,
112      lines: _.lines.map(unescapeFromDiff),
113    }))
114  }
115  
/**
 * Builds a display patch by applying `edits` to `fileContents` and diffing the
 * result against the original.
 *
 * Edits are applied in array order, each one operating on the output of the
 * previous one. An edit replaces only the first occurrence of its `old_string`
 * unless it carries a truthy `replace_all`, in which case every occurrence is
 * replaced.
 *
 * @param filePath The path to the file (used as both old and new file name)
 * @param fileContents The contents of the file
 * @param edits An array of edits to apply to the file
 * @param ignoreWhitespace Whether to ignore whitespace changes
 * @returns An array of hunks representing the diff, or an empty array when
 *   the diff library returns no result
 *
 * NOTE: This function will return the diff with all leading tabs
 * rendered as spaces for display
 */
export function getPatchForDisplay({
  filePath,
  fileContents,
  edits,
  ignoreWhitespace = false,
}: {
  filePath: string
  fileContents: string
  edits: FileEdit[]
  ignoreWhitespace?: boolean
}): StructuredPatchHunk[] {
  // Every string goes through the same tab conversion and escaping so that
  // search strings can still match the prepared contents.
  const prepare = (text: string): string =>
    escapeForDiff(convertLeadingTabsToSpaces(text))

  const before = prepare(fileContents)

  let after = before
  for (const edit of edits) {
    const { old_string, new_string } = edit
    const replaceEverywhere = 'replace_all' in edit ? edit.replace_all : false
    const search = prepare(old_string)
    const replacement = prepare(new_string)

    // Function replacers keep the replacement text literal.
    after = replaceEverywhere
      ? after.replaceAll(search, () => replacement)
      : after.replace(search, () => replacement)
  }

  const patch = structuredPatch(
    filePath,
    filePath,
    before,
    after,
    undefined,
    undefined,
    {
      context: CONTEXT_LINES,
      ignoreWhitespace,
      timeout: DIFF_TIMEOUT_MS,
    },
  )

  if (patch) {
    // Restore the characters that were tokenized for the diff library.
    return patch.hunks.map(hunk => ({
      ...hunk,
      lines: hunk.lines.map(line => unescapeFromDiff(line)),
    }))
  }
  return []
}