/ lib / diff-parse.ts
diff-parse.ts
  1  import type { DiffHunk } from './types';
  2  import { inferLanguage } from './highlight';
  3  
  4  // ── Types ────────────────────────────────────────────────────────
  5  
/**
 * One `@@` hunk extracted from a unified diff by `parseUnifiedDiff`.
 */
export interface ParsedHunk {
  /** Identifier of the form `hunk-<n>`, numbered in parse order starting at 0. */
  id: string;
  /** Path of the file the hunk belongs to, taken from the diff's file header lines. */
  filePath: string;
  /** How the containing file changed in the diff. */
  fileStatus: 'added' | 'deleted' | 'modified' | 'renamed';
  /** The raw `@@ -a,b +c,d @@ ...` line that opened the hunk. */
  hunkHeader: string;
  /**
   * Hunk body lines joined with `\n`. Each line keeps its leading
   * ` ` / `+` / `-` marker; lines starting with a backslash
   * (`\ No newline at end of file`) are omitted.
   */
  content: string;
  /** Number of body lines that start with `+`. */
  additions: number;
  /** Number of body lines that start with `-`. */
  deletions: number;
  /** Trimmed text following the closing `@@` of the header, or `null` when there is none. */
  scopeName: string | null;
}
 16  
/**
 * A parsed hunk enriched by `buildIndexedHunks` with a language tag and a
 * context-expanded rendering.
 */
export interface IndexedHunk extends ParsedHunk {
  /** Result of `inferLanguage(filePath)`. */
  language: string;
  /** Hunk header returned by `expandHunkContext` for this hunk. */
  expandedHunkHeader: string;
  /** Hunk body returned by `expandHunkContext` for this hunk. */
  expandedContent: string;
}
 22  
 23  // ── Hunk header parsing ─────────────────────────────────────────
 24  
// Matches a unified-diff hunk header such as `@@ -12,7 +14,9 @@ function foo()`.
// Capture groups: 1 = base start, 2 = base count (optional), 3 = head start,
// 4 = head count (optional), 5 = everything after the closing `@@`.
const HUNK_HEADER_RE = /^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)/;
 26  
 27  function parseHunkHeader(header: string) {
 28    const m = header.match(HUNK_HEADER_RE);
 29    if (!m) return null;
 30    return {
 31      baseStart: parseInt(m[1], 10),
 32      // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- regex optional group can be undefined at runtime
 33      baseCount: m[2] !== undefined ? parseInt(m[2], 10) : 1,
 34      headStart: parseInt(m[3], 10),
 35      // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- regex optional group can be undefined at runtime
 36      headCount: m[4] !== undefined ? parseInt(m[4], 10) : 1,
 37      // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- regex optional group
 38      suffix: m[5] ?? '',
 39    };
 40  }
 41  
 42  function buildHunkHeader(
 43    baseStart: number,
 44    baseCount: number,
 45    headStart: number,
 46    headCount: number,
 47    suffix: string
 48  ): string {
 49    return `@@ -${baseStart},${baseCount} +${headStart},${headCount} @@${suffix}`;
 50  }
 51  
 52  // ── Parse unified diff into per-hunk objects ─────────────────────
 53  
 54  export function parseUnifiedDiff(diff: string): ParsedHunk[] {
 55    const hunks: ParsedHunk[] = [];
 56    const lines = diff.split('\n');
 57    let i = 0;
 58    let hunkCounter = 0;
 59  
 60    let currentFilePath: string | null = null;
 61    let currentFileStatus: 'added' | 'deleted' | 'modified' | 'renamed' = 'modified';
 62    let isRename = false;
 63    let renameFrom: string | null = null;
 64  
 65    while (i < lines.length) {
 66      const line = lines[i];
 67  
 68      // diff header
 69      if (line.startsWith('diff --git')) {
 70        isRename = false;
 71        renameFrom = null;
 72        i++;
 73        continue;
 74      }
 75  
 76      // Detect rename markers
 77      if (line.startsWith('rename from ')) {
 78        renameFrom = line.slice('rename from '.length);
 79        isRename = true;
 80        i++;
 81        continue;
 82      }
 83      if (line.startsWith('rename to ')) {
 84        isRename = true;
 85        i++;
 86        continue;
 87      }
 88  
 89      // similarity / dissimilarity index, old mode, new mode, etc.
 90      if (
 91        line.startsWith('index ') ||
 92        line.startsWith('old mode') ||
 93        line.startsWith('new mode') ||
 94        line.startsWith('new file mode') ||
 95        line.startsWith('deleted file mode') ||
 96        line.startsWith('similarity index') ||
 97        line.startsWith('dissimilarity index') ||
 98        line.startsWith('copy from') ||
 99        line.startsWith('copy to') ||
100        line.startsWith('Binary files')
101      ) {
102        if (line.startsWith('new file mode')) currentFileStatus = 'added';
103        if (line.startsWith('deleted file mode')) currentFileStatus = 'deleted';
104        i++;
105        continue;
106      }
107  
108      // --- a/path or --- /dev/null
109      if (line.startsWith('--- ')) {
110        if (line === '--- /dev/null') {
111          currentFileStatus = 'added';
112        }
113        i++;
114        continue;
115      }
116  
117      // +++ b/path or +++ /dev/null
118      if (line.startsWith('+++ ')) {
119        if (line === '+++ /dev/null') {
120          currentFileStatus = 'deleted';
121        } else {
122          currentFilePath = line.slice('+++ b/'.length);
123          if (isRename && renameFrom) {
124            currentFileStatus = 'renamed';
125          } else if (currentFileStatus !== 'added' && currentFileStatus !== 'deleted') {
126            currentFileStatus = 'modified';
127          }
128        }
129        i++;
130        continue;
131      }
132  
133      // Hunk header
134      if (line.startsWith('@@ ') && currentFilePath) {
135        const match = line.match(HUNK_HEADER_RE);
136        if (!match) {
137          i++;
138          continue;
139        }
140  
141        // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- regex optional group
142        const suffix = match[5] ?? '';
143        const scopeName = suffix.trim() || null;
144  
145        // Collect hunk body lines
146        i++;
147        const body: string[] = [];
148        while (i < lines.length) {
149          const bodyLine = lines[i];
150          if (bodyLine.startsWith('@@ ') || bodyLine.startsWith('diff --git')) {
151            break;
152          }
153          // Skip "\ No newline at end of file"
154          if (bodyLine.startsWith('\\')) {
155            i++;
156            continue;
157          }
158          body.push(bodyLine);
159          i++;
160        }
161  
162        // Remove trailing empty lines from split artifact
163        while (body.length > 0 && body[body.length - 1] === '') body.pop();
164  
165        const content = body.join('\n');
166  
167        let additions = 0;
168        let deletions = 0;
169        for (const bl of body) {
170          if (bl.startsWith('+')) additions++;
171          else if (bl.startsWith('-')) deletions++;
172        }
173  
174        hunks.push({
175          id: `hunk-${hunkCounter++}`,
176          filePath: currentFilePath,
177          fileStatus: currentFileStatus,
178          hunkHeader: line,
179          content,
180          additions,
181          deletions,
182          scopeName,
183        });
184  
185        continue;
186      }
187  
188      i++;
189    }
190  
191    return hunks;
192  }
193  
194  // ── Expand context for a single hunk ─────────────────────────────
195  
/**
 * Widens a hunk so that it carries up to `targetContext` unchanged lines
 * before and after its changes, pulling the extra lines from the file contents.
 *
 * Context is read from the head file for added files, otherwise from the base
 * file, falling back to the head file when no base contents are available.
 * Line positions are taken from the same side the context is read from.
 *
 * @param hunk - Hunk to expand; `hunk.filePath` is the lookup key in both maps.
 * @param filesBefore - Base-side file contents keyed by path.
 * @param filesAfter - Head-side file contents keyed by path.
 * @param targetContext - Desired number of leading/trailing context lines.
 * @returns The rebuilt header and body. The hunk's original header and content
 *   are returned unchanged when the header cannot be parsed, no file contents
 *   are available, or the body is empty.
 */
export function expandHunkContext(
  hunk: ParsedHunk,
  filesBefore: Record<string, string>,
  filesAfter: Record<string, string>,
  targetContext = 15
): { expandedHunkHeader: string; expandedContent: string } {
  const unchanged = { expandedHunkHeader: hunk.hunkHeader, expandedContent: hunk.content };

  const parsed = parseHunkHeader(hunk.hunkHeader);
  if (!parsed) {
    return unchanged;
  }

  // Split file text into lines. A text that ends with '\n' has no extra line
  // after that newline, so the empty tail produced by split() is dropped;
  // otherwise it would surface as a phantom blank context line at EOF.
  const toLines = (text: string | undefined): string[] | undefined => {
    if (text === undefined) return undefined;
    const parts = text.split('\n');
    if (parts[parts.length - 1] === '') parts.pop();
    return parts;
  };

  const baseLines = toLines(filesBefore[hunk.filePath]);
  const headLines = toLines(filesAfter[hunk.filePath]);

  // Added files only exist on the head side; other files prefer the base side.
  const useHead = hunk.fileStatus === 'added' || baseLines === undefined;
  const ctxSource = useHead ? headLines : baseLines;
  if (!ctxSource) {
    return unchanged;
  }

  // Parse body lines into marker + text, ignoring empty entries.
  const rawLines = hunk.content.split('\n').filter((l) => l !== '');
  const bodyLines = rawLines.map((raw) => ({ marker: raw[0], text: raw.slice(1) }));
  if (bodyLines.length === 0) {
    return unchanged;
  }

  // Count existing leading/trailing context
  let leadCtx = 0;
  for (const bl of bodyLines) {
    if (bl.marker === ' ') leadCtx++;
    else break;
  }
  let trailCtx = 0;
  for (let j = bodyLines.length - 1; j >= 0; j--) {
    if (bodyLines[j].marker === ' ') trailCtx++;
    else break;
  }

  // 0-based index of the first line covered by the hunk on each side. In the
  // unified format a zero-length range names the line *before* the hunk, so
  // the hunk then begins at index `start` rather than `start - 1`.
  const baseIdx0 = Math.max(0, parsed.baseCount === 0 ? parsed.baseStart : parsed.baseStart - 1);
  const headIdx0 = Math.max(0, parsed.headCount === 0 ? parsed.headStart : parsed.headStart - 1);
  const hunkStart0 = useHead ? headIdx0 : baseIdx0;

  // Prepend leading context
  const extraLead = Math.max(0, targetContext - leadCtx);
  const prependFrom = Math.max(0, hunkStart0 - extraLead);
  const prepend = ctxSource.slice(prependFrom, hunkStart0).map((l) => ' ' + l);

  // Walk the body to find where the hunk ends on each side.
  let basePos = baseIdx0;
  let headPos = headIdx0;
  for (const bl of bodyLines) {
    if (bl.marker === ' ') {
      basePos++;
      headPos++;
    } else if (bl.marker === '-') {
      basePos++;
    } else if (bl.marker === '+') {
      headPos++;
    }
  }
  const hunkEnd0 = useHead ? headPos : basePos;

  // Append trailing context
  const extraTrail = Math.max(0, targetContext - trailCtx);
  const appendTo = Math.min(ctxSource.length, hunkEnd0 + extraTrail);
  const append = ctxSource.slice(hunkEnd0, appendTo).map((l) => ' ' + l);

  // Recount both sides from the final body.
  const allContent = [...prepend, ...rawLines, ...append];
  let newBaseCount = 0;
  let newHeadCount = 0;
  for (const l of allContent) {
    const m = l[0];
    if (m === ' ') {
      newBaseCount++;
      newHeadCount++;
    } else if (m === '-') {
      newBaseCount++;
    } else if (m === '+') {
      newHeadCount++;
    }
  }

  // A side that is still empty keeps its original start (e.g. `-0,0` for an
  // added file); otherwise the start moves back by the number of prepended lines.
  const shiftedStart = (start: number, idx0: number, count: number): number =>
    count === 0 ? start : Math.max(1, idx0 - prepend.length + 1);
  const newBaseStart = shiftedStart(parsed.baseStart, baseIdx0, newBaseCount);
  const newHeadStart = shiftedStart(parsed.headStart, headIdx0, newHeadCount);

  const expandedHunkHeader = buildHunkHeader(newBaseStart, newBaseCount, newHeadStart, newHeadCount, parsed.suffix);
  const expandedContent = allContent.join('\n');

  return { expandedHunkHeader, expandedContent };
}
301  
302  // ── Full pipeline: parse → expand → index ────────────────────────
303  
/**
 * Runs the whole pipeline on a diff: parse it into hunks, expand each hunk's
 * context, and tag each hunk with the language inferred from its file path.
 *
 * @param diff - Unified diff text.
 * @param filesBefore - Base-side file contents keyed by path.
 * @param filesAfter - Head-side file contents keyed by path.
 * @param targetContext - Context size forwarded to `expandHunkContext`.
 * @returns One indexed hunk per parsed hunk, in parse order.
 */
export function buildIndexedHunks(
  diff: string,
  filesBefore: Record<string, string>,
  filesAfter: Record<string, string>,
  targetContext = 15
): IndexedHunk[] {
  const indexed: IndexedHunk[] = [];

  for (const hunk of parseUnifiedDiff(diff)) {
    const expansion = expandHunkContext(hunk, filesBefore, filesAfter, targetContext);
    indexed.push({
      ...hunk,
      language: inferLanguage(hunk.filePath),
      expandedHunkHeader: expansion.expandedHunkHeader,
      expandedContent: expansion.expandedContent,
    });
  }

  return indexed;
}
324  
325  // ── Expand full diff as a single string ──────────────────────────
326  // Replaces expandDiffContext from context-builder.ts but also
327  // handles added files by using head file contents.
328  
/**
 * Returns `diff` with every hunk widened to carry up to `targetContext`
 * unchanged lines before and after its changes.
 *
 * Context comes from `filesAfter` for added files (`--- /dev/null`) and from
 * `filesBefore` otherwise. Non-hunk lines are passed through untouched, and a
 * hunk is left as-is when its header does not parse or the file contents are
 * not available. Paths are assumed to carry the `a/` / `b/` prefixes.
 *
 * @param diff - Unified diff text.
 * @param filesBefore - Base-side file contents keyed by path.
 * @param filesAfter - Head-side file contents keyed by path.
 * @param targetContext - Desired number of leading/trailing context lines.
 * @returns The rewritten diff text.
 */
export function expandFullDiff(
  diff: string,
  filesBefore: Record<string, string>,
  filesAfter: Record<string, string>,
  targetContext = 15
): string {
  // Split file text into lines. A text that ends with '\n' has no extra line
  // after that newline, so the empty tail produced by split() is dropped;
  // otherwise it would surface as a phantom blank context line at EOF.
  const splitFileLines = (text: string | undefined): string[] | undefined => {
    if (text === undefined) return undefined;
    const parts = text.split('\n');
    if (parts[parts.length - 1] === '') parts.pop();
    return parts;
  };

  const result: string[] = [];
  let currentBasePath: string | null = null;
  let currentFileStatus: 'added' | 'deleted' | 'modified' = 'modified';
  const lines = diff.split('\n');
  let i = 0;

  while (i < lines.length) {
    const line = lines[i];

    // Everything that is not a hunk header is copied through; file header
    // lines additionally update the current path/status.
    if (!line.startsWith('@@ ')) {
      if (line.startsWith('--- /dev/null')) {
        currentBasePath = null;
        currentFileStatus = 'added';
      } else if (line.startsWith('--- a/')) {
        currentBasePath = line.slice('--- a/'.length);
        currentFileStatus = 'modified';
      } else if (line.startsWith('+++ /dev/null')) {
        currentFileStatus = 'deleted';
      } else if (line.startsWith('+++ b/') && currentFileStatus === 'added') {
        // Added files have no base path; look their context up by head path.
        currentBasePath = line.slice('+++ b/'.length);
      }
      result.push(line);
      i++;
      continue;
    }

    // Hunk header
    const match = line.match(HUNK_HEADER_RE);
    const isAdded = currentFileStatus === 'added';

    let fileLines: string[] | undefined;
    if (currentBasePath) {
      fileLines = splitFileLines(isAdded ? filesAfter[currentBasePath] : filesBefore[currentBasePath]);
    }

    if (!match || !fileLines) {
      result.push(line);
      i++;
      continue;
    }

    // An omitted count means 1; an explicit 0 must stay 0 (e.g. `-0,0` for
    // added files, `+0,0` for deleted files).
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- regex optional group can be undefined at runtime
    const countOf = (text: string | undefined): number => (text === undefined ? 1 : parseInt(text, 10));
    const baseStart = parseInt(match[1], 10);
    const baseCount = countOf(match[2]);
    const headStart = parseInt(match[3], 10);
    const headCount = countOf(match[4]);
    const suffix = match[5];

    // Collect hunk body
    i++;
    const body: string[] = [];
    while (i < lines.length && !lines[i].startsWith('@@ ') && !lines[i].startsWith('diff ')) {
      body.push(lines[i]);
      i++;
    }
    while (body.length > 0 && body[body.length - 1] === '') body.pop();

    // Count existing leading/trailing context
    let leadCtx = 0;
    for (const bl of body) {
      if (bl.startsWith(' ')) leadCtx++;
      else break;
    }
    let trailCtx = 0;
    for (let j = body.length - 1; j >= 0; j--) {
      if (body[j].startsWith(' ')) trailCtx++;
      else break;
    }

    // 0-based index of the first line covered by the hunk on each side. In the
    // unified format a zero-length range names the line *before* the hunk, so
    // the hunk then begins at index `start` rather than `start - 1`.
    const baseIdx0 = Math.max(0, baseCount === 0 ? baseStart : baseStart - 1);
    const headIdx0 = Math.max(0, headCount === 0 ? headStart : headStart - 1);

    // For added files, positions refer to the head file.
    const hunkStart0 = isAdded ? headIdx0 : baseIdx0;
    const hunkEnd0 = hunkStart0 + (isAdded ? headCount : baseCount);

    // Prepend extra leading context
    const extraLead = Math.max(0, targetContext - leadCtx);
    const prependFrom = Math.max(0, hunkStart0 - extraLead);
    const prepend = fileLines.slice(prependFrom, hunkStart0).map((l) => ' ' + l);

    // Append extra trailing context
    const extraTrail = Math.max(0, targetContext - trailCtx);
    const appendTo = Math.min(fileLines.length, hunkEnd0 + extraTrail);
    const append = fileLines.slice(hunkEnd0, appendTo).map((l) => ' ' + l);

    // Rebuild hunk header. A side that is still empty keeps its original
    // start; otherwise the start moves back by the number of prepended lines.
    // The base side of an added file is left exactly as it was.
    const contextAdded = prepend.length + append.length;
    const newHeadCount = headCount + contextAdded;
    const newHeadStart = newHeadCount === 0 ? headStart : Math.max(1, headIdx0 - prepend.length + 1);
    const newBaseCount = isAdded ? baseCount : baseCount + contextAdded;
    const newBaseStart =
      isAdded || newBaseCount === 0 ? baseStart : Math.max(1, baseIdx0 - prepend.length + 1);

    result.push(`@@ -${newBaseStart},${newBaseCount} +${newHeadStart},${newHeadCount} @@${suffix}`);
    result.push(...prepend, ...body, ...append);
  }

  return result.join('\n');
}
457  
458  // ── Format hunk index as XML for the AI prompt ───────────────────
459  
/**
 * Renders the hunk list as a `<hunk_index>` XML block with one self-closing
 * `<hunk>` element per hunk, carrying `id`, `file`, `header`, `additions` and
 * `deletions` attributes.
 *
 * Every string attribute goes through the same escaping (`&`, `"`, `<`, `>`),
 * so a file path or id containing markup characters cannot break the element.
 *
 * @param hunks - Hunks to list, in output order.
 * @returns The XML text; an empty input yields an index with no entries.
 */
export function formatHunkIndexForPrompt(hunks: IndexedHunk[]): string {
  // `&` is replaced first so the entities produced below are not re-escaped.
  const escapeAttr = (value: string): string =>
    value.replace(/&/g, '&amp;').replace(/"/g, '&quot;').replace(/</g, '&lt;').replace(/>/g, '&gt;');

  const entries = hunks.map(
    (h) =>
      `  <hunk id="${escapeAttr(h.id)}" file="${escapeAttr(h.filePath)}" header="${escapeAttr(h.hunkHeader)}" additions="${h.additions}" deletions="${h.deletions}" />`
  );

  return `<hunk_index>\n${entries.join('\n')}\n</hunk_index>`;
}
473  
474  // ── Sort diff hunks by file path then base start line ────────────
475  
/**
 * Returns a sorted copy of `hunks`, ordered by file path (`localeCompare`) and,
 * within one file, by the base-side start line from the hunk header. Hunks
 * whose header cannot be parsed are treated as starting at line 0. The input
 * array is not modified.
 */
export function sortDiffHunks(hunks: DiffHunk[]): DiffHunk[] {
  const baseStartOf = (hunk: DiffHunk): number => {
    const header = parseHunkHeader(hunk.hunkHeader);
    return header ? header.baseStart : 0;
  };

  const ordered = hunks.slice();
  ordered.sort((left, right) => {
    const byPath = left.filePath.localeCompare(right.filePath);
    if (byPath !== 0) {
      return byPath;
    }
    const leftStart = baseStartOf(left);
    const rightStart = baseStartOf(right);
    return leftStart - rightStart;
  });
  return ordered;
}