// diff-parse.ts
import type { DiffHunk } from './types';
import { inferLanguage } from './highlight';

// ── Types ────────────────────────────────────────────────────────

/** One hunk of a unified diff together with the file it belongs to. */
export interface ParsedHunk {
  /** Sequential identifier of the form `hunk-N`, assigned in parse order. */
  id: string;
  /** Path of the file the hunk applies to (head path; base path for deleted files). */
  filePath: string;
  fileStatus: 'added' | 'deleted' | 'modified' | 'renamed';
  /** The raw `@@ -a,b +c,d @@ …` line exactly as it appeared in the diff. */
  hunkHeader: string;
  /** Hunk body lines joined with `\n`; "\ No newline at end of file" markers are omitted. */
  content: string;
  /** Number of body lines starting with `+`. */
  additions: number;
  /** Number of body lines starting with `-`. */
  deletions: number;
  /** Trimmed text after the closing `@@` of the header, or `null` when empty. */
  scopeName: string | null;
}

/** A parsed hunk enriched with its language and a context-expanded version. */
export interface IndexedHunk extends ParsedHunk {
  language: string;
  expandedHunkHeader: string;
  expandedContent: string;
}

// ── Hunk header parsing ─────────────────────────────────────────

const HUNK_HEADER_RE = /^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)/;

/**
 * Parse a `@@ -a,b +c,d @@ suffix` line.
 *
 * @returns the numeric ranges and the raw suffix, or `null` when the line is
 *   not a hunk header. An omitted count defaults to 1.
 */
function parseHunkHeader(header: string) {
  const m = header.match(HUNK_HEADER_RE);
  if (!m) return null;
  return {
    baseStart: parseInt(m[1], 10),
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- regex optional group can be undefined at runtime
    baseCount: m[2] !== undefined ? parseInt(m[2], 10) : 1,
    headStart: parseInt(m[3], 10),
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- regex optional group can be undefined at runtime
    headCount: m[4] !== undefined ? parseInt(m[4], 10) : 1,
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- regex optional group
    suffix: m[5] ?? '',
  };
}

/** Format a hunk header line; `suffix` is appended verbatim after the closing `@@`. */
function buildHunkHeader(
  baseStart: number,
  baseCount: number,
  headStart: number,
  headCount: number,
  suffix: string
): string {
  return `@@ -${baseStart},${baseCount} +${headStart},${headCount} @@${suffix}`;
}

/**
 * Split file text into lines without the phantom empty element that
 * `split('\n')` produces for newline-terminated text. Without this, the end of
 * a file would yield one extra (non-existent) context line.
 */
function splitFileLines(text: string): string[] {
  const fileLines = text.split('\n');
  if (fileLines.length > 0 && fileLines[fileLines.length - 1] === '') fileLines.pop();
  return fileLines;
}

/**
 * Compute the 1-based start of a hunk range after `prepended` context lines
 * were added in front of it.
 *
 * In unified diff, a range with count 0 names the line *preceding* the hunk,
 * while a non-empty range names its first line; both conventions are handled.
 */
function shiftStart(start: number, span: number, prepended: number, newSpan: number): number {
  // Nothing was added and the range is still empty: the header stays as it was.
  if (newSpan === 0) return start;
  const first0 = span === 0 ? start : start - 1;
  return Math.max(1, first0 - prepended + 1);
}

// ── Parse unified diff into per-hunk objects ─────────────────────

/**
 * Split a git-style unified diff into one {@link ParsedHunk} per `@@` hunk.
 *
 * Per-file state (path, status, rename markers) is reset at every
 * `diff --git` line. Hunks that appear before any file path is known are
 * skipped.
 *
 * @param diff - full unified diff text
 * @returns hunks in the order they appear in the diff
 */
export function parseUnifiedDiff(diff: string): ParsedHunk[] {
  const hunks: ParsedHunk[] = [];
  const lines = diff.split('\n');
  let i = 0;
  let hunkCounter = 0;

  let currentFilePath: string | null = null;
  let currentBasePath: string | null = null;
  let currentFileStatus: ParsedHunk['fileStatus'] = 'modified';
  let isRename = false;
  let renameFrom: string | null = null;

  while (i < lines.length) {
    const line = lines[i];

    // diff header: a new file starts, so nothing from the previous file may leak
    if (line.startsWith('diff --git')) {
      currentFilePath = null;
      currentBasePath = null;
      currentFileStatus = 'modified';
      isRename = false;
      renameFrom = null;
      i++;
      continue;
    }

    // Detect rename markers
    if (line.startsWith('rename from ')) {
      renameFrom = line.slice('rename from '.length);
      isRename = true;
      i++;
      continue;
    }
    if (line.startsWith('rename to ')) {
      isRename = true;
      i++;
      continue;
    }

    // similarity / dissimilarity index, old mode, new mode, etc.
    if (
      line.startsWith('index ') ||
      line.startsWith('old mode') ||
      line.startsWith('new mode') ||
      line.startsWith('new file mode') ||
      line.startsWith('deleted file mode') ||
      line.startsWith('similarity index') ||
      line.startsWith('dissimilarity index') ||
      line.startsWith('copy from') ||
      line.startsWith('copy to') ||
      line.startsWith('Binary files')
    ) {
      if (line.startsWith('new file mode')) currentFileStatus = 'added';
      if (line.startsWith('deleted file mode')) currentFileStatus = 'deleted';
      i++;
      continue;
    }

    // --- a/path or --- /dev/null
    if (line.startsWith('--- ')) {
      if (line === '--- /dev/null') {
        currentFileStatus = 'added';
      } else {
        // Remember the base path: it is the only path a deleted file has.
        currentBasePath = line.slice('--- a/'.length);
      }
      i++;
      continue;
    }

    // +++ b/path or +++ /dev/null
    if (line.startsWith('+++ ')) {
      if (line === '+++ /dev/null') {
        currentFileStatus = 'deleted';
        if (currentBasePath !== null) currentFilePath = currentBasePath;
      } else {
        currentFilePath = line.slice('+++ b/'.length);
        if (isRename && renameFrom) {
          currentFileStatus = 'renamed';
        } else if (currentFileStatus !== 'added' && currentFileStatus !== 'deleted') {
          currentFileStatus = 'modified';
        }
      }
      i++;
      continue;
    }

    // Hunk header
    if (line.startsWith('@@ ') && currentFilePath) {
      const match = line.match(HUNK_HEADER_RE);
      if (!match) {
        i++;
        continue;
      }

      // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- regex optional group
      const suffix = match[5] ?? '';
      const scopeName = suffix.trim() || null;

      // Collect hunk body lines
      i++;
      const body: string[] = [];
      while (i < lines.length) {
        const bodyLine = lines[i];
        if (bodyLine.startsWith('@@ ') || bodyLine.startsWith('diff --git')) {
          break;
        }
        // Skip "\ No newline at end of file"
        if (bodyLine.startsWith('\\')) {
          i++;
          continue;
        }
        body.push(bodyLine);
        i++;
      }

      // Remove trailing empty lines from split artifact
      while (body.length > 0 && body[body.length - 1] === '') body.pop();

      const content = body.join('\n');

      let additions = 0;
      let deletions = 0;
      for (const bl of body) {
        if (bl.startsWith('+')) additions++;
        else if (bl.startsWith('-')) deletions++;
      }

      hunks.push({
        id: `hunk-${hunkCounter++}`,
        filePath: currentFilePath,
        fileStatus: currentFileStatus,
        hunkHeader: line,
        content,
        additions,
        deletions,
        scopeName,
      });

      continue;
    }

    i++;
  }

  return hunks;
}

// ── Expand context for a single hunk ─────────────────────────────

/**
 * Pad a hunk with extra unchanged lines so it has up to `targetContext`
 * leading and trailing context lines, and rebuild its header accordingly.
 *
 * Context is read from the head file for added files and from the base file
 * otherwise; when the base file is not available the head file is used with
 * head line numbers. The hunk is returned unchanged when its header cannot be
 * parsed, no file content is available, or the body is empty.
 *
 * @param hunk - hunk to expand
 * @param filesBefore - base file contents keyed by path
 * @param filesAfter - head file contents keyed by path
 * @param targetContext - desired number of context lines on each side
 */
export function expandHunkContext(
  hunk: ParsedHunk,
  filesBefore: Record<string, string>,
  filesAfter: Record<string, string>,
  targetContext = 15
): { expandedHunkHeader: string; expandedContent: string } {
  const unchanged = { expandedHunkHeader: hunk.hunkHeader, expandedContent: hunk.content };

  const parsed = parseHunkHeader(hunk.hunkHeader);
  if (!parsed) return unchanged;

  // Choose source file for context padding
  // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- Record index may be undefined at runtime
  const baseText: string | undefined = filesBefore[hunk.filePath];
  // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- Record index may be undefined at runtime
  const headText: string | undefined = filesAfter[hunk.filePath];

  // For added files, context comes from head; for others, from base (head as fallback)
  // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- Record index may be undefined at runtime
  const useHead = hunk.fileStatus === 'added' || baseText === undefined;
  const ctxText = useHead ? headText : baseText;
  // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- Record index may be undefined at runtime
  if (ctxText === undefined) return unchanged;
  const ctxSource = splitFileLines(ctxText);

  // Body lines (empty strings are split artifacts and carry no marker)
  const bodyLines = hunk.content.split('\n').filter((raw) => raw !== '');
  if (bodyLines.length === 0) return unchanged;

  // Count existing leading/trailing context
  let leadCtx = 0;
  for (const raw of bodyLines) {
    if (raw[0] === ' ') leadCtx++;
    else break;
  }
  let trailCtx = 0;
  for (let j = bodyLines.length - 1; j >= 0; j--) {
    if (bodyLines[j][0] === ' ') trailCtx++;
    else break;
  }

  // How many base / head lines the body itself covers
  let baseSpan = 0;
  let headSpan = 0;
  for (const raw of bodyLines) {
    const marker = raw[0];
    if (marker === ' ') {
      baseSpan++;
      headSpan++;
    } else if (marker === '-') {
      baseSpan++;
    } else if (marker === '+') {
      headSpan++;
    }
  }

  // Position the hunk in the context source using the matching side's numbers.
  // An empty range names the line *before* the hunk, so its 0-based first
  // index equals the 1-based start.
  const srcStart = useHead ? parsed.headStart : parsed.baseStart;
  const srcSpan = useHead ? headSpan : baseSpan;
  const hunkStart0 = Math.max(0, srcSpan === 0 ? srcStart : srcStart - 1);
  const hunkEnd0 = hunkStart0 + srcSpan;

  // Prepend leading context
  const extraLead = Math.max(0, targetContext - leadCtx);
  const prependFrom = Math.max(0, hunkStart0 - extraLead);
  const prepend = ctxSource.slice(prependFrom, hunkStart0).map((l) => ' ' + l);

  // Append trailing context
  const extraTrail = Math.max(0, targetContext - trailCtx);
  const appendTo = Math.min(ctxSource.length, hunkEnd0 + extraTrail);
  const append = ctxSource.slice(hunkEnd0, appendTo).map((l) => ' ' + l);

  // Rebuild hunk header with new counts; added context counts on both sides
  const allContent = [...prepend, ...bodyLines, ...append];
  const newBaseCount = prepend.length + baseSpan + append.length;
  const newHeadCount = prepend.length + headSpan + append.length;
  const newBaseStart = shiftStart(parsed.baseStart, baseSpan, prepend.length, newBaseCount);
  const newHeadStart = shiftStart(parsed.headStart, headSpan, prepend.length, newHeadCount);

  const expandedHunkHeader = buildHunkHeader(newBaseStart, newBaseCount, newHeadStart, newHeadCount, parsed.suffix);
  const expandedContent = allContent.join('\n');

  return { expandedHunkHeader, expandedContent };
}

// ── Full pipeline: parse → expand → index ────────────────────────

/**
 * Parse `diff`, expand every hunk's context, and attach the language
 * reported by `inferLanguage` for the hunk's file path.
 *
 * @param diff - full unified diff text
 * @param filesBefore - base file contents keyed by path
 * @param filesAfter - head file contents keyed by path
 * @param targetContext - desired number of context lines on each side
 */
export function buildIndexedHunks(
  diff: string,
  filesBefore: Record<string, string>,
  filesAfter: Record<string, string>,
  targetContext = 15
): IndexedHunk[] {
  const parsed = parseUnifiedDiff(diff);

  return parsed.map((hunk) => {
    const { expandedHunkHeader, expandedContent } = expandHunkContext(hunk, filesBefore, filesAfter, targetContext);
    const language = inferLanguage(hunk.filePath);

    return {
      ...hunk,
      language,
      expandedHunkHeader,
      expandedContent,
    };
  });
}

// ── Expand full diff as a single string ──────────────────────────
// Replaces expandDiffContext from context-builder.ts but also
// handles added files by using head file contents.

/**
 * Return `diff` with every hunk padded to up to `targetContext` leading and
 * trailing context lines. Non-hunk lines are passed through unchanged, and a
 * hunk whose header is invalid or whose source file is not available is
 * emitted as-is.
 *
 * @param diff - full unified diff text
 * @param filesBefore - base file contents keyed by the `--- a/` path
 * @param filesAfter - head file contents keyed by the `+++ b/` path
 * @param targetContext - desired number of context lines on each side
 */
export function expandFullDiff(
  diff: string,
  filesBefore: Record<string, string>,
  filesAfter: Record<string, string>,
  targetContext = 15
): string {
  const result: string[] = [];
  let currentBasePath: string | null = null;
  let currentFileStatus: 'added' | 'deleted' | 'modified' = 'modified';
  const lines = diff.split('\n');
  let i = 0;

  // Loop-based append: spreading a very large hunk into push() can exceed
  // the engine's argument limit.
  const pushAll = (items: string[]): void => {
    for (const item of items) result.push(item);
  };

  while (i < lines.length) {
    const line = lines[i];

    if (!line.startsWith('@@ ')) {
      // File header lines update the per-file state; everything is passed through.
      if (line.startsWith('--- /dev/null')) {
        currentBasePath = null;
        currentFileStatus = 'added';
      } else if (line.startsWith('--- a/')) {
        currentBasePath = line.slice('--- a/'.length);
        currentFileStatus = 'modified';
      } else if (line.startsWith('+++ /dev/null')) {
        currentFileStatus = 'deleted';
      } else if (line.startsWith('+++ b/')) {
        if (currentFileStatus === 'added') {
          currentBasePath = line.slice('+++ b/'.length); // use headPath for context lookup
        }
      }
      result.push(line);
      i++;
      continue;
    }

    // Hunk header
    const parsed = parseHunkHeader(line);
    if (!parsed) {
      result.push(line);
      i++;
      continue;
    }

    // Collect hunk body up front so body lines are never mistaken for file headers
    i++;
    const rawBody: string[] = [];
    while (i < lines.length && !lines[i].startsWith('@@ ') && !lines[i].startsWith('diff ')) {
      rawBody.push(lines[i]);
      i++;
    }

    // Choose file lines for context expansion
    // For added files, use head file; for others, use base file
    const useHead = currentFileStatus === 'added';
    let fileText: string | undefined;
    if (currentBasePath) {
      // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- Record index may be undefined at runtime
      fileText = useHead ? filesAfter[currentBasePath] : filesBefore[currentBasePath];
    }

    if (fileText === undefined) {
      result.push(line);
      pushAll(rawBody);
      continue;
    }
    const fileLines = splitFileLines(fileText);

    const body = rawBody.slice();
    while (body.length > 0 && body[body.length - 1] === '') body.pop();

    const { baseStart, baseCount, headStart, headCount, suffix } = parsed;

    // Count existing leading/trailing context
    let leadCtx = 0;
    for (const bl of body) {
      if (bl.startsWith(' ')) leadCtx++;
      else break;
    }
    let trailCtx = 0;
    for (let j = body.length - 1; j >= 0; j--) {
      if (body[j].startsWith(' ')) trailCtx++;
      else break;
    }

    // Position the hunk in the source file. An empty range (count 0) names the
    // line before the hunk, so its 0-based first index equals the 1-based start.
    const srcStart = useHead ? headStart : baseStart;
    const srcCount = useHead ? headCount : baseCount;
    const hunkStart0 = Math.max(0, srcCount === 0 ? srcStart : srcStart - 1);
    const hunkEnd0 = hunkStart0 + srcCount;

    // Prepend extra leading context
    const extraLead = Math.max(0, targetContext - leadCtx);
    const prependFrom = Math.max(0, hunkStart0 - extraLead);
    const prepend = fileLines.slice(prependFrom, hunkStart0).map((l) => ' ' + l);

    // Append extra trailing context
    const extraTrail = Math.max(0, targetContext - trailCtx);
    const appendTo = Math.min(fileLines.length, hunkEnd0 + extraTrail);
    const append = fileLines.slice(hunkEnd0, appendTo).map((l) => ' ' + l);

    // Rebuild hunk header; the base range of an added file is left untouched
    const newBaseCount = useHead ? baseCount : prepend.length + baseCount + append.length;
    const newBaseStart = useHead ? baseStart : shiftStart(baseStart, baseCount, prepend.length, newBaseCount);
    const newHeadCount = prepend.length + headCount + append.length;
    const newHeadStart = shiftStart(headStart, headCount, prepend.length, newHeadCount);

    result.push(buildHunkHeader(newBaseStart, newBaseCount, newHeadStart, newHeadCount, suffix));
    pushAll(prepend);
    pushAll(body);
    pushAll(append);
  }

  return result.join('\n');
}

// ── Format hunk index as XML for the AI prompt ───────────────────

/** Escape text for use inside a double-quoted XML attribute value. */
function escapeXmlAttribute(value: string): string {
  // `&` must be replaced first so the entities produced below are not re-escaped.
  return value.replace(/&/g, '&amp;').replace(/"/g, '&quot;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
}

/**
 * Render the hunks as a `<hunk_index>` element containing one self-closing
 * `<hunk>` per entry with its id, file, header and addition/deletion counts.
 */
export function formatHunkIndexForPrompt(hunks: IndexedHunk[]): string {
  const entries = hunks.map((h) => {
    const escapedFile = escapeXmlAttribute(h.filePath);
    const escapedHeader = escapeXmlAttribute(h.hunkHeader);
    return ` <hunk id="${h.id}" file="${escapedFile}" header="${escapedHeader}" additions="${h.additions}" deletions="${h.deletions}" />`;
  });

  return `<hunk_index>\n${entries.join('\n')}\n</hunk_index>`;
}

// ── Sort diff hunks by file path then base start line ────────────

/**
 * Return a sorted copy of `hunks`; the input array is not modified.
 *
 * Hunks are ordered by `filePath` (using `localeCompare`) and, within a file,
 * by the base start line taken from `hunkHeader`. A header that cannot be
 * parsed sorts as if its base start line were 0.
 */
export function sortDiffHunks(hunks: DiffHunk[]): DiffHunk[] {
  // Base start line of a hunk, or 0 when its header is not a valid hunk header.
  const baseStartOf = (hunk: DiffHunk): number => {
    const header = parseHunkHeader(hunk.hunkHeader);
    return header === null ? 0 : header.baseStart;
  };

  const sorted = hunks.slice();
  sorted.sort((left, right) => {
    const byPath = left.filePath.localeCompare(right.filePath);
    if (byPath !== 0) {
      return byPath;
    }
    return baseStartOf(left) - baseStartOf(right);
  });
  return sorted;
}