benchmark.ts
/**
 * Benchmark: diff parsing + context expansion pipeline
 *
 * Measures the core CPU-bound code path:
 *   raw diff string → parseUnifiedDiff → expandHunkContext → buildIndexedHunks
 *
 * Uses a synthetic diff representing a medium-sized PR (~20 files, ~100 hunks).
 */

import { parseUnifiedDiff, buildIndexedHunks, expandFullDiff } from '../lib/diff-parse';
import { classifyFiles, filterDiff } from '../lib/file-filter';
import { buildContextPackage } from '../lib/context-builder';
import type { ChangedFile, PrMetadata } from '../lib/types';

// ── Generate synthetic data ──────────────────────────────────────

/** File extensions cycled through by the synthetic file generators. */
const SYNTHETIC_EXTENSIONS = ['ts', 'tsx', 'py', 'go', 'rs'] as const;

/**
 * Body shared by every synthetic hunk: 3 leading context lines, 2 removals,
 * 3 additions and 4 trailing context lines. The first character of each
 * entry is the unified-diff line marker (' ', '-' or '+').
 */
const SYNTHETIC_HUNK_BODY: readonly string[] = [
  ' const a = 1;',
  ' const b = 2;',
  ' const c = 3;',
  '-const old_val = legacy(a, b);',
  '-const old_res = transform(old_val);',
  '+const new_val = modern(a, b, c);',
  '+const new_res = process(new_val);',
  '+const extra = validate(new_res);',
  ' return result;',
  ' const d = 4;',
  ' const e = 5;',
  ' const f = 6;',
];

/**
 * Returns the path of the synthetic file with the given index.
 * All generators below go through this helper so that the diff, the file
 * contents and the changed-file list always refer to the same paths.
 */
function syntheticPath(fileIndex: number): string {
  const ext = SYNTHETIC_EXTENSIONS[fileIndex % SYNTHETIC_EXTENSIONS.length];
  return `src/module${fileIndex}/handler.${ext}`;
}

/**
 * Builds a synthetic source file.
 *
 * @param lines Number of lines to generate.
 * @returns The lines joined with '\n' (no trailing newline).
 */
function generateSourceFile(lines: number): string {
  const out: string[] = [];
  for (let i = 0; i < lines; i++) {
    out.push(` const value_${i} = compute(${i}, ${i * 2}); // line ${i + 1}`);
  }
  return out.join('\n');
}

/**
 * Builds a synthetic unified diff.
 *
 * Each file gets a `diff --git` header followed by `hunksPerFile` hunks that
 * all share SYNTHETIC_HUNK_BODY. The line counts in every `@@` header are
 * derived from that body, and the new-side start accounts for the lines
 * added by earlier hunks in the same file, so the header always agrees with
 * the hunk it introduces.
 *
 * @param fileCount Number of files in the diff.
 * @param hunksPerFile Number of hunks emitted per file.
 * @returns The diff text, lines joined with '\n'.
 */
function generateDiff(fileCount: number, hunksPerFile: number): string {
  // Old side = context + removed lines; new side = context + added lines.
  const oldCount = SYNTHETIC_HUNK_BODY.filter((line) => !line.startsWith('+')).length;
  const newCount = SYNTHETIC_HUNK_BODY.filter((line) => !line.startsWith('-')).length;
  const netGrowth = newCount - oldCount;

  const parts: string[] = [];
  for (let f = 0; f < fileCount; f++) {
    const path = syntheticPath(f);
    parts.push(`diff --git a/${path} b/${path}`);
    parts.push(`index abc1234..def5678 100644`);
    parts.push(`--- a/${path}`);
    parts.push(`+++ b/${path}`);

    for (let h = 0; h < hunksPerFile; h++) {
      const oldStart = 10 + h * 30;
      // Every preceding hunk in this file shifted the new side by netGrowth lines.
      const newStart = oldStart + h * netGrowth;
      parts.push(
        `@@ -${oldStart},${oldCount} +${newStart},${newCount} @@ function handler_${h}() {`
      );
      parts.push(...SYNTHETIC_HUNK_BODY);
    }
  }
  return parts.join('\n');
}

/**
 * Builds before/after file contents keyed by the same paths used in the diff.
 *
 * @param fileCount Number of files to generate.
 * @returns Maps from path to a 200-line "before" and a 210-line "after" file.
 */
function generateFileContents(
  fileCount: number
): { before: Record<string, string>; after: Record<string, string> } {
  const before: Record<string, string> = {};
  const after: Record<string, string> = {};
  for (let f = 0; f < fileCount; f++) {
    const path = syntheticPath(f);
    before[path] = generateSourceFile(200);
    after[path] = generateSourceFile(210);
  }
  return { before, after };
}

/**
 * Builds the changed-file list for the synthetic PR.
 *
 * @param fileCount Number of source files to list.
 * @returns One 'modified' entry per source file plus a `package-lock.json`
 *   entry that exists to exercise file filtering.
 */
function generateChangedFiles(fileCount: number): ChangedFile[] {
  const files: ChangedFile[] = [];
  for (let f = 0; f < fileCount; f++) {
    files.push({
      filename: syntheticPath(f),
      status: 'modified',
      // Matches 5 hunks per file × (3 added, 2 removed) lines per hunk.
      additions: 15,
      deletions: 10,
    });
  }
  // Add a lock file to test filtering
  files.push({ filename: 'package-lock.json', status: 'modified', additions: 500, deletions: 400 });
  return files;
}

// ── Run benchmark ────────────────────────────────────────────────

const FILE_COUNT = 20;
const HUNKS_PER_FILE = 5;
const ITERATIONS = 50;

const diff = generateDiff(FILE_COUNT, HUNKS_PER_FILE);
const { before, after } = generateFileContents(FILE_COUNT);
const changedFiles = generateChangedFiles(FILE_COUNT);
const prData: PrMetadata = {
  title: 'Benchmark PR',
  author: 'bench-bot',
  description: 'Synthetic benchmark workload',
  baseBranch: 'main',
  headBranch: 'feature/bench',
  headSha: 'def456',
  merged: false,
  state: 'open',
  createdAt: '2026-01-01T00:00:00Z',
  updatedAt: '2026-01-01T00:00:00Z',
  url: 'https://github.com/test/bench/pull/1',
  labels: [],
  mergeable: true,
  isDraft: false,
  commitCount: 3,
  requestedReviewers: [],
  requestedTeams: [],
  mergeableState: 'clean',
  autoMerge: null,
  milestone: null,
};

// Warmup
parseUnifiedDiff(diff);
/**
 * Runs one complete pass of the benchmarked pipeline over the synthetic PR:
 * classify/filter files, parse the diff, build indexed hunks, expand the
 * full diff and assemble the context package.
 *
 * @throws Error if parsing, indexing or expansion yields an empty result.
 */
function runPipelineOnce(): void {
  // 1. File classification & filtering
  const { normalFiles, generatedFiles } = classifyFiles(changedFiles);
  const generatedSet = new Set(generatedFiles.map((f) => f.filename));
  const filteredDiff = filterDiff(diff, generatedSet);

  // 2. Parse diff into hunks
  const hunks = parseUnifiedDiff(filteredDiff);

  // 3. Build indexed hunks (parse + expand context + infer language)
  const indexed = buildIndexedHunks(filteredDiff, before, after, 15);

  // 4. Expand full diff with context
  const expanded = expandFullDiff(filteredDiff, before, after, 15);

  // 5. Build context package
  const hunkIndex = indexed
    .map((h) => ` <hunk id="${h.id}" file="${h.filePath}" />`)
    .join('\n');
  buildContextPackage(
    prData,
    expanded,
    normalFiles,
    before,
    after,
    {},
    `<hunk_index>\n${hunkIndex}\n</hunk_index>`
  );

  // Prevent dead-code elimination
  if (hunks.length === 0 || indexed.length === 0 || expanded.length === 0) {
    throw new Error('unexpected empty result');
  }
}

// Warmup: run the exact calls and arguments used by the timed iterations, so
// one-time first-call costs are less likely to be charged to the measurement.
runPipelineOnce();

const start = performance.now();

for (let i = 0; i < ITERATIONS; i++) {
  runPipelineOnce();
}

const elapsed = performance.now() - start;
const avgMs = elapsed / ITERATIONS;

console.log(`diff_parse_pipeline : ${avgMs.toFixed(1)} ms`);