/ bench / benchmark.ts
benchmark.ts
/**
 * Benchmark: diff parsing + context expansion pipeline
 *
 * Measures the core CPU-bound code path, timed end to end per iteration:
 *   changed files + raw diff string → classifyFiles → filterDiff
 *     → parseUnifiedDiff → buildIndexedHunks → expandFullDiff → buildContextPackage
 *
 * Uses a synthetic diff representing a medium-sized PR (20 files, 5 hunks each,
 * 100 hunks in total).
 */
  9  
 10  import { parseUnifiedDiff, buildIndexedHunks, expandFullDiff } from '../lib/diff-parse';
 11  import { classifyFiles, filterDiff } from '../lib/file-filter';
 12  import { buildContextPackage } from '../lib/context-builder';
 13  import type { ChangedFile, PrMetadata } from '../lib/types';
 14  
 15  // ── Generate synthetic data ──────────────────────────────────────
 16  
 17  function generateSourceFile(lines: number): string {
 18    const out: string[] = [];
 19    for (let i = 0; i < lines; i++) {
 20      out.push(`  const value_${i} = compute(${i}, ${i * 2}); // line ${i + 1}`);
 21    }
 22    return out.join('\n');
 23  }
 24  
 25  function generateDiff(fileCount: number, hunksPerFile: number): string {
 26    const parts: string[] = [];
 27    for (let f = 0; f < fileCount; f++) {
 28      const ext = ['ts', 'tsx', 'py', 'go', 'rs'][f % 5];
 29      const path = `src/module${f}/handler.${ext}`;
 30      parts.push(`diff --git a/${path} b/${path}`);
 31      parts.push(`index abc1234..def5678 100644`);
 32      parts.push(`--- a/${path}`);
 33      parts.push(`+++ b/${path}`);
 34  
 35      for (let h = 0; h < hunksPerFile; h++) {
 36        const start = 10 + h * 30;
 37        // 3 lines context, some adds/removes
 38        parts.push(`@@ -${start},10 +${start},12 @@ function handler_${h}() {`);
 39        parts.push(` const a = 1;`);
 40        parts.push(` const b = 2;`);
 41        parts.push(` const c = 3;`);
 42        parts.push(`-const old_val = legacy(a, b);`);
 43        parts.push(`-const old_res = transform(old_val);`);
 44        parts.push(`+const new_val = modern(a, b, c);`);
 45        parts.push(`+const new_res = process(new_val);`);
 46        parts.push(`+const extra = validate(new_res);`);
 47        parts.push(` return result;`);
 48        parts.push(` const d = 4;`);
 49        parts.push(` const e = 5;`);
 50        parts.push(` const f = 6;`);
 51      }
 52    }
 53    return parts.join('\n');
 54  }
 55  
 56  function generateFileContents(
 57    fileCount: number
 58  ): { before: Record<string, string>; after: Record<string, string> } {
 59    const before: Record<string, string> = {};
 60    const after: Record<string, string> = {};
 61    for (let f = 0; f < fileCount; f++) {
 62      const ext = ['ts', 'tsx', 'py', 'go', 'rs'][f % 5];
 63      const path = `src/module${f}/handler.${ext}`;
 64      before[path] = generateSourceFile(200);
 65      after[path] = generateSourceFile(210);
 66    }
 67    return { before, after };
 68  }
 69  
 70  function generateChangedFiles(fileCount: number): ChangedFile[] {
 71    const files: ChangedFile[] = [];
 72    for (let f = 0; f < fileCount; f++) {
 73      const ext = ['ts', 'tsx', 'py', 'go', 'rs'][f % 5];
 74      files.push({
 75        filename: `src/module${f}/handler.${ext}`,
 76        status: 'modified',
 77        additions: 15,
 78        deletions: 10,
 79      });
 80    }
 81    // Add a lock file to test filtering
 82    files.push({ filename: 'package-lock.json', status: 'modified', additions: 500, deletions: 400 });
 83    return files;
 84  }
 85  
 86  // ── Run benchmark ────────────────────────────────────────────────
 87  
 88  const FILE_COUNT = 20;
 89  const HUNKS_PER_FILE = 5;
 90  const ITERATIONS = 50;
 91  
 92  const diff = generateDiff(FILE_COUNT, HUNKS_PER_FILE);
 93  const { before, after } = generateFileContents(FILE_COUNT);
 94  const changedFiles = generateChangedFiles(FILE_COUNT);
 95  const prData: PrMetadata = {
 96    title: 'Benchmark PR',
 97    author: 'bench-bot',
 98    description: 'Synthetic benchmark workload',
 99    baseBranch: 'main',
100    headBranch: 'feature/bench',
101    headSha: 'def456',
102    merged: false,
103    state: 'open',
104    createdAt: '2026-01-01T00:00:00Z',
105    updatedAt: '2026-01-01T00:00:00Z',
106    url: 'https://github.com/test/bench/pull/1',
107    labels: [],
108    mergeable: true,
109    isDraft: false,
110    commitCount: 3,
111    requestedReviewers: [],
112    requestedTeams: [],
113    mergeableState: 'clean',
114    autoMerge: null,
115    milestone: null,
116  };
117  
118  // Warmup
119  parseUnifiedDiff(diff);
120  buildIndexedHunks(diff, before, after);
121  
122  const start = performance.now();
123  
124  for (let i = 0; i < ITERATIONS; i++) {
125    // 1. File classification & filtering
126    const { normalFiles, generatedFiles } = classifyFiles(changedFiles);
127    const generatedSet = new Set(generatedFiles.map((f) => f.filename));
128    const filteredDiff = filterDiff(diff, generatedSet);
129  
130    // 2. Parse diff into hunks
131    const hunks = parseUnifiedDiff(filteredDiff);
132  
133    // 3. Build indexed hunks (parse + expand context + infer language)
134    const indexed = buildIndexedHunks(filteredDiff, before, after, 15);
135  
136    // 4. Expand full diff with context
137    const expanded = expandFullDiff(filteredDiff, before, after, 15);
138  
139    // 5. Build context package
140    const hunkIndex = indexed
141      .map((h) => `  <hunk id="${h.id}" file="${h.filePath}" />`)
142      .join('\n');
143    buildContextPackage(
144      prData,
145      expanded,
146      normalFiles,
147      before,
148      after,
149      {},
150      `<hunk_index>\n${hunkIndex}\n</hunk_index>`
151    );
152  
153    // Prevent dead-code elimination
154    if (hunks.length === 0 || indexed.length === 0 || expanded.length === 0) {
155      throw new Error('unexpected empty result');
156    }
157  }
158  
159  const elapsed = performance.now() - start;
160  const avgMs = elapsed / ITERATIONS;
161  
162  console.log(`diff_parse_pipeline : ${avgMs.toFixed(1)} ms`);