/ lib / context-builder.ts
context-builder.ts
  1  import type { PrMetadata, ChangedFile } from './types';
  2  
  3  const CHARS_PER_TOKEN = 4;
  4  const MAX_TOKENS = 150_000;
  5  const MAX_CHARS = MAX_TOKENS * CHARS_PER_TOKEN;
  6  const MAX_FILE_LINES = 200;
  7  
  8  function truncateFileContent(content: string, maxLines: number): string {
  9    const lines = content.split('\n');
 10    if (lines.length <= maxLines) return content;
 11    return lines.slice(0, maxLines).join('\n') + `\n... [truncated after ${maxLines} lines]`;
 12  }
 13  
 14  export function buildContextPackage(
 15    prData: PrMetadata,
 16    expandedDiff: string,
 17    changedFiles: ChangedFile[],
 18    fileContents: Record<string, string>,
 19    headFileContents: Record<string, string>,
 20    neighborFiles: Record<string, string>,
 21    hunkIndex: string,
 22    excludedFilesSummary?: string
 23  ): string {
 24    const totalAdditions = changedFiles.reduce((s, f) => s + f.additions, 0);
 25    const totalDeletions = changedFiles.reduce((s, f) => s + f.deletions, 0);
 26  
 27    const metaSection = `<pr_metadata>
 28  Title: ${prData.title}
 29  Author: ${prData.author}
 30  Description: ${prData.description || '(no description)'}
 31  Base branch: ${prData.baseBranch}
 32  Files changed: ${changedFiles.length} | Lines added: ${totalAdditions} | Lines deleted: ${totalDeletions}
 33  </pr_metadata>`;
 34  
 35    const diffSection = `<full_diff>
 36  ${expandedDiff}
 37  </full_diff>`;
 38  
 39    const hunkIndexSection = hunkIndex;
 40  
 41    // Build file_contents (before) section
 42    let fileContentsSection = '<file_contents_before>\n';
 43    for (const [path, content] of Object.entries(fileContents)) {
 44      fileContentsSection += `  <file path="${path}">\n${content}\n  </file>\n`;
 45    }
 46    fileContentsSection += '</file_contents_before>';
 47  
 48    // Build file_contents_after (head) section
 49    let headContentsSection = '<file_contents_after>\n';
 50    for (const [path, content] of Object.entries(headFileContents)) {
 51      headContentsSection += `  <file path="${path}">\n${content}\n  </file>\n`;
 52    }
 53    headContentsSection += '</file_contents_after>';
 54  
 55    // Build neighbor_files section
 56    let neighborSection = '<neighbor_files>\n';
 57    for (const [path, content] of Object.entries(neighborFiles)) {
 58      neighborSection += `  <file path="${path}" relationship="imported by changed files">\n${content}\n  </file>\n`;
 59    }
 60    neighborSection += '</neighbor_files>';
 61  
 62    let diffStr = diffSection;
 63    let fileContentsStr = fileContentsSection;
 64    let headContentsStr = headContentsSection;
 65    let neighborStr = neighborSection;
 66  
 67    const excludedStr = excludedFilesSummary ?? '';
 68  
 69    function totalSize(): number {
 70      return (
 71        metaSection.length +
 72        excludedStr.length +
 73        4 +
 74        diffStr.length +
 75        4 +
 76        hunkIndexSection.length +
 77        4 +
 78        fileContentsStr.length +
 79        4 +
 80        headContentsStr.length +
 81        4 +
 82        neighborStr.length
 83      );
 84    }
 85  
 86    // Step 1: Drop neighbor files to fit budget
 87    if (totalSize() > MAX_CHARS) {
 88      const neighborBudget = Math.max(
 89        0,
 90        MAX_CHARS -
 91          metaSection.length -
 92          diffStr.length -
 93          hunkIndexSection.length -
 94          fileContentsStr.length -
 95          headContentsStr.length -
 96          20
 97      );
 98      let neighborChars = 0;
 99      const truncatedNeighbor: Record<string, string> = {};
100      for (const [p, content] of Object.entries(neighborFiles)) {
101        const entry = `  <file path="${p}" relationship="imported by changed files">\n${content}\n  </file>\n`;
102        if (neighborChars + entry.length > neighborBudget) break;
103        truncatedNeighbor[p] = content;
104        neighborChars += entry.length;
105      }
106      neighborStr = '<neighbor_files>\n';
107      for (const [p, content] of Object.entries(truncatedNeighbor)) {
108        neighborStr += `  <file path="${p}" relationship="imported by changed files">\n${content}\n  </file>\n`;
109      }
110      neighborStr += '</neighbor_files>';
111    }
112  
113    // Step 2: Truncate file contents to 200 lines each
114    if (totalSize() > MAX_CHARS) {
115      console.warn('[context-builder] Context too large — truncating file contents to 200 lines each');
116      fileContentsStr = '<file_contents_before>\n';
117      for (const [p, content] of Object.entries(fileContents)) {
118        fileContentsStr += `  <file path="${p}">\n${truncateFileContent(content, MAX_FILE_LINES)}\n  </file>\n`;
119      }
120      fileContentsStr += '</file_contents_before>';
121  
122      headContentsStr = '<file_contents_after>\n';
123      for (const [p, content] of Object.entries(headFileContents)) {
124        headContentsStr += `  <file path="${p}">\n${truncateFileContent(content, MAX_FILE_LINES)}\n  </file>\n`;
125      }
126      headContentsStr += '</file_contents_after>';
127    }
128  
129    // Step 3: Drop neighbor files entirely
130    if (totalSize() > MAX_CHARS) {
131      console.warn('[context-builder] Still too large — dropping neighbor files');
132      neighborStr = '<neighbor_files>\n<!-- omitted: context budget exceeded -->\n</neighbor_files>';
133    }
134  
135    // Step 4: Drop file contents entirely (keep only the diff)
136    if (totalSize() > MAX_CHARS) {
137      console.warn('[context-builder] Still too large — dropping file contents, keeping diff only');
138      fileContentsStr = '<file_contents_before>\n<!-- omitted: context budget exceeded -->\n</file_contents_before>';
139      headContentsStr = '<file_contents_after>\n<!-- omitted: context budget exceeded -->\n</file_contents_after>';
140    }
141  
142    // Step 5: Truncate the diff itself as a last resort
143    if (totalSize() > MAX_CHARS) {
144      const overhead =
145        metaSection.length +
146        hunkIndexSection.length +
147        fileContentsStr.length +
148        headContentsStr.length +
149        neighborStr.length +
150        20;
151      const diffBudget = MAX_CHARS - overhead;
152      console.warn(
153        `[context-builder] Diff too large (${expandedDiff.length} chars) — truncating to fit budget (${diffBudget} chars)`
154      );
155      const truncatedDiff = expandedDiff.slice(0, diffBudget);
156      diffStr = `<full_diff>\n${truncatedDiff}\n... [truncated — diff exceeded context budget]\n</full_diff>`;
157    }
158  
159    const finalPackage =
160      metaSection +
161      (excludedStr ? '\n\n' + excludedStr : '') +
162      '\n\n' +
163      diffStr +
164      '\n\n' +
165      hunkIndexSection +
166      '\n\n' +
167      fileContentsStr +
168      '\n\n' +
169      headContentsStr +
170      '\n\n' +
171      neighborStr;
172    const estimatedTokens = Math.ceil(finalPackage.length / CHARS_PER_TOKEN);
173  
174    console.log(
175      `[context-builder] Section sizes (chars): diff=${expandedDiff.length}, hunkIndex=${hunkIndexSection.length}, fileBefore=${fileContentsStr.length}, fileAfter=${headContentsStr.length}, neighbors=${neighborStr.length}`
176    );
177    console.log(
178      `[context-builder] Total context: ${finalPackage.length} chars (~${estimatedTokens.toLocaleString()} tokens) | Budget: ${MAX_CHARS} chars (~${MAX_TOKENS.toLocaleString()} tokens)`
179    );
180  
181    return finalPackage;
182  }