context-builder.ts
import type { PrMetadata, ChangedFile } from './types';

/** Rough chars-per-token ratio used to convert the token budget into a character budget. */
const CHARS_PER_TOKEN = 4;
/** Upper bound on the size of the assembled context, in (estimated) tokens. */
const MAX_TOKENS = 150_000;
/** The same budget expressed in characters; every size check below compares against this. */
const MAX_CHARS = MAX_TOKENS * CHARS_PER_TOKEN;
/** Per-file line cap applied when file contents have to be truncated. */
const MAX_FILE_LINES = 200;

/** Separator placed between top-level sections of the package. */
const SECTION_SEPARATOR = '\n\n';
/** Placeholder body for a section that was dropped to fit the budget. */
const OMITTED_NOTE = '<!-- omitted: context budget exceeded -->';
/** Extra attribute text emitted on every neighbor `<file>` element. */
const NEIGHBOR_ATTRS = ' relationship="imported by changed files"';
/** Marker appended inside `<full_diff>` when the diff itself had to be cut. */
const DIFF_TRUNCATION_NOTE = '\n... [truncated — diff exceeded context budget]';

/**
 * Keeps at most `maxLines` lines of `content`.
 *
 * @returns `content` unchanged when it already fits; otherwise the first
 *   `maxLines` lines followed by a one-line truncation marker.
 */
function truncateFileContent(content: string, maxLines: number): string {
  const lines = content.split('\n');
  if (lines.length <= maxLines) return content;
  return lines.slice(0, maxLines).join('\n') + `\n... [truncated after ${maxLines} lines]`;
}

/** Renders one `<file>` element; `attrs` is raw attribute text inserted after `path`. */
function renderFileEntry(path: string, content: string, attrs = ''): string {
  return ` <file path="${path}"${attrs}>\n${content}\n </file>\n`;
}

/** Renders `<tag>` wrapping one `<file>` element per `[path, content]` entry, in order. */
function renderFileSection(
  tag: string,
  entries: ReadonlyArray<readonly [string, string]>,
  attrs = ''
): string {
  const body = entries.map(([path, content]) => renderFileEntry(path, content, attrs)).join('');
  return `<${tag}>\n${body}</${tag}>`;
}

/** Renders `<tag>` containing only the "omitted" placeholder comment. */
function renderOmittedSection(tag: string): string {
  return `<${tag}>\n${OMITTED_NOTE}\n</${tag}>`;
}

/**
 * Returns the first `maxChars` UTF-16 code units of `text` (none when
 * `maxChars` is not positive), dropping a trailing high surrogate so the cut
 * never leaves half of a surrogate pair behind.
 */
function sliceWithinBudget(text: string, maxChars: number): string {
  if (maxChars <= 0) return '';
  if (text.length <= maxChars) return text;
  const lastUnit = text.charCodeAt(maxChars - 1);
  const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  return text.slice(0, endsOnHighSurrogate ? maxChars - 1 : maxChars);
}

/**
 * Assembles the text package describing a pull request: metadata, an optional
 * excluded-files summary, the diff, the hunk index, before/after file
 * contents and neighbor files, joined by blank lines in that order.
 *
 * When the package exceeds `MAX_CHARS`, sections are degraded step by step
 * until it fits:
 *   1. keep only the leading neighbor files that fit in the remaining budget;
 *   2. truncate every before/after file to `MAX_FILE_LINES` lines;
 *   3. replace the neighbor section with an "omitted" placeholder;
 *   4. replace both file-content sections with "omitted" placeholders;
 *   5. truncate the diff, keeping its wrapper and a truncation marker.
 *
 * Metadata, the excluded-files summary and the hunk index are never trimmed,
 * so the result can still exceed the budget when those alone are larger than
 * `MAX_CHARS`; in that case the diff body is emptied.
 *
 * @param prData PR metadata; reads `title`, `author`, `description`, `baseBranch`.
 * @param expandedDiff Diff text placed inside `<full_diff>`.
 * @param changedFiles Changed files; only `additions` / `deletions` and the count are used.
 * @param fileContents Path -> content, rendered under `<file_contents_before>`.
 * @param headFileContents Path -> content, rendered under `<file_contents_after>`.
 * @param neighborFiles Path -> content, rendered under `<neighbor_files>`.
 * @param hunkIndex Pre-rendered hunk index section, inserted verbatim.
 * @param excludedFilesSummary Optional pre-rendered section inserted after the metadata.
 * @returns The assembled package. Section sizes and warnings about applied
 *   degradation steps are written to the console.
 */
export function buildContextPackage(
  prData: PrMetadata,
  expandedDiff: string,
  changedFiles: ChangedFile[],
  fileContents: Record<string, string>,
  headFileContents: Record<string, string>,
  neighborFiles: Record<string, string>,
  hunkIndex: string,
  excludedFilesSummary?: string
): string {
  const totalAdditions = changedFiles.reduce((sum, file) => sum + file.additions, 0);
  const totalDeletions = changedFiles.reduce((sum, file) => sum + file.deletions, 0);

  const metaSection = [
    '<pr_metadata>',
    `Title: ${prData.title}`,
    `Author: ${prData.author}`,
    // `||` on purpose: an empty description should also show the placeholder.
    `Description: ${prData.description || '(no description)'}`,
    `Base branch: ${prData.baseBranch}`,
    `Files changed: ${changedFiles.length} | Lines added: ${totalAdditions} | Lines deleted: ${totalDeletions}`,
    '</pr_metadata>',
  ].join('\n');

  const excludedStr = excludedFilesSummary ?? '';
  const hunkIndexSection = hunkIndex;
  const neighborEntries = Object.entries(neighborFiles);

  let diffStr = `<full_diff>\n${expandedDiff}\n</full_diff>`;
  let fileContentsStr = renderFileSection('file_contents_before', Object.entries(fileContents));
  let headContentsStr = renderFileSection('file_contents_after', Object.entries(headFileContents));
  let neighborStr = renderFileSection('neighbor_files', neighborEntries, NEIGHBOR_ATTRS);

  // Single source of truth for both the size accounting and the final join,
  // so the measured size is always exactly the size of what gets returned.
  const currentSections = (): string[] => [
    metaSection,
    ...(excludedStr ? [excludedStr] : []),
    diffStr,
    hunkIndexSection,
    fileContentsStr,
    headContentsStr,
    neighborStr,
  ];
  const totalSize = (): number => {
    const parts = currentSections();
    const contentChars = parts.reduce((sum, part) => sum + part.length, 0);
    return contentChars + SECTION_SEPARATOR.length * (parts.length - 1);
  };
  const overBudget = (): boolean => totalSize() > MAX_CHARS;

  // Step 1: keep only as many neighbor files as fit in the remaining budget.
  if (overBudget()) {
    // Measure with an empty neighbor section so the wrapper tags (and the
    // excluded-files summary) are charged against the budget too.
    neighborStr = renderFileSection('neighbor_files', [], NEIGHBOR_ATTRS);
    const neighborBudget = Math.max(0, MAX_CHARS - totalSize());

    const kept: Array<[string, string]> = [];
    let usedChars = 0;
    for (const [path, content] of neighborEntries) {
      const entryChars = renderFileEntry(path, content, NEIGHBOR_ATTRS).length;
      // Stop at the first file that does not fit: earlier entries take priority.
      if (usedChars + entryChars > neighborBudget) break;
      kept.push([path, content]);
      usedChars += entryChars;
    }
    neighborStr = renderFileSection('neighbor_files', kept, NEIGHBOR_ATTRS);
  }

  // Step 2: truncate file contents to MAX_FILE_LINES lines each.
  if (overBudget()) {
    console.warn(
      `[context-builder] Context too large — truncating file contents to ${MAX_FILE_LINES} lines each`
    );
    const truncateAll = (files: Record<string, string>): Array<[string, string]> =>
      Object.entries(files).map(
        ([path, content]): [string, string] => [path, truncateFileContent(content, MAX_FILE_LINES)]
      );
    fileContentsStr = renderFileSection('file_contents_before', truncateAll(fileContents));
    headContentsStr = renderFileSection('file_contents_after', truncateAll(headFileContents));
  }

  // Step 3: drop neighbor files entirely.
  if (overBudget()) {
    console.warn('[context-builder] Still too large — dropping neighbor files');
    neighborStr = renderOmittedSection('neighbor_files');
  }

  // Step 4: drop file contents entirely (keep only the diff).
  if (overBudget()) {
    console.warn('[context-builder] Still too large — dropping file contents, keeping diff only');
    fileContentsStr = renderOmittedSection('file_contents_before');
    headContentsStr = renderOmittedSection('file_contents_after');
  }

  // Step 5: truncate the diff itself as a last resort.
  if (overBudget()) {
    const wrapTruncatedDiff = (body: string): string =>
      `<full_diff>\n${body}${DIFF_TRUNCATION_NOTE}\n</full_diff>`;

    // Measure with an empty diff body so the wrapper and the truncation marker
    // are part of the overhead. Clamp at 0: a negative end index passed to
    // slice() counts from the end of the string and would keep most of the diff.
    diffStr = wrapTruncatedDiff('');
    const diffBudget = Math.max(0, MAX_CHARS - totalSize());
    console.warn(
      `[context-builder] Diff too large (${expandedDiff.length} chars) — truncating to fit budget (${diffBudget} chars)`
    );
    diffStr = wrapTruncatedDiff(sliceWithinBudget(expandedDiff, diffBudget));
  }

  const finalPackage = currentSections().join(SECTION_SEPARATOR);
  const estimatedTokens = Math.ceil(finalPackage.length / CHARS_PER_TOKEN);

  // All figures are the sizes of the sections as emitted (after any trimming).
  console.log(
    `[context-builder] Section sizes (chars): diff=${diffStr.length}, hunkIndex=${hunkIndexSection.length}, fileBefore=${fileContentsStr.length}, fileAfter=${headContentsStr.length}, neighbors=${neighborStr.length}`
  );
  console.log(
    `[context-builder] Total context: ${finalPackage.length} chars (~${estimatedTokens.toLocaleString()} tokens) | Budget: ${MAX_CHARS} chars (~${MAX_TOKENS.toLocaleString()} tokens)`
  );

  return finalPackage;
}