/ lib / diff-lines.ts
diff-lines.ts
  1  import type { DiffSide } from './types';
  2  
  3  export interface DiffLineInfo {
  4    lineNumber: number;
  5    baseLineNumber: number | null;
  6    headLineNumber: number | null;
  7    side: DiffSide;
  8    type: 'add' | 'remove' | 'context';
  9    text: string;
 10  }
 11  
 12  export interface SplitRow {
 13    left: { info: DiffLineInfo; html: string } | null;
 14    right: { info: DiffLineInfo; html: string } | null;
 15  }
 16  
 17  /**
 18   * Detect whether the content string has unified diff prefix markers on every
 19   * non-empty line (`+`, `-`, or ` `). Returns false on the first line that
 20   * doesn't start with one of these characters.
 21   */
 22  export function contentHasDiffMarkers(content: string): boolean {
 23    const lines = content.split('\n');
 24    let nonEmptyCount = 0;
 25    for (const line of lines) {
 26      if (line === '') continue;
 27      nonEmptyCount++;
 28      const ch = line[0];
 29      if (ch !== '+' && ch !== '-' && ch !== ' ') return false;
 30    }
 31    return nonEmptyCount > 0;
 32  }
 33  
 34  /**
 35   * Parse a hunk header and its content lines into per-line metadata.
 36   *
 37   * The hunk header format is: @@ -oldStart,oldCount +newStart,newCount @@
 38   * Content lines are prefixed with '+' (add), '-' (remove), or ' ' (context).
 39   *
 40   * When the AI omits diff prefix markers, the function detects this and treats
 41   * every line as full text. For new-file hunks (oldCount === 0) lines are
 42   * treated as additions; otherwise they are treated as context.
 43   *
 44   * This mirrors the same line-splitting logic used by renderDiffHunk in
 45   * lib/highlight.ts so that indices stay aligned with the rendered Shiki HTML.
 46   */
 47  export function parseDiffLines(hunkHeader: string, content: string): DiffLineInfo[] {
 48    const match = hunkHeader.match(/@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@/);
 49    if (!match) return [];
 50  
 51    let oldLine = parseInt(match[1], 10);
 52    let newLine = parseInt(match[3], 10);
 53  
 54    const hasMarkers = contentHasDiffMarkers(content);
 55    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- regex optional group can be undefined at runtime
 56    const oldCount = match[2] !== undefined ? parseInt(match[2], 10) : 1;
 57    const isNewFile = oldLine === 0 && oldCount === 0;
 58  
 59    const result: DiffLineInfo[] = [];
 60  
 61    for (const line of content.split('\n')) {
 62      if (line === '') continue;
 63  
 64      if (hasMarkers) {
 65        const prefix = line[0];
 66        const text = line.slice(1);
 67  
 68        if (prefix === '+') {
 69          result.push({
 70            lineNumber: newLine,
 71            baseLineNumber: null,
 72            headLineNumber: newLine,
 73            side: 'RIGHT',
 74            type: 'add',
 75            text,
 76          });
 77          newLine++;
 78        } else if (prefix === '-') {
 79          result.push({
 80            lineNumber: oldLine,
 81            baseLineNumber: oldLine,
 82            headLineNumber: null,
 83            side: 'LEFT',
 84            type: 'remove',
 85            text,
 86          });
 87          oldLine++;
 88        } else {
 89          result.push({
 90            lineNumber: newLine,
 91            baseLineNumber: oldLine,
 92            headLineNumber: newLine,
 93            side: 'RIGHT',
 94            type: 'context',
 95            text,
 96          });
 97          oldLine++;
 98          newLine++;
 99        }
100      } else {
101        // No diff markers — use full line text
102        if (isNewFile) {
103          result.push({
104            lineNumber: newLine,
105            baseLineNumber: null,
106            headLineNumber: newLine,
107            side: 'RIGHT',
108            type: 'add',
109            text: line,
110          });
111          newLine++;
112        } else {
113          result.push({
114            lineNumber: newLine,
115            baseLineNumber: oldLine,
116            headLineNumber: newLine,
117            side: 'RIGHT',
118            type: 'context',
119            text: line,
120          });
121          oldLine++;
122          newLine++;
123        }
124      }
125    }
126  
127    return result;
128  }
129  
/**
 * Arrange parsed diff lines and their Shiki HTML (matched by index) into rows
 * for a side-by-side view.
 *
 * A context line yields one row with the line on both sides: the left copy is
 * numbered with its base line number and the right copy with its head line
 * number. A change chunk — a run of removes immediately followed by a run of
 * adds — is zipped pairwise into rows, and the shorter column is padded with
 * `null`.
 */
export function buildSplitRows(lineInfos: DiffLineInfo[], lineHtmls: string[]): SplitRow[] {
  type Cell = { info: DiffLineInfo; html: string };

  const out: SplitRow[] = [];
  const total = lineInfos.length;
  let cursor = 0;

  // Consume the run of consecutive lines of the given kind starting at the cursor.
  const takeRun = (kind: DiffLineInfo['type']): Cell[] => {
    const run: Cell[] = [];
    for (; cursor < total && lineInfos[cursor].type === kind; cursor++) {
      run.push({ info: lineInfos[cursor], html: lineHtmls[cursor] });
    }
    return run;
  };

  while (cursor < total) {
    const current = lineInfos[cursor];

    if (current.type === 'context') {
      const html = lineHtmls[cursor];
      const leftInfo: DiffLineInfo = {
        ...current,
        lineNumber: current.baseLineNumber ?? current.lineNumber,
        side: 'LEFT',
      };
      const rightInfo: DiffLineInfo = {
        ...current,
        lineNumber: current.headLineNumber ?? current.lineNumber,
        side: 'RIGHT',
      };
      out.push({ left: { info: leftInfo, html }, right: { info: rightInfo, html } });
      cursor += 1;
      continue;
    }

    // Change chunk: removes first, then the adds that directly follow them.
    const removed = takeRun('remove');
    const added = takeRun('add');

    const height = Math.max(removed.length, added.length);
    for (let k = 0; k < height; k++) {
      out.push({ left: removed[k] ?? null, right: added[k] ?? null });
    }
  }

  return out;
}
182  
/**
 * Parse a GitHub file patch (from pulls.listFiles) into a set of valid
 * "line:side" keys. These represent lines that GitHub's API will accept
 * for line-level review comments.
 *
 * This is needed because Gnosis expands diff context from 3 to 10 lines
 * before sending to the AI, so the AI-generated hunks may include lines
 * that aren't in the original GitHub diff.
 *
 * Added lines contribute `<newLine>:RIGHT`, removed lines `<oldLine>:LEFT`,
 * and context lines contribute both keys.
 *
 * File header lines (`diff `, `index `, `--- `, `+++ `) and `\ No newline`
 * markers are ignored. Because a removed line whose text starts with `-- `
 * (or an added line starting with `++ `) looks exactly like a `--- ` / `+++ `
 * file header, the counts from the hunk header are used to tell them apart:
 * while the current hunk still expects old (resp. new) lines, such a line is
 * treated as hunk body rather than as a header.
 *
 * @param patch - Unified diff text for a single file.
 * @returns The set of `"<line>:<LEFT|RIGHT>"` keys present in the patch.
 */
export function parsePatchValidLines(patch: string): Set<string> {
  const valid = new Set<string>();
  let oldLine = 0;
  let newLine = 0;
  // Lines the current hunk header still expects on each side.
  let oldRemaining = 0;
  let newRemaining = 0;

  for (const rawLine of patch.split('\n')) {
    const hunkMatch = rawLine.match(/^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@/);
    if (hunkMatch) {
      oldLine = parseInt(hunkMatch[1], 10);
      newLine = parseInt(hunkMatch[3], 10);
      // An omitted count means 1 in unified diff hunk headers.
      // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- regex optional group can be undefined at runtime
      oldRemaining = hunkMatch[2] !== undefined ? parseInt(hunkMatch[2], 10) : 1;
      // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- regex optional group can be undefined at runtime
      newRemaining = hunkMatch[4] !== undefined ? parseInt(hunkMatch[4], 10) : 1;
      continue;
    }

    // Lines that can never be hunk body (body lines start with '+', '-' or ' ').
    if (rawLine.startsWith('\\') || rawLine.startsWith('index ')) {
      continue;
    }
    if (rawLine.startsWith('diff ')) {
      // A new file section begins; whatever follows is header material again.
      oldRemaining = 0;
      newRemaining = 0;
      continue;
    }

    // '--- ' / '+++ ' are file headers only when the hunk is not expecting
    // more lines on that side; otherwise they are removed/added content.
    if (rawLine.startsWith('--- ') && oldRemaining <= 0) continue;
    if (rawLine.startsWith('+++ ') && newRemaining <= 0) continue;

    const prefix = rawLine[0];
    if (prefix === '+') {
      valid.add(`${newLine}:RIGHT`);
      newLine++;
      if (newRemaining > 0) newRemaining--;
    } else if (prefix === '-') {
      valid.add(`${oldLine}:LEFT`);
      oldLine++;
      if (oldRemaining > 0) oldRemaining--;
    } else if (prefix === ' ') {
      valid.add(`${newLine}:RIGHT`);
      valid.add(`${oldLine}:LEFT`);
      oldLine++;
      newLine++;
      if (oldRemaining > 0) oldRemaining--;
      if (newRemaining > 0) newRemaining--;
    }
  }

  return valid;
}