/ lib / github.ts
github.ts
  1  import { Octokit } from '@octokit/rest';
  2  import { getProvider } from './provider';
  3  import type { ChangedFile, CiCheck, FileMetadata, PrMetadata, PrSearchResult, Provider, ReviewSummary } from './types';
  4  
  5  export function parsePrUrl(url: string): { owner: string; repo: string; pullNumber: number } {
  6    const match = url.match(/github\.com\/([^/]+)\/([^/]+)\/pulls?\/(\d+)/);
  7    if (!match) {
  8      throw new Error(`Invalid GitHub PR URL: ${url}`);
  9    }
 10    return {
 11      owner: match[1],
 12      repo: match[2],
 13      pullNumber: parseInt(match[3], 10),
 14    };
 15  }
 16  
 17  export async function getPrMetadata(
 18    octokit: Octokit,
 19    owner: string,
 20    repo: string,
 21    pullNumber: number
 22  ): Promise<PrMetadata> {
 23    const { data } = await octokit.pulls.get({ owner, repo, pull_number: pullNumber });
 24    return {
 25      title: data.title,
 26      description: data.body ?? '',
 27      // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- GitHub API can return null
 28      author: data.user?.login ?? 'unknown',
 29      baseBranch: data.base.ref,
 30      headBranch: data.head.ref,
 31      baseSha: data.base.sha,
 32      headSha: data.head.sha,
 33      // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- GitHub API can return null
 34      merged: data.merged ?? false,
 35      state: data.state,
 36      createdAt: data.created_at,
 37      updatedAt: data.updated_at,
 38      url: data.html_url,
 39      // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- GitHub API can return null
 40      labels: (data.labels ?? []).map((l) => (typeof l === 'string' ? l : (l.name ?? ''))).filter(Boolean),
 41      mergeable: data.mergeable ?? null,
 42      isDraft: data.draft ?? false,
 43      commitCount: data.commits,
 44      requestedReviewers: (data.requested_reviewers ?? []).map((u) => u.login),
 45      requestedTeams: (data.requested_teams ?? []).map((t) => t.name),
 46      // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- GitHub API can return null
 47      mergeableState: data.mergeable_state ?? null,
 48      autoMerge: data.auto_merge ? { method: data.auto_merge.merge_method } : null,
 49      milestone: data.milestone ? { title: data.milestone.title, dueOn: data.milestone.due_on } : null,
 50    };
 51  }
 52  
 53  export async function getPrDiff(octokit: Octokit, owner: string, repo: string, pullNumber: number): Promise<string> {
 54    const response = await octokit.request('GET /repos/{owner}/{repo}/pulls/{pull_number}', {
 55      owner,
 56      repo,
 57      pull_number: pullNumber,
 58      headers: {
 59        accept: 'application/vnd.github.v3.diff',
 60      },
 61    });
 62    return response.data as unknown as string;
 63  }
 64  
 65  export async function getChangedFiles(
 66    octokit: Octokit,
 67    owner: string,
 68    repo: string,
 69    pullNumber: number
 70  ): Promise<ChangedFile[]> {
 71    const files: ChangedFile[] = [];
 72    let page = 1;
 73    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- pagination loop
 74    while (true) {
 75      const { data } = await octokit.pulls.listFiles({
 76        owner,
 77        repo,
 78        pull_number: pullNumber,
 79        per_page: 100,
 80        page,
 81      });
 82      for (const f of data) {
 83        files.push({
 84          filename: f.filename,
 85          status: f.status as ChangedFile['status'],
 86          additions: f.additions,
 87          deletions: f.deletions,
 88          previous_filename: f.previous_filename,
 89        });
 90      }
 91      if (data.length < 100) break;
 92      page++;
 93    }
 94    return files;
 95  }
 96  
 97  export async function getFileContent(
 98    octokit: Octokit,
 99    owner: string,
100    repo: string,
101    path: string,
102    ref: string
103  ): Promise<string | null> {
104    try {
105      const { data } = await octokit.repos.getContent({ owner, repo, path, ref });
106      if (Array.isArray(data) || data.type !== 'file') return null;
107      if ('content' in data && data.content) {
108        return Buffer.from(data.content, 'base64').toString('utf-8');
109      }
110      return null;
111    } catch {
112      return null;
113    }
114  }
115  
/**
 * Finds open pull requests that `login` either authored or was asked to review.
 *
 * Two searches run concurrently, each sorted by most recently updated. The
 * results are merged by URL, so a pull request found by both searches appears
 * once, with the `'review-requested'` role. The merged list is sorted by
 * `updatedAt`, newest first, and cut to `limit` entries.
 *
 * @param octokit - Authenticated Octokit client.
 * @param login - GitHub login to search for.
 * @param limit - Page size of each search and maximum length of the result.
 * @returns The merged search results.
 */
export async function searchPullRequests(octokit: Octokit, login: string, limit = 30): Promise<PrSearchResult[]> {
  const searchAs = async (q: string, role: 'author' | 'review-requested') => {
    const response = await octokit.search.issuesAndPullRequests({
      q,
      sort: 'updated',
      order: 'desc',
      per_page: limit,
    });
    return response.data.items.map((item) => {
      // repository_url looks like "https://api.github.com/repos/owner/name"
      const urlSegments = item.repository_url.split('/');
      return {
        number: item.number,
        title: item.title,
        url: item.html_url,
        repoOwner: urlSegments.at(-2) ?? '',
        repoName: urlSegments.at(-1) ?? '',
        author: item.user?.login ?? 'unknown',
        updatedAt: item.updated_at,
        isDraft: item.draft ?? false,
        role,
      };
    });
  };

  const [authored, reviewRequested] = await Promise.all([
    searchAs(`is:pr is:open author:${login}`, 'author'),
    searchAs(`is:pr is:open review-requested:${login}`, 'review-requested'),
  ]);

  // Merge by URL; a 'review-requested' hit replaces an 'author' hit for the same PR.
  const byUrl = new Map<string, PrSearchResult>();
  for (const pr of [...authored, ...reviewRequested]) {
    const known = byUrl.get(pr.url);
    const upgradesRole = known?.role === 'author' && pr.role === 'review-requested';
    if (known === undefined || upgradesRole) {
      byUrl.set(pr.url, pr);
    }
  }

  const newestFirst = Array.from(byUrl.values());
  newestFirst.sort((a, b) => new Date(b.updatedAt).getTime() - new Date(a.updatedAt).getTime());
  return newestFirst.slice(0, limit);
}
165  
/**
 * Summarises the check runs attached to a ref into one overall conclusion.
 *
 * Precedence, highest first:
 * 1. no check runs at all: `'neutral'`
 * 2. any run concluded `failure`, `timed_out` or `cancelled`: `'failure'`
 * 3. any run that is not yet `completed`: `'pending'`
 * 4. every run concluded `success`, `skipped` or `neutral`: `'success'`
 * 5. anything else (for example `action_required`): `'neutral'`
 *
 * Only the first 100 check runs returned for the ref are inspected.
 *
 * @param octokit - Authenticated Octokit client.
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @param ref - Commit SHA, branch or tag whose checks are inspected.
 * @returns The individual checks and the derived overall conclusion.
 */
export async function getCiStatus(
  octokit: Octokit,
  owner: string,
  repo: string,
  ref: string
): Promise<{ checks: CiCheck[]; conclusion: 'success' | 'failure' | 'pending' | 'neutral' }> {
  const { data } = await octokit.checks.listForRef({ owner, repo, ref, per_page: 100 });
  const checks: CiCheck[] = data.check_runs.map((run) => ({
    name: run.name,
    status: run.status as CiCheck['status'],
    conclusion: run.conclusion ?? null,
  }));

  let conclusion: 'success' | 'failure' | 'pending' | 'neutral';
  if (checks.length === 0) {
    conclusion = 'neutral';
  } else if (
    checks.some((c) => c.conclusion === 'failure' || c.conclusion === 'timed_out' || c.conclusion === 'cancelled')
  ) {
    conclusion = 'failure';
  } else if (checks.some((c) => c.status !== 'completed')) {
    // Besides `queued` and `in_progress`, a run can also be `waiting`,
    // `requested` or `pending`. All of them mean "not finished yet", so any
    // status other than `completed` is treated as pending.
    conclusion = 'pending';
  } else if (
    checks.every((c) => c.conclusion === 'success' || c.conclusion === 'skipped' || c.conclusion === 'neutral')
  ) {
    conclusion = 'success';
  } else {
    conclusion = 'neutral';
  }

  return { checks, conclusion };
}
198  
/**
 * Counts reviewers by the verdict they currently hold on a pull request.
 *
 * All review pages are fetched, and reviews are processed in the order the
 * API returns them. For each reviewer:
 * - `DISMISSED` and `PENDING` reviews are ignored;
 * - a later `APPROVED` or `CHANGES_REQUESTED` replaces whatever came before;
 * - a `COMMENTED` review only counts if the reviewer has no earlier recorded
 *   review, so commenting after approving leaves the approval in place.
 *
 * Reviews without a user are all attributed to a single `'unknown'` reviewer.
 *
 * @param octokit - Authenticated Octokit client.
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @param pullNumber - Pull request number.
 * @returns Number of reviewers whose current state is approved, changes
 *   requested, or commented only.
 */
export async function getReviewStatus(
  octokit: Octokit,
  owner: string,
  repo: string,
  pullNumber: number
): Promise<ReviewSummary> {
  // Paginate: with a single page of 100, the reviews beyond it would be
  // dropped on busy pull requests.
  const reviews = await octokit.paginate(octokit.pulls.listReviews, {
    owner,
    repo,
    pull_number: pullNumber,
    per_page: 100,
  });

  // Effective review state per reviewer.
  const latestByUser = new Map<string, string>();
  for (const review of reviews) {
    const state = review.state;
    if (state === 'DISMISSED' || state === 'PENDING') continue;

    const login = review.user?.login ?? 'unknown';
    // A plain comment does not replace an earlier recorded review.
    if (state === 'COMMENTED' && latestByUser.has(login)) continue;

    latestByUser.set(login, state);
  }

  let approved = 0;
  let changesRequested = 0;
  let commented = 0;
  for (const state of latestByUser.values()) {
    if (state === 'APPROVED') approved++;
    else if (state === 'CHANGES_REQUESTED') changesRequested++;
    else if (state === 'COMMENTED') commented++;
  }

  return { approved, changesRequested, commented };
}
232  
/**
 * Collects the relative ES `import` specifiers of a source file and resolves
 * them against the file's directory.
 *
 * Handles `import x from './a'`, side-effect imports (`import './a'`) and
 * import clauses that span several lines. Bare specifiers (anything not
 * starting with `.`) are skipped, as are dynamic `import()` calls.
 *
 * @param content - Source text of the file.
 * @param filePath - Repo-relative path of the file; its directory is the base
 *   for resolution.
 * @returns Repo-relative paths exactly as written in the import (usually
 *   without extension), in source order. Duplicates are not removed.
 */
function extractImports(content: string, filePath: string): string[] {
  const imports: string[] = [];
  const dir = filePath.split('/').slice(0, -1).join('/');

  // The optional clause before `from` may contain newlines (multi-line named
  // imports) but no quote or semicolon. That keeps a match inside a single
  // import statement, so a side-effect import is never skipped in favour of a
  // later `from '...'`.
  const importRegex = /\bimport\s+(?:[^'";]*?\s+from\s+)?['"]([^'"]+)['"]/g;
  let match: RegExpExecArray | null;
  while ((match = importRegex.exec(content)) !== null) {
    const importPath = match[1];
    if (importPath.startsWith('.')) {
      // Relative import — resolve to a file path
      const resolved = resolveRelativePath(dir, importPath);
      if (resolved) imports.push(resolved);
    }
  }

  return imports;
}

/**
 * Joins `importPath` onto `dir` and normalises the result: `.` and empty
 * segments are dropped and `..` removes the preceding segment (extra `..`
 * at the top are ignored).
 *
 * @param dir - Repo-relative directory, or `''` for the repository root.
 * @param importPath - Relative specifier such as `./foo` or `../bar/baz`.
 * @returns The normalised path with no extension handling applied (the
 *   caller tries common extensions), or `null` if nothing is left.
 */
function resolveRelativePath(dir: string, importPath: string): string | null {
  const joined = dir ? `${dir}/${importPath}` : importPath;
  const normalized: string[] = [];
  for (const part of joined.split('/')) {
    if (part === '..') normalized.pop();
    // Skipping '' avoids doubled or trailing slashes, e.g. from './foo/'.
    else if (part !== '.' && part !== '') normalized.push(part);
  }
  const base = normalized.join('/');
  return base === '' ? null : base;
}
263  
// Suffixes appended, in this order, to an extension-less import path when
// probing for the file it refers to: the path as written, then source-file
// extensions, then directory index files.
const TS_EXTENSIONS = [
  '',
  '.ts',
  '.tsx',
  '.js',
  '.jsx',
  '/index.ts',
  '/index.tsx',
  '/index.js',
];
265  
// System prompt for the LLM-based import extraction: asks for a bare JSON
// array of repo-relative paths of the local files imported by the given sources.
const SMART_IMPORTS_SYSTEM_PROMPT = [
  'You are a code analysis tool. Given source files from a repository, identify all local/internal file imports. Return repo-relative file paths as a JSON array of strings. Nothing else.',
  '',
  'Rules:',
  '- Only include imports that reference files within the same repository',
  '- Skip standard library, external packages, and framework imports',
  "- Resolve relative imports to repo-relative paths using each file's location",
  "- For C# `using` statements, infer the likely file path from the namespace (use the file's own namespace declaration for context)",
  '- Include file extensions (e.g., .cs, .rs, .py, .go, .ts)',
  '- Return unique paths only',
].join('\n');
275  
/**
 * Asks the provider's quick model which repository-local files the changed
 * files import.
 *
 * The changed files that have content are concatenated, each under a
 * `--- path ---` header, and sent with `SMART_IMPORTS_SYSTEM_PROMPT`. The
 * reply is scanned for the outermost `[...]` span, which is parsed as JSON.
 *
 * This is best-effort: any failure (provider lookup, missing model, request
 * error, unparsable reply) is logged with `console.warn` and yields `[]`.
 *
 * @param changedFileContents - File contents keyed by repo-relative path.
 * @param changedFilePaths - Paths to include, in the order they are sent.
 * @param providerName - Provider to resolve through `getProvider`.
 * @returns Unique, non-empty string entries of the model's array, in reply order.
 */
async function extractImportsWithLLM(
  changedFileContents: Record<string, string>,
  changedFilePaths: string[],
  providerName: Provider
): Promise<string[]> {
  const fileEntries = changedFilePaths
    .filter((p) => changedFileContents[p])
    .map((p) => `--- ${p} ---\n${changedFileContents[p]}`)
    .join('\n\n');

  if (!fileEntries) return [];

  try {
    // Provider and model resolution live inside the try block so that an
    // unknown provider or an empty model list degrades to "no neighbours"
    // instead of rejecting.
    const provider = getProvider(providerName);
    const quickEntry = provider.models.find((m) => m.quick) ?? provider.models[0];
    const quickModel = quickEntry.id;

    const result = await provider.quick({
      content: fileEntries,
      systemPrompt: SMART_IMPORTS_SYSTEM_PROMPT,
      model: quickModel,
    });

    // Extract JSON array from response
    const start = result.indexOf('[');
    const end = result.lastIndexOf(']');
    if (start === -1 || end <= start) return [];

    const parsed: unknown = JSON.parse(result.slice(start, end + 1));
    if (!Array.isArray(parsed)) return [];

    // The prompt asks for unique paths, but the model's output is not
    // guaranteed to comply. Drop non-strings, empty strings and repeats.
    const paths = parsed.filter((p): p is string => typeof p === 'string' && p.length > 0);
    return Array.from(new Set(paths));
  } catch (err) {
    console.warn('[github] Smart import extraction failed, returning empty:', err);
    return [];
  }
}
312  
/**
 * Fetches the contents of files that the changed files import but that are
 * not themselves part of the change ("neighbor" files).
 *
 * Two strategies exist:
 * - With `smartImportsProvider`, import paths come from
 *   `extractImportsWithLLM` and are fetched exactly as returned.
 * - Otherwise imports are found with `extractImports`, and each candidate is
 *   probed with the suffixes in `TS_EXTENSIONS` until one exists.
 *
 * In both cases at most 30 candidates are fetched, five at a time.
 *
 * @param octokit - Authenticated Octokit client.
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @param changedFilePaths - Repo-relative paths of the changed files.
 * @param changedFileContents - Contents of the changed files keyed by path.
 * @param ref - Commit SHA, branch or tag to read neighbor files from.
 * @param smartImportsProvider - When given, selects the LLM-based strategy.
 * @returns Contents of the neighbor files that could be read, keyed by path.
 */
export async function getNeighborFiles(
  octokit: Octokit,
  owner: string,
  repo: string,
  changedFilePaths: string[],
  changedFileContents: Record<string, string>,
  ref: string,
  smartImportsProvider?: Provider
): Promise<Record<string, string>> {
  const maxNeighbors = 30;
  const batchSize = 5;

  // Runs `task` over `paths` in consecutive groups, finishing one group
  // before starting the next.
  const inBatches = async (paths: string[], task: (path: string) => Promise<void>): Promise<void> => {
    for (let offset = 0; offset < paths.length; offset += batchSize) {
      const group = paths.slice(offset, offset + batchSize);
      await Promise.all(group.map((path) => task(path)));
    }
  };

  if (smartImportsProvider) {
    console.log(`[github] Using smart (${smartImportsProvider}) import extraction`);
    const llmPaths = await extractImportsWithLLM(changedFileContents, changedFilePaths, smartImportsProvider);
    console.log(`[github] ${smartImportsProvider} found ${llmPaths.length} import(s):`, llmPaths);

    // Files that are part of the change are not neighbors.
    const candidates = llmPaths.filter((p) => !changedFilePaths.includes(p));
    console.log(`[github] ${candidates.length} neighbor file(s) to fetch (after excluding changed files)`);

    const fetched: Record<string, string> = {};
    await inBatches(candidates.slice(0, maxNeighbors), async (filePath) => {
      const content = await getFileContent(octokit, owner, repo, filePath, ref);
      if (content !== null) {
        fetched[filePath] = content;
      }
    });
    console.log(`[github] Fetched ${Object.keys(fetched).length} neighbor file(s):`, Object.keys(fetched));
    return fetched;
  }

  // Default: regex-based extraction
  const candidates = new Set<string>();
  for (const filePath of changedFilePaths) {
    const content = changedFileContents[filePath];
    if (!content) continue;

    for (const imp of extractImports(content, filePath)) {
      // An import counts as already changed if it, or it plus any probed
      // suffix, is one of the changed paths.
      const spellings = [imp, ...TS_EXTENSIONS.map((ext) => imp + ext)];
      const alreadyChanged = spellings.some((candidate) => changedFilePaths.includes(candidate));
      if (!alreadyChanged) {
        candidates.add(imp);
      }
    }
  }

  const fetched: Record<string, string> = {};
  await inBatches(Array.from(candidates).slice(0, maxNeighbors), async (basePath) => {
    // Take the first suffix that resolves to a readable file.
    for (const ext of TS_EXTENSIONS) {
      const fullPath = basePath + ext;
      const content = await getFileContent(octokit, owner, repo, fullPath, ref);
      if (content !== null) {
        fetched[fullPath] = content;
        break;
      }
    }
  });

  return fetched;
}
387  
388  // ── File metadata (age + churn) ───────────────────────────────
389  
/**
 * Looks up the committer date of the most recent commit that touched
 * `filePath`, searching the history that starts at `baseSha`.
 *
 * @param octokit - Authenticated Octokit client.
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @param filePath - Repo-relative path to look up.
 * @param baseSha - Commit to start listing from (the PR's base).
 * @returns The committer date as reported by the API, or `null` when no
 *   commit is returned (e.g. the file is new), the date is missing, or the
 *   request fails.
 */
async function getFileLastModified(
  octokit: Octokit,
  owner: string,
  repo: string,
  filePath: string,
  baseSha: string
): Promise<string | null> {
  try {
    const response = await octokit.repos.listCommits({
      owner,
      repo,
      path: filePath,
      sha: baseSha,
      per_page: 1,
    });
    // Only one commit is requested; it is the newest one for this path.
    const newest = response.data[0];
    return newest?.commit?.committer?.date ?? null;
  } catch {
    return null;
  }
}
412  
/**
 * Counts, per file, how many of the pull request's commits touch it. A count
 * above 1 means the file was revised across several commits.
 *
 * Only the first 30 commits are inspected, one request at a time, to limit
 * API usage on very large pull requests. Failures are swallowed: a commit
 * whose details cannot be fetched is skipped, and if the commit list itself
 * cannot be loaded the result is empty.
 *
 * @param octokit - Authenticated Octokit client.
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @param pullNumber - Pull request number.
 * @returns Map from filename to the number of inspected commits touching it.
 */
async function getPrFileChurn(
  octokit: Octokit,
  owner: string,
  repo: string,
  pullNumber: number
): Promise<Map<string, number>> {
  const maxCommits = 30;
  const touches = new Map<string, number>();

  try {
    const prCommits = await octokit.paginate(octokit.pulls.listCommits, {
      owner,
      repo,
      pull_number: pullNumber,
      per_page: 100,
    });

    for (const entry of prCommits.slice(0, maxCommits)) {
      try {
        const detail = await octokit.repos.getCommit({ owner, repo, ref: entry.sha });
        for (const { filename } of detail.data.files ?? []) {
          const previous = touches.get(filename) ?? 0;
          touches.set(filename, previous + 1);
        }
      } catch {
        // This commit could not be fetched — leave it out of the tally.
      }
    }
  } catch {
    // The commit list could not be loaded — report whatever was gathered.
  }

  return touches;
}
453  
/**
 * Builds the `FileMetadata` list for the changed files of a pull request:
 * the per-file change stats plus churn (`prCommitCount`) and age
 * (`lastModified`).
 *
 * Churn is computed once for the whole pull request; files missing from the
 * churn map get a count of 1. `lastModified` is `null` for files with status
 * `'added'`; for all others it is looked up at `baseSha`, ten files at a
 * time. When a file carries a `previous_filename` (a rename), the lookup
 * uses that older path, because the new path has no history at `baseSha`.
 *
 * @param octokit - Authenticated Octokit client.
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @param pullNumber - Pull request number.
 * @param baseSha - Base commit of the pull request.
 * @param changedFiles - Files changed by the pull request.
 * @returns One metadata entry per changed file, in input order.
 */
export async function getFileMetadata(
  octokit: Octokit,
  owner: string,
  repo: string,
  pullNumber: number,
  baseSha: string,
  changedFiles: ChangedFile[]
): Promise<FileMetadata[]> {
  // Fetch churn data once for the whole PR
  const churnMap = await getPrFileChurn(octokit, owner, repo, pullNumber);

  const metadata: FileMetadata[] = changedFiles.map((f) => ({
    filename: f.filename,
    status: f.status,
    additions: f.additions,
    deletions: f.deletions,
    prCommitCount: churnMap.get(f.filename) ?? 1,
  }));

  // Path under which each file's history exists at the base commit.
  const historyPathByFile = new Map<string, string>(
    changedFiles.map((f): [string, string] => [f.filename, f.previous_filename ?? f.filename])
  );

  // Fetch last-modified dates in parallel, batched 10 at a time to
  // stay well under GitHub's rate limit.
  const BATCH_SIZE = 10;
  for (let i = 0; i < metadata.length; i += BATCH_SIZE) {
    const batch = metadata.slice(i, i + BATCH_SIZE);
    await Promise.all(
      batch.map(async (fm) => {
        if (fm.status === 'added') {
          fm.lastModified = null;
        } else {
          const historyPath = historyPathByFile.get(fm.filename) ?? fm.filename;
          fm.lastModified = await getFileLastModified(octokit, owner, repo, historyPath, baseSha);
        }
      })
    );
  }

  return metadata;
}