/ src / utils / git.ts
git.ts
  1  import { createHash } from 'crypto'
  2  import { readFileSync, realpathSync, statSync } from 'fs'
  3  import { open, readFile, realpath, stat } from 'fs/promises'
  4  import memoize from 'lodash-es/memoize.js'
  5  import { basename, dirname, join, resolve, sep } from 'path'
  6  import { hasBinaryExtension, isBinaryContent } from '../constants/files.js'
  7  import { getCwd } from './cwd.js'
  8  import { logForDebugging } from './debug.js'
  9  import { logForDiagnosticsNoPII } from './diagLogs.js'
 10  import { execFileNoThrow } from './execFileNoThrow.js'
 11  import { getFsImplementation } from './fsOperations.js'
 12  import {
 13    getCachedBranch,
 14    getCachedDefaultBranch,
 15    getCachedHead,
 16    getCachedRemoteUrl,
 17    getWorktreeCountFromFs,
 18    isShallowClone as isShallowCloneFs,
 19    resolveGitDir,
 20  } from './git/gitFilesystem.js'
 21  import { logError } from './log.js'
 22  import { memoizeWithLRU } from './memoize.js'
 23  import { whichSync } from './which.js'
 24  
 25  const GIT_ROOT_NOT_FOUND = Symbol('git-root-not-found')
 26  
 27  const findGitRootImpl = memoizeWithLRU(
 28    (startPath: string): string | typeof GIT_ROOT_NOT_FOUND => {
 29      const startTime = Date.now()
 30      logForDiagnosticsNoPII('info', 'find_git_root_started')
 31  
 32      let current = resolve(startPath)
 33      const root = current.substring(0, current.indexOf(sep) + 1) || sep
 34      let statCount = 0
 35  
 36      while (current !== root) {
 37        try {
 38          const gitPath = join(current, '.git')
 39          statCount++
 40          const stat = statSync(gitPath)
 41          // .git can be a directory (regular repo) or file (worktree/submodule)
 42          if (stat.isDirectory() || stat.isFile()) {
 43            logForDiagnosticsNoPII('info', 'find_git_root_completed', {
 44              duration_ms: Date.now() - startTime,
 45              stat_count: statCount,
 46              found: true,
 47            })
 48            return current.normalize('NFC')
 49          }
 50        } catch {
 51          // .git doesn't exist at this level, continue up
 52        }
 53        const parent = dirname(current)
 54        if (parent === current) {
 55          break
 56        }
 57        current = parent
 58      }
 59  
 60      // Check root directory as well
 61      try {
 62        const gitPath = join(root, '.git')
 63        statCount++
 64        const stat = statSync(gitPath)
 65        if (stat.isDirectory() || stat.isFile()) {
 66          logForDiagnosticsNoPII('info', 'find_git_root_completed', {
 67            duration_ms: Date.now() - startTime,
 68            stat_count: statCount,
 69            found: true,
 70          })
 71          return root.normalize('NFC')
 72        }
 73      } catch {
 74        // .git doesn't exist at root
 75      }
 76  
 77      logForDiagnosticsNoPII('info', 'find_git_root_completed', {
 78        duration_ms: Date.now() - startTime,
 79        stat_count: statCount,
 80        found: false,
 81      })
 82      return GIT_ROOT_NOT_FOUND
 83    },
 84    path => path,
 85    50,
 86  )
 87  
 88  /**
 89   * Find the git root by walking up the directory tree.
 90   * Looks for a .git directory or file (worktrees/submodules use a file).
 91   * Returns the directory containing .git, or null if not found.
 92   *
 93   * Memoized per startPath with an LRU cache (max 50 entries) to prevent
 94   * unbounded growth — gitDiff calls this with dirname(file), so editing many
 95   * files across different directories would otherwise accumulate entries forever.
 96   */
 97  export const findGitRoot = createFindGitRoot()
 98  
 99  function createFindGitRoot(): {
100    (startPath: string): string | null
101    cache: typeof findGitRootImpl.cache
102  } {
103    function wrapper(startPath: string): string | null {
104      const result = findGitRootImpl(startPath)
105      return result === GIT_ROOT_NOT_FOUND ? null : result
106    }
107    wrapper.cache = findGitRootImpl.cache
108    return wrapper
109  }
110  
/**
 * Map a git root onto the canonical root of its main repository.
 *
 * - Regular repo (`.git` is a directory): returned unchanged.
 * - Worktree (`.git` is a file): follows `.git` → `gitdir:` → `commondir`
 *   to the main repository's working directory.
 * - Submodule (`.git` is a file, but there is no `commondir`): returned
 *   unchanged, since a submodule is its own repository.
 *
 * Any filesystem error along the way yields the input root. Memoized with a
 * 50-entry LRU because this sits on hot paths (permission checks, prompt
 * building).
 */
const resolveCanonicalRoot = memoizeWithLRU(
  (gitRoot: string): string => {
    const GITDIR_PREFIX = 'gitdir:'
    try {
      // Worktree: `.git` is a file reading "gitdir: <path>".
      // Regular repo: `.git` is a directory, so this read throws (EISDIR).
      const dotGit = readFileSync(join(gitRoot, '.git'), 'utf-8').trim()
      if (!dotGit.startsWith(GITDIR_PREFIX)) {
        return gitRoot
      }
      const worktreeGitDir = resolve(
        gitRoot,
        dotGit.slice(GITDIR_PREFIX.length).trim(),
      )

      // `commondir` names the shared .git directory, relative to the worktree
      // gitdir. Submodules have no such file, so the read throws (ENOENT).
      const commonDirRef = readFileSync(
        join(worktreeGitDir, 'commondir'),
        'utf-8',
      ).trim()
      const commonDir = resolve(worktreeGitDir, commonDirRef)

      // SECURITY: in a cloned/downloaded repo both the `.git` file and
      // `commondir` are attacker-controlled. Unvalidated, a malicious repo
      // could aim commondir at any path the victim already trusts, skipping
      // the trust dialog and running hooks from .claude/settings.json on
      // startup. So require the exact layout `git worktree add` produces:
      //   1. worktreeGitDir is a direct child of <commonDir>/worktrees/
      //      → the commondir file we read lives inside the resolved common
      //        dir, not inside the attacker's repo
      //   2. <worktreeGitDir>/gitdir points back at <gitRoot>/.git
      //      → an attacker cannot borrow a victim's existing worktree entry
      //        by guessing its path
      // Neither check suffices alone: (1) fails when the victim has a
      // worktree of the trusted repo; (2) fails because the attacker picks
      // worktreeGitDir.
      const expectedParent = join(commonDir, 'worktrees')
      if (resolve(dirname(worktreeGitDir)) !== expectedParent) {
        return gitRoot
      }

      // Git records the back-link via strbuf_realpath() (symlinks resolved),
      // whereas gitRoot from findGitRoot() is only lexically resolved. Compare
      // against realpath(gitRoot) so worktrees reached through a symlinked
      // path (e.g. macOS /tmp → /private/tmp) still match. The directory is
      // realpathed and '.git' joined afterwards: realpathing the .git file
      // itself would follow a symlinked .git and let an attacker borrow a
      // victim's back-link.
      const recordedBacklink = readFileSync(
        join(worktreeGitDir, 'gitdir'),
        'utf-8',
      ).trim()
      const backlink = realpathSync(recordedBacklink)
      const expectedBacklink = join(realpathSync(gitRoot), '.git')
      if (backlink !== expectedBacklink) {
        return gitRoot
      }

      // A common dir named `.git` sits inside the main working directory, so
      // its parent is the canonical root. Otherwise this is a bare-repo
      // worktree and the common dir itself is the stable identity
      // (anthropics/claude-code#27994).
      const canonical =
        basename(commonDir) === '.git' ? dirname(commonDir) : commonDir
      return canonical.normalize('NFC')
    } catch {
      return gitRoot
    }
  },
  root => root,
  50,
)
184  
/**
 * Find the canonical git repository root for `startPath`, resolving through
 * worktrees.
 *
 * findGitRoot returns the directory holding the `.git` entry, which for a
 * worktree is the worktree directory. This function goes one step further and
 * returns the main repository's working directory, so every worktree of a
 * repo maps to the same project identity.
 *
 * Prefer this over findGitRoot for project-scoped state (auto-memory, project
 * config, agent memory) so worktrees share that state with the main repo.
 * Returns `null` when `startPath` is not inside a git repository.
 */
export const findCanonicalGitRoot = createFindCanonicalGitRoot()

/**
 * Builds the public lookup (findGitRoot followed by canonical resolution) and
 * attaches the canonical resolver's cache to the function.
 */
function createFindCanonicalGitRoot(): {
  (startPath: string): string | null
  cache: typeof resolveCanonicalRoot.cache
} {
  const lookup = (startPath: string): string | null => {
    const worktreeRoot = findGitRoot(startPath)
    return worktreeRoot ? resolveCanonicalRoot(worktreeRoot) : null
  }
  return Object.assign(lookup, { cache: resolveCanonicalRoot.cache })
}
211  
/**
 * Memoized location of the git executable.
 *
 * The lookup is done once and reused for every spawn; when `whichSync` finds
 * nothing, the bare command name `git` is returned instead.
 */
export const gitExe = memoize((): string => {
  const located = whichSync('git')
  return located || 'git'
})
217  
/**
 * Memoized check for whether the current working directory (per getCwd())
 * lies inside a git repository. Emits start/completion diagnostics with the
 * elapsed time and the outcome.
 */
export const getIsGit = memoize(async (): Promise<boolean> => {
  const startedAt = Date.now()
  logForDiagnosticsNoPII('info', 'is_git_check_started')

  const root = findGitRoot(getCwd())
  const insideRepo = root !== null

  logForDiagnosticsNoPII('info', 'is_git_check_completed', {
    duration_ms: Date.now() - startedAt,
    is_git: insideRepo,
  })
  return insideRepo
})
230  
/**
 * Resolve the git directory for `cwd`.
 *
 * Pass-through to `resolveGitDir` from ./git/gitFilesystem.js; the promise
 * settles to a path or `null` exactly as that helper reports.
 */
export function getGitDir(cwd: string): Promise<string | null> {
  const gitDir = resolveGitDir(cwd)
  return gitDir
}
234  
/**
 * Report whether the current working directory is itself the git root.
 *
 * Both paths are compared after symlink resolution; when either realpath
 * call fails, the unresolved paths are compared instead. Returns false when
 * the cwd is not inside a git repository.
 */
export async function isAtGitRoot(): Promise<boolean> {
  const cwd = getCwd()
  const gitRoot = findGitRoot(cwd)
  if (!gitRoot) {
    return false
  }
  try {
    // Resolve both concurrently so symlinked paths compare equal.
    const canonical = await Promise.all(
      [cwd, gitRoot].map(path => realpath(path)),
    )
    return canonical[0] === canonical[1]
  } catch {
    return cwd === gitRoot
  }
}
252  
/** Resolves to true when `cwd` has a git root at or above it. */
export const dirIsInGitRepo = async (cwd: string): Promise<boolean> => {
  const root = findGitRoot(cwd)
  return root !== null
}
256  
/** Async accessor for HEAD; returns whatever `getCachedHead()` yields. */
export const getHead = async (): Promise<string> => getCachedHead()
260  
/** Async accessor for the current branch; returns whatever `getCachedBranch()` yields. */
export const getBranch = async (): Promise<string> => getCachedBranch()
264  
/** Async accessor for the default branch; returns whatever `getCachedDefaultBranch()` yields. */
export const getDefaultBranch = async (): Promise<string> =>
  getCachedDefaultBranch()
268  
/** Async accessor for the remote URL (or null); returns whatever `getCachedRemoteUrl()` yields. */
export const getRemoteUrl = async (): Promise<string | null> =>
  getCachedRemoteUrl()
272  
/**
 * Normalizes a git remote URL to a canonical form for hashing.
 * Converts SSH and HTTPS URLs to the same format: host/owner/repo
 * (lowercase, no `.git` suffix, no trailing slash).
 *
 * Examples:
 * - git@github.com:owner/repo.git -> github.com/owner/repo
 * - https://github.com/owner/repo.git -> github.com/owner/repo
 * - https://github.com/owner/repo/ -> github.com/owner/repo
 * - ssh://git@github.com/owner/repo -> github.com/owner/repo
 * - http://local_proxy@127.0.0.1:16583/git/owner/repo -> github.com/owner/repo
 *
 * @param url - Remote URL as configured in git (surrounding whitespace is ignored).
 * @returns The canonical `host/path` string, or `null` when the input is
 *          empty or matches none of the supported URL shapes.
 */
export function normalizeGitRemoteUrl(url: string): string | null {
  // A trailing slash does not change which repository a remote points at, so
  // drop it up front; otherwise "…/repo" and "…/repo/" would normalize (and
  // hash) differently and the `.git` suffix would not be stripped.
  const trimmed = url.trim().replace(/\/+$/, '')
  if (!trimmed) return null

  // Handle SSH format: git@host:owner/repo.git
  const sshMatch = trimmed.match(/^git@([^:]+):(.+?)(?:\.git)?$/)
  if (sshMatch && sshMatch[1] && sshMatch[2]) {
    return `${sshMatch[1]}/${sshMatch[2]}`.toLowerCase()
  }

  // Handle HTTPS/SSH URL format: https://host/owner/repo.git or ssh://git@host/owner/repo
  // The optional userinfo part must not span a "/": it ends at the first "@"
  // of the authority, so an "@" later in the path is never mistaken for it.
  const urlMatch = trimmed.match(
    /^(?:https?|ssh):\/\/(?:[^@/]+@)?([^/]+)\/(.+?)(?:\.git)?$/,
  )
  if (urlMatch && urlMatch[1] && urlMatch[2]) {
    const host = urlMatch[1]
    const path = urlMatch[2]

    // CCR git proxy URLs use format:
    //   Legacy:  http://...@127.0.0.1:PORT/git/owner/repo       (github.com assumed)
    //   GHE:     http://...@127.0.0.1:PORT/git/ghe.host/owner/repo (host encoded in path)
    // Strip the /git/ prefix. If the first segment contains a dot, it's a
    // hostname (GitHub org names cannot contain dots). Otherwise assume github.com.
    if (isLocalHost(host) && path.startsWith('git/')) {
      const proxyPath = path.slice('git/'.length)
      const segments = proxyPath.split('/')
      // 3+ segments where first contains a dot → host/owner/repo (GHE format)
      if (segments.length >= 3 && segments[0]?.includes('.')) {
        return proxyPath.toLowerCase()
      }
      // Otherwise → owner/repo (legacy format, assume github.com)
      return `github.com/${proxyPath}`.toLowerCase()
    }

    return `${host}/${path}`.toLowerCase()
  }

  return null
}
322  
/**
 * Returns the first 16 hex characters of the SHA-256 of the normalized git
 * remote URL, giving a repository identifier that:
 * - is the same whether the repo was cloned over SSH or HTTPS
 * - does not expose the actual repository name in logs
 *
 * Resolves to `null` when there is no remote URL or it cannot be normalized.
 */
export async function getRepoRemoteHash(): Promise<string | null> {
  const remoteUrl = await getRemoteUrl()
  const normalized = remoteUrl ? normalizeGitRemoteUrl(remoteUrl) : null
  if (!normalized) return null

  const digest = createHash('sha256').update(normalized).digest('hex')
  return digest.slice(0, 16)
}
339  
/**
 * Resolves to true when `git rev-parse @{u}` exits 0, i.e. the current
 * branch has an upstream that git can resolve.
 */
export const getIsHeadOnRemote = async (): Promise<boolean> => {
  const upstream = await execFileNoThrow(gitExe(), ['rev-parse', '@{u}'], {
    preserveOutputOnError: false,
  })
  return upstream.code === 0
}
346  
/**
 * Resolves to true when `git rev-list --count @{u}..HEAD` succeeds and
 * reports at least one commit. A failing command (for example, no upstream)
 * or unparsable output counts as "nothing unpushed".
 */
export const hasUnpushedCommits = async (): Promise<boolean> => {
  const revList = await execFileNoThrow(
    gitExe(),
    ['rev-list', '--count', '@{u}..HEAD'],
    { preserveOutputOnError: false },
  )
  if (revList.code !== 0) {
    return false
  }
  const aheadBy = parseInt(revList.stdout.trim(), 10)
  return aheadBy > 0
}
355  
/**
 * Resolves to true when `git status --porcelain` prints nothing.
 *
 * @param options.ignoreUntracked - When set, `-uno` is passed so untracked
 *        files do not make the tree count as dirty.
 *
 * NOTE(review): only stdout is inspected, not the exit code, so a failed git
 * invocation that produces no output is reported as clean — confirm callers
 * are fine with that.
 */
export const getIsClean = async (options?: {
  ignoreUntracked?: boolean
}): Promise<boolean> => {
  const args = [
    '--no-optional-locks',
    'status',
    '--porcelain',
    ...(options?.ignoreUntracked ? ['-uno'] : []),
  ]
  const status = await execFileNoThrow(gitExe(), args, {
    preserveOutputOnError: false,
  })
  return status.stdout.trim() === ''
}
368  
/**
 * List the paths reported by `git status --porcelain`.
 *
 * Each porcelain (v1) entry has the fixed shape `XY <path>`: two status
 * columns, one space, then the path. The path is therefore taken by column
 * rather than by splitting on spaces, which previously produced an empty
 * string for every fully staged entry (`M  file`, `A  file`, …) and cut
 * paths at their first space.
 *
 * For rename/copy entries (`R  old -> new`) the destination path is returned.
 * Paths are returned as git prints them: names git chooses to C-quote (for
 * example names containing spaces) keep their surrounding double quotes.
 *
 * @returns One entry per status line; an empty array when nothing is reported.
 */
export const getChangedFiles = async (): Promise<string[]> => {
  const { stdout } = await execFileNoThrow(
    gitExe(),
    ['--no-optional-locks', 'status', '--porcelain'],
    {
      preserveOutputOnError: false,
    },
  )

  const files: string[] = []
  // Do not trim the start of stdout: a leading space is the X column of the
  // first entry (e.g. " M file") and is needed to keep the columns aligned.
  for (const rawLine of stdout.split('\n')) {
    const line = rawLine.trimEnd()
    // "XY " is three characters; anything shorter cannot carry a path.
    if (line.length <= 3) continue

    const status = line.slice(0, 2)
    let path = line.slice(3)

    // Renames and copies are printed as "<from> -> <to>"; report the new path.
    if (status.includes('R') || status.includes('C')) {
      const arrow = path.lastIndexOf(' -> ')
      if (arrow !== -1) {
        path = path.slice(arrow + ' -> '.length)
      }
    }

    if (path) files.push(path)
  }
  return files
}
383  
/** Paths from `git status --porcelain`, split by whether git tracks them. */
export type GitFileStatus = {
  tracked: string[]
  untracked: string[]
}

/**
 * Run `git status --porcelain` and bucket each reported path.
 *
 * Entries whose two-character status is `??` go to `untracked`; every other
 * entry with a non-empty path goes to `tracked`. Paths are kept as git prints
 * them.
 */
export const getFileStatus = async (): Promise<GitFileStatus> => {
  const { stdout } = await execFileNoThrow(
    gitExe(),
    ['--no-optional-locks', 'status', '--porcelain'],
    {
      preserveOutputOnError: false,
    },
  )

  const tracked: string[] = []
  const untracked: string[] = []

  for (const line of stdout.trim().split('\n')) {
    if (line.length === 0) {
      continue
    }
    const status = line.slice(0, 2)
    const filename = line.slice(2).trim()

    if (status === '??') {
      untracked.push(filename)
      continue
    }
    if (filename) {
      tracked.push(filename)
    }
  }

  return { tracked, untracked }
}
418  
/** Async accessor for the worktree count; returns whatever `getWorktreeCountFromFs()` yields. */
export const getWorktreeCount = async (): Promise<number> =>
  getWorktreeCountFromFs()
422  
/**
 * Stashes all changes (including untracked files) to return git to a clean porcelain state.
 * Important: untracked files are staged before stashing to prevent data loss.
 *
 * @param message - Optional custom message for the stash; an auto-generated,
 *        timestamped message is used when omitted or empty.
 * @returns Promise<boolean> - true if the stash was created, false if staging
 *          or stashing failed (including any thrown error).
 */
export const stashToCleanState = async (message?: string): Promise<boolean> => {
  try {
    const stashMessage =
      message || `Claude Code auto-stash - ${new Date().toISOString()}`

    // First, check if we have untracked files
    const { untracked } = await getFileStatus()

    // If we have untracked files, add them to the index first so the stash
    // below picks them up instead of leaving them behind.
    if (untracked.length > 0) {
      const { code: addCode } = await execFileNoThrow(
        gitExe(),
        // `--` ends option parsing: file names come from the working tree and
        // may start with "-" (e.g. a file called "-n" would otherwise turn
        // the add into a dry run that stages nothing yet still exits 0).
        ['add', '--', ...untracked],
        { preserveOutputOnError: false },
      )

      if (addCode !== 0) {
        return false
      }
    }

    // Now stash everything (staged and unstaged changes)
    const { code } = await execFileNoThrow(
      gitExe(),
      ['stash', 'push', '--message', stashMessage],
      { preserveOutputOnError: false },
    )
    return code === 0
  } catch {
    return false
  }
}
462  
/** Point-in-time summary of the repository assembled by getGitState(). */
export type GitRepoState = {
  commitHash: string
  branchName: string
  remoteUrl: string | null
  isHeadOnRemote: boolean
  isClean: boolean
  worktreeCount: number
}

/**
 * Collect HEAD, branch, remote URL, upstream presence, cleanliness and
 * worktree count concurrently.
 *
 * Best effort: if any of the lookups rejects, the whole result is `null`.
 */
export async function getGitState(): Promise<GitRepoState | null> {
  try {
    const lookups = [
      getHead(),
      getBranch(),
      getRemoteUrl(),
      getIsHeadOnRemote(),
      getIsClean(),
      getWorktreeCount(),
    ] as const
    const [
      commitHash,
      branchName,
      remoteUrl,
      isHeadOnRemote,
      isClean,
      worktreeCount,
    ] = await Promise.all(lookups)

    return {
      commitHash,
      branchName,
      remoteUrl,
      isHeadOnRemote,
      isClean,
      worktreeCount,
    }
  } catch {
    // Git state is optional context; swallow the failure.
    return null
  }
}
503  
/**
 * Resolve the `owner/name` of the current repository when its remote is on
 * github.com.
 *
 * Only github.com remotes produce a result — callers (e.g. issue submission)
 * assume the returned value names a github.com repository. Resolves to `null`
 * when there is no remote, it cannot be parsed, or it points at another host.
 * The outcome is written to the debug log either way.
 */
export async function getGithubRepo(): Promise<string | null> {
  const { parseGitRemote } = await import('./detectRepository.js')
  const remoteUrl = await getRemoteUrl()
  const parsed = remoteUrl ? parseGitRemote(remoteUrl) : null

  if (!parsed || parsed.host !== 'github.com') {
    logForDebugging('Local GitHub repo: unknown')
    return null
  }

  const repo = `${parsed.owner}/${parsed.name}`
  logForDebugging(`Local GitHub repo: ${repo}`)
  return repo
}
522  
/**
 * Preserved git state for issue submission.
 * Uses remote base (e.g., origin/main) which is rarely force-pushed,
 * unlike local commits that can be GC'd after force push.
 *
 * In HEAD-only mode (shallow clone, no usable remote, or merge-base failure)
 * preserveGitStateForIssue() sets every nullable field to null and `patch`
 * holds the output of `git diff HEAD`.
 */
export type PreservedGitState = {
  /** The SHA of the merge-base with the remote branch */
  remote_base_sha: string | null
  /** The remote branch used (e.g., "origin/main") */
  remote_base: string | null
  /** Patch from merge-base to current state (includes uncommitted changes); empty string when git produced no output */
  patch: string
  /** Untracked files with their contents */
  untracked_files: Array<{ path: string; content: string }>
  /** git format-patch output for committed changes between merge-base and HEAD.
   *  Used to reconstruct the actual commit chain (author, date, message) in
   *  replay containers. null when there are no commits between merge-base and HEAD. */
  format_patch: string | null
  /** The current HEAD SHA (tip of the feature branch) */
  head_sha: string | null
  /** The current branch name (e.g., "feat/my-feature"); null when HEAD is detached */
  branch_name: string | null
}
546  
// Size limits for untracked file capture. Sizes are binary multiples
// (1024-based) and are compared against the size reported by stat().
const MAX_FILE_SIZE_BYTES = 500 * 1024 * 1024 // 500MB per file; larger files are skipped
const MAX_TOTAL_SIZE_BYTES = 5 * 1024 * 1024 * 1024 // 5GB total; capture stops once the next file would exceed it
// Maximum number of files captured before the scan stops.
const MAX_FILE_COUNT = 20000

// Initial read buffer for binary detection + content reuse. 64KB covers
// most source files in a single read; isBinaryContent() internally scans
// only its first 8KB for the binary heuristic, so the extra bytes are
// purely for avoiding a second read when the file turns out to be text.
const SNIFF_BUFFER_SIZE = 64 * 1024
557  
/**
 * Find the best remote branch to use as a base.
 *
 * Tried in order, first hit wins:
 *   1. the current branch's upstream tracking branch
 *   2. origin's HEAD branch as reported by `git remote show`
 *   3. the first of origin/main, origin/staging, origin/master that exists
 *
 * @returns a ref name such as "origin/main", or `null` when nothing matched.
 */
export async function findRemoteBase(): Promise<string | null> {
  // Every probe below is a git invocation with the same quiet error handling.
  const runGit = (args: string[]) =>
    execFileNoThrow(gitExe(), args, { preserveOutputOnError: false })

  // 1. Upstream of the current branch.
  const upstream = await runGit([
    'rev-parse',
    '--abbrev-ref',
    '--symbolic-full-name',
    '@{u}',
  ])
  if (upstream.code === 0) {
    const upstreamName = upstream.stdout.trim()
    if (upstreamName) {
      return upstreamName
    }
  }

  // 2. Default branch advertised by origin.
  // NOTE(review): the trailing `-- HEAD` looks like it is handed to
  // `git remote show` as a second remote name; confirm this invocation can
  // exit 0, otherwise this step never contributes a result.
  const originInfo = await runGit(['remote', 'show', 'origin', '--', 'HEAD'])
  if (originInfo.code === 0) {
    const headBranch = /HEAD branch: (\S+)/.exec(originInfo.stdout)?.[1]
    if (headBranch) {
      return `origin/${headBranch}`
    }
  }

  // 3. Well-known branch names, in priority order.
  for (const candidate of ['origin/main', 'origin/staging', 'origin/master']) {
    const probe = await runGit(['rev-parse', '--verify', candidate])
    if (probe.code === 0) {
      return candidate
    }
  }

  return null
}
604  
/**
 * Whether the repository is a shallow clone, detected by the presence of
 * <gitDir>/shallow. Delegates to the filesystem-based check in
 * ./git/gitFilesystem.js.
 */
function isShallowClone(): Promise<boolean> {
  const shallow = isShallowCloneFs()
  return shallow
}
611  
/**
 * Capture untracked files (git diff doesn't include them).
 *
 * Lists files with `git ls-files --others --exclude-standard` and reads each
 * one as UTF-8 text, subject to the per-file, total-size and file-count
 * limits. Files with a binary extension or binary-looking content are
 * skipped, and unreadable files are logged and skipped.
 *
 * @returns captured `{ path, content }` pairs; empty when the listing fails
 *          or reports nothing.
 */
async function captureUntrackedFiles(): Promise<
  Array<{ path: string; content: string }>
> {
  const listing = await execFileNoThrow(
    gitExe(),
    ['ls-files', '--others', '--exclude-standard'],
    { preserveOutputOnError: false },
  )

  const output = listing.stdout.trim()
  if (listing.code !== 0 || !output) {
    return []
  }

  const captured: Array<{ path: string; content: string }> = []
  let capturedBytes = 0

  for (const filePath of output.split('\n').filter(Boolean)) {
    // Stop once the file-count cap has been reached.
    if (captured.length >= MAX_FILE_COUNT) {
      logForDebugging(
        `Untracked file capture: reached max file count (${MAX_FILE_COUNT})`,
      )
      break
    }

    // Cheapest filter first: extension check needs no I/O.
    if (hasBinaryExtension(filePath)) {
      continue
    }

    try {
      const { size } = await stat(filePath)

      // Oversized files are skipped individually...
      if (size > MAX_FILE_SIZE_BYTES) {
        logForDebugging(
          `Untracked file capture: skipping ${filePath} (exceeds ${MAX_FILE_SIZE_BYTES} bytes)`,
        )
        continue
      }

      // ...whereas blowing the overall budget ends the capture.
      if (capturedBytes + size > MAX_TOTAL_SIZE_BYTES) {
        logForDebugging(
          `Untracked file capture: reached total size limit (${MAX_TOTAL_SIZE_BYTES} bytes)`,
        )
        break
      }

      // Nothing to read for an empty file.
      if (size === 0) {
        captured.push({ path: filePath, content: '' })
        continue
      }

      // Read at most SNIFF_BUFFER_SIZE bytes for the binary check, so a
      // binary file never costs more than that even though files up to
      // MAX_FILE_SIZE_BYTES are allowed. Small files are served straight
      // from this buffer; larger text files are re-read with an encoding so
      // the runtime decodes to a string without a full-size Buffer in JS.
      const sniffLength = Math.min(SNIFF_BUFFER_SIZE, size)
      const handle = await open(filePath, 'r')
      try {
        const buffer = Buffer.alloc(sniffLength)
        const { bytesRead } = await handle.read(buffer, 0, sniffLength, 0)
        const head = buffer.subarray(0, bytesRead)

        if (isBinaryContent(head)) {
          continue
        }

        // size <= sniffLength means the sniff already holds the whole file.
        // Otherwise pay for a second open rather than doubling peak memory.
        const content =
          size <= sniffLength
            ? head.toString('utf-8')
            : await readFile(filePath, 'utf-8')

        captured.push({ path: filePath, content })
        capturedBytes += size
      } finally {
        await handle.close()
      }
    } catch (err) {
      // Unreadable file: log and move on.
      logForDebugging(`Failed to read untracked file ${filePath}: ${err}`)
    }
  }

  return captured
}
714  
/**
 * Build the HEAD-only snapshot: `git diff HEAD` plus untracked files, with
 * all remote/commit metadata left null.
 */
async function captureHeadOnlyState(): Promise<PreservedGitState> {
  const [{ stdout: patch }, untrackedFiles] = await Promise.all([
    execFileNoThrow(gitExe(), ['diff', 'HEAD']),
    captureUntrackedFiles(),
  ])
  return {
    remote_base_sha: null,
    remote_base: null,
    patch: patch || '',
    untracked_files: untrackedFiles,
    format_patch: null,
    head_sha: null,
    branch_name: null,
  }
}

/**
 * Preserve git state for issue submission.
 * Uses remote base for more stable replay capability.
 *
 * Fallbacks to HEAD-only mode (diff against HEAD, no remote metadata):
 * - Shallow clone
 * - No remote base could be determined
 * - `git merge-base` against the remote base failed
 *
 * A detached HEAD is reported with `branch_name: null`.
 *
 * @returns the captured state, or `null` when the cwd is not in a git
 *          repository or an unexpected error occurred (the error is logged).
 */
export async function preserveGitStateForIssue(): Promise<PreservedGitState | null> {
  try {
    if (!(await getIsGit())) {
      return null
    }

    // Shallow clones lack the history needed for a merge-base.
    if (await isShallowClone()) {
      logForDebugging('Shallow clone detected, using HEAD-only mode for issue')
      return await captureHeadOnlyState()
    }

    const remoteBase = await findRemoteBase()
    if (!remoteBase) {
      logForDebugging('No remote found, using HEAD-only mode for issue')
      return await captureHeadOnlyState()
    }

    const mergeBase = await execFileNoThrow(
      gitExe(),
      ['merge-base', 'HEAD', remoteBase],
      { preserveOutputOnError: false },
    )
    const remoteBaseSha = mergeBase.code === 0 ? mergeBase.stdout.trim() : ''
    if (!remoteBaseSha) {
      logForDebugging('Merge-base failed, using HEAD-only mode for issue')
      return await captureHeadOnlyState()
    }

    // The five steps below depend only on remoteBaseSha, so they run
    // concurrently (~5×90ms serial → ~90ms parallel on Bun native; used by
    // /issue and /share).
    const [diff, untrackedFiles, commits, head, branch] = await Promise.all([
      // Patch from merge-base to current state (including staged changes)
      execFileNoThrow(gitExe(), ['diff', remoteBaseSha]),
      // Untracked files are not part of any diff
      captureUntrackedFiles(),
      // format-patch keeps the real commit chain (author, date, message) so
      // replay containers can rebuild the branch from actual commits rather
      // than a squashed diff; --stdout emits all patches as one text stream.
      execFileNoThrow(gitExe(), [
        'format-patch',
        `${remoteBaseSha}..HEAD`,
        '--stdout',
      ]),
      // HEAD SHA for replay
      execFileNoThrow(gitExe(), ['rev-parse', 'HEAD']),
      // Branch name for replay
      execFileNoThrow(gitExe(), ['rev-parse', '--abbrev-ref', 'HEAD']),
    ])

    const hasCommits = Boolean(
      commits.code === 0 && commits.stdout && commits.stdout.trim(),
    )
    const currentBranch = branch.stdout?.trim()

    return {
      remote_base_sha: remoteBaseSha,
      remote_base: remoteBase,
      patch: diff.stdout || '',
      untracked_files: untrackedFiles,
      format_patch: hasCommits ? commits.stdout : null,
      head_sha: head.stdout?.trim() || null,
      // `rev-parse --abbrev-ref HEAD` prints "HEAD" when detached.
      branch_name:
        currentBranch && currentBranch !== 'HEAD' ? currentBranch : null,
    }
  } catch (err) {
    logError(err)
    return null
  }
}
846  
/**
 * Whether `host` (optionally followed by `:port`) is `localhost` or a
 * dotted-quad address starting with `127.`.
 */
function isLocalHost(host: string): boolean {
  // Keep only what precedes the first ":" (the port separator).
  const portSeparator = host.indexOf(':')
  const hostname = portSeparator === -1 ? host : host.slice(0, portSeparator)

  if (hostname === 'localhost') {
    return true
  }
  return /^127\.\d{1,3}\.\d{1,3}\.\d{1,3}$/.test(hostname)
}
854  
/**
 * Checks if the current working directory appears to be a bare git repository
 * or has been manipulated to look like one (sandbox escape attack vector).
 *
 * SECURITY: Git's is_git_directory() function (setup.c:417-455) checks for:
 * 1. HEAD file - Must be a valid ref
 * 2. objects/ directory - Must exist and be accessible
 * 3. refs/ directory - Must exist and be accessible
 *
 * When those are present in the current directory itself (not in a .git
 * subdirectory), Git treats the current directory as a bare repository and
 * will execute hooks/pre-commit and other hook scripts from the cwd.
 *
 * Attack scenario:
 * 1. Attacker creates HEAD, objects/, refs/, and hooks/pre-commit in cwd
 * 2. Attacker deletes or corrupts .git/HEAD to invalidate the normal git directory
 * 3. When user runs 'git status', Git treats cwd as the git dir and runs the hook
 *
 * This check is deliberately stricter than Git: once there is no valid
 * `.git` reference, ANY single indicator (HEAD file, objects/ or refs/
 * directory) is enough to flag the directory.
 *
 * @returns true if the cwd looks like a bare/exploited git directory
 */
/* eslint-disable custom-rules/no-sync-fs -- sync permission-eval check */
export function isCurrentDirectoryBareGitRepo(): boolean {
  const fs = getFsImplementation()
  const cwd = getCwd()

  // Each probe swallows its own stat error so that a failure on one path
  // can never mask the result for another.
  const isRegularFile = (target: string): boolean => {
    try {
      return fs.statSync(target).isFile()
    } catch {
      return false
    }
  }
  const isDirectory = (target: string): boolean => {
    try {
      return fs.statSync(target).isDirectory()
    } catch {
      return false
    }
  }

  // Classify `.git` with a single stat.
  const dotGit = join(cwd, '.git')
  let dotGitKind: 'file' | 'directory' | 'absent-or-other' = 'absent-or-other'
  try {
    const stats = fs.statSync(dotGit)
    if (stats.isFile()) {
      dotGitKind = 'file'
    } else if (stats.isDirectory()) {
      dotGitKind = 'directory'
    }
  } catch {
    // no .git — continue to the bare-repo indicator check
  }

  if (dotGitKind === 'file') {
    // worktree/submodule — Git follows the gitdir reference
    return false
  }

  // SECURITY: .git/HEAD must be a regular FILE. An attacker creating
  // .git/HEAD as a DIRECTORY would satisfy a bare existence check, but Git's
  // setup_git_directory rejects it (not a valid HEAD) and falls back to cwd
  // discovery.
  if (dotGitKind === 'directory' && isRegularFile(join(dotGit, 'HEAD'))) {
    // normal repo — .git/HEAD valid, Git won't fall back to cwd
    return false
  }

  // No valid .git/HEAD found. Be cautious: flag the cwd if ANY bare-repo
  // indicator is present.
  return (
    isRegularFile(join(cwd, 'HEAD')) ||
    isDirectory(join(cwd, 'objects')) ||
    isDirectory(join(cwd, 'refs'))
  )
}
/* eslint-enable custom-rules/no-sync-fs */