/ utils / git / gitFilesystem.ts
gitFilesystem.ts
  1  /**
  2   * Filesystem-based git state reading — avoids spawning git subprocesses.
  3   *
  4   * Covers: resolving .git directories (including worktrees/submodules),
  5   * parsing HEAD, resolving refs via loose files and packed-refs,
 * and the GitFileWatcher that caches branch, HEAD SHA, remote URL and default branch with fs.watchFile.
  7   *
  8   * Correctness notes (verified against git source):
  9   *   - HEAD: `ref: refs/heads/<branch>\n` or raw SHA (refs/files-backend.c)
 10   *   - Packed-refs: `<sha> <refname>\n`, skip `#` and `^` lines (packed-backend.c)
 11   *   - .git file (worktree): `gitdir: <path>\n` with optional relative path (setup.c)
 12   *   - Shallow: mere existence of `<commonDir>/shallow` means shallow (shallow.c)
 13   */
 14  
 15  import { unwatchFile, watchFile } from 'fs'
 16  import { readdir, readFile, stat } from 'fs/promises'
 17  import { join, resolve } from 'path'
 18  import { waitForScrollIdle } from '../../bootstrap/state.js'
 19  import { registerCleanup } from '../cleanupRegistry.js'
 20  import { getCwd } from '../cwd.js'
 21  import { findGitRoot } from '../git.js'
 22  import { parseGitConfigValue } from './gitConfigParser.js'
 23  
 24  // ---------------------------------------------------------------------------
 25  // resolveGitDir — find the actual .git directory
 26  // ---------------------------------------------------------------------------
 27  
 28  const resolveGitDirCache = new Map<string, string | null>()
 29  
 30  /** Clear cached git dir resolutions. Exported for testing only. */
 31  export function clearResolveGitDirCache(): void {
 32    resolveGitDirCache.clear()
 33  }
 34  
 35  /**
 36   * Resolve the actual .git directory for a repo.
 37   * Handles worktrees/submodules where .git is a file containing `gitdir: <path>`.
 38   * Memoized per startPath.
 39   */
 40  export async function resolveGitDir(
 41    startPath?: string,
 42  ): Promise<string | null> {
 43    const cwd = resolve(startPath ?? getCwd())
 44    const cached = resolveGitDirCache.get(cwd)
 45    if (cached !== undefined) {
 46      return cached
 47    }
 48  
 49    const root = findGitRoot(cwd)
 50    if (!root) {
 51      resolveGitDirCache.set(cwd, null)
 52      return null
 53    }
 54  
 55    const gitPath = join(root, '.git')
 56    try {
 57      const st = await stat(gitPath)
 58      if (st.isFile()) {
 59        // Worktree or submodule: .git is a file with `gitdir: <path>`
 60        // Git strips trailing \n and \r (setup.c read_gitfile_gently).
 61        const content = (await readFile(gitPath, 'utf-8')).trim()
 62        if (content.startsWith('gitdir:')) {
 63          const rawDir = content.slice('gitdir:'.length).trim()
 64          const resolved = resolve(root, rawDir)
 65          resolveGitDirCache.set(cwd, resolved)
 66          return resolved
 67        }
 68      }
 69      // Regular repo: .git is a directory
 70      resolveGitDirCache.set(cwd, gitPath)
 71      return gitPath
 72    } catch {
 73      resolveGitDirCache.set(cwd, null)
 74      return null
 75    }
 76  }
 77  
 78  // ---------------------------------------------------------------------------
 79  // isSafeRefName — validate ref/branch names read from .git/
 80  // ---------------------------------------------------------------------------
 81  
 82  /**
 83   * Validate that a ref/branch name read from .git/ is safe to use in path
 84   * joins, as git positional arguments, and when interpolated into shell
 85   * commands (commit-push-pr skill interpolates the branch into shell).
 86   * An attacker who controls .git/HEAD or a loose ref file could otherwise
 87   * embed path traversal (`..`), argument injection (leading `-`), or shell
 88   * metacharacters — .git/HEAD is a plain text file that can be written
 89   * without git's own check-ref-format validation.
 90   *
 91   * Allowlist: ASCII alphanumerics, `/`, `.`, `_`, `+`, `-`, `@` only. This
 92   * covers all legitimate git branch names (e.g. `feature/foo`,
 93   * `release-1.2.3+build`, `dependabot/npm_and_yarn/@types/node-18.0.0`)
 94   * while rejecting everything that could be dangerous in shell context
 95   * (newlines, backticks, `$`, `;`, `|`, `&`, `(`, `)`, `<`, `>`, spaces,
 96   * tabs, quotes, backslash) and path traversal (`..`).
 97   */
 98  export function isSafeRefName(name: string): boolean {
 99    if (!name || name.startsWith('-') || name.startsWith('/')) {
100      return false
101    }
102    if (name.includes('..')) {
103      return false
104    }
105    // Reject single-dot and empty path components (`.`, `foo/./bar`, `foo//bar`,
106    // `foo/`). Git-check-ref-format rejects these, and `.` normalizes away in
107    // path joins so a tampered HEAD of `refs/heads/.` would make us watch the
108    // refs/heads directory itself instead of a branch file.
109    if (name.split('/').some(c => c === '.' || c === '')) {
110      return false
111    }
112    // Allowlist-only: alphanumerics, /, ., _, +, -, @. Rejects all shell
113    // metacharacters, whitespace, NUL, and non-ASCII. Git's forbidden @{
114    // sequence is blocked because { is not in the allowlist.
115    if (!/^[a-zA-Z0-9/._+@-]+$/.test(name)) {
116      return false
117    }
118    return true
119  }
120  
/**
 * Check that a string is a full-length git object id: exactly 40 lowercase
 * hex characters (SHA-1) or exactly 64 (SHA-256). Abbreviated hashes are
 * rejected because git never writes them to HEAD or ref files.
 *
 * Used to stop attacker-controlled content in a detached .git/HEAD or a
 * loose ref file from flowing into shell contexts.
 *
 * @param s Candidate hash, already trimmed by the caller.
 * @returns `true` for a 40- or 64-character lowercase hex string.
 */
export function isValidGitSha(s: string): boolean {
  if (s.length !== 40 && s.length !== 64) {
    return false
  }
  return /^[0-9a-f]+$/.test(s)
}
132  
133  // ---------------------------------------------------------------------------
134  // readGitHead — parse .git/HEAD
135  // ---------------------------------------------------------------------------
136  
/**
 * Read `<gitDir>/HEAD` and classify it as a branch or a detached commit.
 *
 * HEAD format (per git source, refs/files-backend.c):
 *   - `ref: refs/heads/<branch>\n`  — on a branch
 *   - `ref: <other-ref>\n`          — unusual symref (e.g. during bisect)
 *   - `<hex-sha>\n`                 — detached HEAD (e.g. during rebase)
 *
 * Surrounding whitespace is trimmed (git uses strbuf_rtrim), and any
 * whitespace between `ref:` and the path is tolerated.
 *
 * @param gitDir Git directory that contains the HEAD file.
 * @returns
 *   - `{ type: 'branch', name }` for a symref into refs/heads/ whose branch
 *     name passes isSafeRefName;
 *   - `{ type: 'detached', sha }` for a full-length SHA, or for any other
 *     safe symref — in that case `sha` is the resolved SHA, or `''` when the
 *     ref cannot be resolved;
 *   - `null` when HEAD cannot be read or its content fails validation.
 */
export async function readGitHead(
  gitDir: string,
): Promise<
  { type: 'branch'; name: string } | { type: 'detached'; sha: string } | null
> {
  try {
    const raw = await readFile(join(gitDir, 'HEAD'), 'utf-8')
    const content = raw.trim()

    if (!content.startsWith('ref:')) {
      // Raw SHA (detached HEAD). Validate it: an attacker-controlled HEAD
      // could otherwise smuggle shell metacharacters downstream.
      return isValidGitSha(content) ? { type: 'detached', sha: content } : null
    }

    const ref = content.slice('ref:'.length).trim()
    const branchPrefix = 'refs/heads/'
    if (ref.startsWith(branchPrefix)) {
      const name = ref.slice(branchPrefix.length)
      // Reject path traversal and argument injection from a tampered HEAD.
      return isSafeRefName(name) ? { type: 'branch', name } : null
    }

    // Unusual symref (not a local branch) — report it as a detached SHA.
    if (!isSafeRefName(ref)) {
      return null
    }
    const sha = await resolveRef(gitDir, ref)
    return { type: 'detached', sha: sha ?? '' }
  } catch {
    return null
  }
}
184  
185  // ---------------------------------------------------------------------------
186  // resolveRef — resolve loose/packed refs to SHAs
187  // ---------------------------------------------------------------------------
188  
/**
 * Maximum number of symref hops followed while resolving a single ref.
 * Git bounds symref resolution at 5 (SYMREF_MAXDEPTH); without a bound, a
 * cyclic chain in a tampered .git (e.g. `a -> b -> a`) would never
 * terminate.
 */
const MAX_SYMREF_DEPTH = 5

/**
 * Resolve a git ref (e.g. `refs/heads/main`) to a commit SHA.
 * Checks loose ref files first, then falls back to packed-refs.
 * Follows symrefs (e.g. `ref: refs/remotes/origin/main`) up to
 * MAX_SYMREF_DEPTH hops.
 *
 * For worktrees, refs live in the common gitdir (pointed to by the
 * `commondir` file), not the worktree-specific gitdir. We check the
 * worktree gitdir first, then fall back to the common dir.
 *
 * Packed-refs format (per packed-backend.c):
 *   - Header: `# pack-refs with: <traits>\n`
 *   - Entries: `<40-hex-sha> <refname>\n`
 *   - Peeled:  `^<40-hex-sha>\n` (after annotated tag entries)
 *
 * @param gitDir Git directory to start from (may be a worktree gitdir).
 * @param ref Full ref name relative to the git directory.
 * @returns The validated SHA, or `null` if the ref is missing, malformed,
 *   or its symref chain is unsafe, too deep, or cyclic.
 */
export async function resolveRef(
  gitDir: string,
  ref: string,
): Promise<string | null> {
  return resolveRefAtDepth(gitDir, ref, 0)
}

/**
 * resolveRef with an explicit symref hop count: look in `gitDir`, then in
 * its common dir when that is a different directory.
 */
async function resolveRefAtDepth(
  gitDir: string,
  ref: string,
  depth: number,
): Promise<string | null> {
  const result = await resolveRefInDir(gitDir, ref, depth)
  if (result) {
    return result
  }

  // For worktrees: try the common gitdir where shared refs live
  const commonDir = await getCommonDir(gitDir)
  if (commonDir && commonDir !== gitDir) {
    return resolveRefInDir(commonDir, ref, depth)
  }

  return null
}

/**
 * Resolve `ref` inside one directory: loose ref file first, then that
 * directory's packed-refs.
 *
 * @param depth Number of symref hops already taken to reach `ref`.
 */
async function resolveRefInDir(
  dir: string,
  ref: string,
  depth = 0,
): Promise<string | null> {
  // Try loose ref file
  try {
    const content = (await readFile(join(dir, ref), 'utf-8')).trim()
    if (content.startsWith('ref:')) {
      // Stop on over-long (in practice: cyclic) symref chains instead of
      // recursing forever.
      if (depth >= MAX_SYMREF_DEPTH) {
        return null
      }
      const target = content.slice('ref:'.length).trim()
      // Reject path traversal in a tampered symref chain.
      if (!isSafeRefName(target)) {
        return null
      }
      return resolveRefAtDepth(dir, target, depth + 1)
    }
    // Loose ref content should be a raw SHA. Validate: an attacker-controlled
    // ref file could contain shell metacharacters.
    if (!isValidGitSha(content)) {
      return null
    }
    return content
  } catch {
    // Loose ref doesn't exist, try packed-refs
  }

  try {
    const packed = await readFile(join(dir, 'packed-refs'), 'utf-8')
    for (const line of packed.split('\n')) {
      // Skip the header (`#`) and peeled-tag (`^`) lines
      if (line.startsWith('#') || line.startsWith('^')) {
        continue
      }
      const spaceIdx = line.indexOf(' ')
      if (spaceIdx === -1) {
        continue
      }
      if (line.slice(spaceIdx + 1) === ref) {
        const sha = line.slice(0, spaceIdx)
        return isValidGitSha(sha) ? sha : null
      }
    }
  } catch {
    // No packed-refs
  }

  return null
}
267  
/**
 * Locate the shared git directory via `<gitDir>/commondir`.
 * In a worktree this file points to the main repo's .git dir; its content
 * is trimmed and resolved relative to `gitDir`.
 *
 * @param gitDir Git directory that may contain a `commondir` file.
 * @returns The resolved common directory, or `null` when the file cannot be
 *   read (regular repo).
 */
export async function getCommonDir(gitDir: string): Promise<string | null> {
  let pointer: string
  try {
    pointer = await readFile(join(gitDir, 'commondir'), 'utf-8')
  } catch {
    return null
  }
  return resolve(gitDir, pointer.trim())
}
281  
/**
 * Read a loose symref file and return the part of its target that follows
 * a known prefix. Only the loose file is consulted — packed-refs doesn't
 * store symrefs.
 *
 * @param gitDir Directory the ref path is relative to.
 * @param refPath Path of the ref file, e.g. `refs/remotes/origin/HEAD`.
 * @param branchPrefix Prefix the symref target must start with.
 * @returns The target with `branchPrefix` removed, or `null` when the file
 *   cannot be read, is not a symref, points outside the prefix, or the
 *   remaining name fails isSafeRefName.
 */
export async function readRawSymref(
  gitDir: string,
  refPath: string,
  branchPrefix: string,
): Promise<string | null> {
  try {
    const content = (await readFile(join(gitDir, refPath), 'utf-8')).trim()
    if (!content.startsWith('ref:')) {
      return null
    }
    const target = content.slice('ref:'.length).trim()
    if (!target.startsWith(branchPrefix)) {
      return null
    }
    const name = target.slice(branchPrefix.length)
    // Reject path traversal and argument injection from a tampered symref.
    return isSafeRefName(name) ? name : null
  } catch {
    // Not a loose ref
    return null
  }
}
310  
311  // ---------------------------------------------------------------------------
312  // GitFileWatcher — watches git files and caches derived values.
313  // Lazily initialized on first cache access. Invalidates all cached
314  // values when any watched file changes.
315  //
316  // Watches:
317  //   .git/HEAD          — branch switches, detached HEAD
318  //   .git/config        — remote URL changes
319  //   .git/refs/heads/<branch> — new commits on the current branch
320  //
321  // When HEAD changes (branch switch), the branch ref watcher is updated
322  // to track the new branch's ref file.
323  // ---------------------------------------------------------------------------
324  
/** One value memoized by GitFileWatcher under a string key. */
interface CacheEntry<T> {
  /** Most recently stored result of `compute`. */
  value: T
  /** Set when a watched file changed; a dirty entry is recomputed on the next read. */
  dirty: boolean
  /** Producer that was supplied when the entry was created. */
  compute: () => Promise<T>
}

/** Polling interval passed to fs.watchFile: 10 ms when NODE_ENV is `test`, 1000 ms otherwise. */
const WATCH_INTERVAL_MS = process.env.NODE_ENV !== 'test' ? 1000 : 10
332  
/**
 * Polls a handful of git files with fs.watchFile and caches values derived
 * from them. Started lazily on the first get(); every watched-file change
 * invalidates all cached values.
 */
class GitFileWatcher {
  private gitDir: string | null = null
  private commonDir: string | null = null
  private initialized = false
  private initPromise: Promise<void> | null = null
  private watchedPaths: string[] = []
  private branchRefPath: string | null = null
  private cache = new Map<string, CacheEntry<unknown>>()
  // Bumped on every invalidation and reset. A compute that started under an
  // older generation may have read pre-change files, so its result is
  // returned to its caller but never stored as a clean cache entry.
  private generation = 0
  // Computes currently running, per key, tagged with the generation they
  // started under so concurrent callers can share them.
  private inFlight = new Map<
    string,
    { generation: number; promise: Promise<unknown> }
  >()

  /** Start the watcher once; concurrent callers share the same start promise. */
  async ensureStarted(): Promise<void> {
    if (this.initialized) {
      return
    }
    if (this.initPromise) {
      return this.initPromise
    }
    this.initPromise = this.start()
    return this.initPromise
  }

  /** Resolve the git directories and install the file watchers. */
  private async start(): Promise<void> {
    this.gitDir = await resolveGitDir()
    this.initialized = true
    if (!this.gitDir) {
      // Not in a repo: nothing to watch
      return
    }

    // In a worktree, branch refs and the main config are shared and live in
    // commonDir, not the per-worktree gitDir. Resolve once so we don't
    // re-read the commondir file on every branch switch.
    this.commonDir = await getCommonDir(this.gitDir)

    // Watch .git/HEAD and .git/config
    this.watchPath(join(this.gitDir, 'HEAD'), () => {
      void this.onHeadChanged()
    })
    // Config (remote URLs) lives in commonDir for worktrees
    this.watchPath(join(this.commonDir ?? this.gitDir, 'config'), () => {
      this.invalidate()
    })

    // Watch the current branch's ref file for commit changes
    await this.watchCurrentBranchRef()

    registerCleanup(async () => {
      this.stopWatching()
    })
  }

  /** Poll `path` every WATCH_INTERVAL_MS and remember it for later unwatching. */
  private watchPath(path: string, callback: () => void): void {
    this.watchedPaths.push(path)
    watchFile(path, { interval: WATCH_INTERVAL_MS }, callback)
  }

  /**
   * Watch the loose ref file for the current branch.
   * Called on startup and whenever HEAD changes (branch switch).
   */
  private async watchCurrentBranchRef(): Promise<void> {
    if (!this.gitDir) {
      return
    }

    const head = await readGitHead(this.gitDir)
    // Branch refs live in commonDir for worktrees (gitDir for regular repos)
    const refsDir = this.commonDir ?? this.gitDir
    const refPath =
      head?.type === 'branch' ? join(refsDir, 'refs', 'heads', head.name) : null

    // Already watching this ref (or already not watching anything)
    if (refPath === this.branchRefPath) {
      return
    }

    // Stop watching old branch ref. Runs for branch→branch AND
    // branch→detached (checkout --detach, rebase, bisect).
    if (this.branchRefPath) {
      unwatchFile(this.branchRefPath)
      this.watchedPaths = this.watchedPaths.filter(
        p => p !== this.branchRefPath,
      )
    }

    this.branchRefPath = refPath

    if (!refPath) {
      return
    }

    // The ref file may not exist yet (new branch before first commit).
    // watchFile works on nonexistent files — it fires when the file appears.
    this.watchPath(refPath, () => {
      this.invalidate()
    })
  }

  /** HEAD changed: invalidate immediately, then re-target the branch ref watcher. */
  private async onHeadChanged(): Promise<void> {
    // HEAD changed — could be a branch switch or detach.
    // invalidate() is cheap, so do it first. The file I/O (readGitHead,
    // watchFile setup) is deferred until scroll settles so watchFile
    // callbacks that land mid-scroll don't compete for the event loop.
    this.invalidate()
    await waitForScrollIdle()
    await this.watchCurrentBranchRef()
  }

  /** Mark every cached value stale and start a new generation. */
  private invalidate(): void {
    this.generation++
    for (const entry of this.cache.values()) {
      entry.dirty = true
    }
  }

  /** Remove every installed file watcher. */
  private stopWatching(): void {
    for (const path of this.watchedPaths) {
      unwatchFile(path)
    }
    this.watchedPaths = []
    this.branchRefPath = null
  }

  /**
   * Get a cached value by key. On first call for a key, computes and caches it.
   * Subsequent calls return the cached value until a watched file changes,
   * which marks the entry dirty. The next get() re-computes from disk.
   *
   * Race handling:
   *   - A result is stored as clean only if no invalidation (or reset)
   *     happened while it was being computed. This also covers the very
   *     first compute for a key, when no entry exists yet to be marked dirty.
   *   - While a compute for the current generation is running, other callers
   *     for the same key await that same compute instead of being served the
   *     pre-invalidation value.
   *
   * @param key Cache key.
   * @param compute Producer for the value; callers pass the same producer
   *   for a given key.
   * @returns The cached value, or the freshly computed one.
   */
  async get<T>(key: string, compute: () => Promise<T>): Promise<T> {
    await this.ensureStarted()
    const cached = this.cache.get(key)
    if (cached && !cached.dirty) {
      return cached.value as T
    }

    const generation = this.generation
    const running = this.inFlight.get(key)
    if (running && running.generation === generation) {
      return running.promise as Promise<T>
    }

    const promise = compute().then(value => {
      // Only cache when nothing changed on disk since the compute started;
      // otherwise the entry stays dirty/absent and the next get() re-reads.
      if (generation === this.generation) {
        this.cache.set(key, { value, dirty: false, compute })
      }
      return value
    })
    const record = { generation, promise }
    this.inFlight.set(key, record)
    try {
      return await promise
    } finally {
      // A newer compute may have replaced our record; only remove our own.
      if (this.inFlight.get(key) === record) {
        this.inFlight.delete(key)
      }
    }
  }

  /** Reset all state. Stops file watchers. For testing only. */
  reset(): void {
    this.stopWatching()
    this.cache.clear()
    this.inFlight.clear()
    // Computes still running from before the reset must not repopulate the cache
    this.generation++
    this.initialized = false
    this.initPromise = null
    this.gitDir = null
    this.commonDir = null
  }
}

const gitWatcher = new GitFileWatcher()
499  
/**
 * Current branch name for the cwd repo, read from disk.
 * Falls back to the literal `HEAD` when there is no repo, HEAD is unreadable
 * or invalid, or HEAD is not on a local branch.
 */
async function computeBranch(): Promise<string> {
  const gitDir = await resolveGitDir()
  const head = gitDir ? await readGitHead(gitDir) : null
  return head?.type === 'branch' ? head.name : 'HEAD'
}
511  
/**
 * SHA that HEAD points at for the cwd repo, read from disk.
 * Returns `''` when there is no repo, HEAD is unreadable or invalid, or the
 * current branch's ref cannot be resolved.
 */
async function computeHead(): Promise<string> {
  const gitDir = await resolveGitDir()
  const head = gitDir ? await readGitHead(gitDir) : null
  if (!gitDir || !head) {
    return ''
  }
  if (head.type === 'detached') {
    return head.sha
  }
  const sha = await resolveRef(gitDir, `refs/heads/${head.name}`)
  return sha ?? ''
}
526  
/**
 * URL of the `origin` remote for the cwd repo, read from the git config.
 * Looks in the git directory first and, when a distinct common directory
 * exists (worktrees), in that directory's config second.
 * Returns `null` when there is no repo or no URL is found in the first
 * config and there is no separate common directory.
 */
async function computeRemoteUrl(): Promise<string | null> {
  const gitDir = await resolveGitDir()
  if (!gitDir) {
    return null
  }

  const ownUrl = await parseGitConfigValue(gitDir, 'remote', 'origin', 'url')
  if (ownUrl) {
    return ownUrl
  }

  // In worktrees, the config with remote URLs is in the common dir
  const commonDir = await getCommonDir(gitDir)
  if (!commonDir || commonDir === gitDir) {
    return null
  }
  return parseGitConfigValue(commonDir, 'remote', 'origin', 'url')
}
543  
/**
 * Default branch of the `origin` remote for the cwd repo.
 *
 * Order of preference:
 *   1. the target of the `refs/remotes/origin/HEAD` symref,
 *   2. `main`, if `refs/remotes/origin/main` resolves,
 *   3. `master`, if `refs/remotes/origin/master` resolves,
 *   4. the literal `main` (also used when there is no repo).
 */
async function computeDefaultBranch(): Promise<string> {
  const fallback = 'main'
  const gitDir = await resolveGitDir()
  if (!gitDir) {
    return fallback
  }

  // refs/remotes/ lives in commonDir, not the per-worktree gitDir
  const refsRoot = (await getCommonDir(gitDir)) ?? gitDir

  const fromSymref = await readRawSymref(
    refsRoot,
    'refs/remotes/origin/HEAD',
    'refs/remotes/origin/',
  )
  if (fromSymref) {
    return fromSymref
  }

  if (await resolveRef(refsRoot, `refs/remotes/origin/main`)) {
    return 'main'
  }
  if (await resolveRef(refsRoot, `refs/remotes/origin/master`)) {
    return 'master'
  }
  return fallback
}
567  
/**
 * Current branch name via the shared file watcher cache
 * (`HEAD` when not on a branch or not in a repo).
 */
export function getCachedBranch(): Promise<string> {
  return gitWatcher.get('branch', computeBranch)
}

/**
 * Current HEAD SHA via the shared file watcher cache
 * (`''` when it cannot be determined).
 */
export function getCachedHead(): Promise<string> {
  return gitWatcher.get('head', computeHead)
}

/** `origin` remote URL via the shared file watcher cache, or `null`. */
export function getCachedRemoteUrl(): Promise<string | null> {
  return gitWatcher.get('remoteUrl', computeRemoteUrl)
}

/** Default branch of `origin` via the shared file watcher cache. */
export function getCachedDefaultBranch(): Promise<string> {
  return gitWatcher.get('defaultBranch', computeDefaultBranch)
}

/** Reset the shared git file watcher (cache and file watchers). For testing only. */
export function resetGitFileWatcher(): void {
  gitWatcher.reset()
}
588  
/**
 * Read the HEAD SHA for an arbitrary directory (not using the watcher).
 * Used by plugins that need the HEAD of a specific repo, not the CWD repo.
 *
 * @param cwd Directory inside the repo of interest.
 * @returns The HEAD commit SHA, or `null` when there is no repo, HEAD is
 *   unreadable or invalid, or the ref HEAD points at cannot be resolved.
 */
export async function getHeadForDir(cwd: string): Promise<string | null> {
  const gitDir = await resolveGitDir(cwd)
  if (!gitDir) {
    return null
  }
  const head = await readGitHead(gitDir)
  if (!head) {
    return null
  }
  if (head.type === 'branch') {
    return resolveRef(gitDir, `refs/heads/${head.name}`)
  }
  // readGitHead reports an unresolvable non-branch symref as a detached HEAD
  // with an empty SHA. Surface that as null, like every other "no SHA" case
  // here, rather than returning '' as if it were a commit id.
  return head.sha || null
}
607  
/**
 * Read the HEAD SHA for a git worktree directory (not the main repo).
 *
 * Unlike `getHeadForDir`, this reads `<worktreePath>/.git` directly as a
 * `gitdir:` pointer file, with no upward walk. `getHeadForDir` walks upward
 * via `findGitRoot` and would find the parent repo's `.git` when the
 * worktree path doesn't exist — misreporting the parent HEAD as the worktree's.
 *
 * @param worktreePath Root directory of the worktree.
 * @returns The worktree's HEAD SHA, or `null` if the worktree doesn't exist
 *   (`.git` pointer ENOENT), is malformed, or its HEAD cannot be resolved to
 *   a SHA. Caller can treat null as "not a valid worktree".
 */
export async function readWorktreeHeadSha(
  worktreePath: string,
): Promise<string | null> {
  let gitDir: string
  try {
    const ptr = (await readFile(join(worktreePath, '.git'), 'utf-8')).trim()
    if (!ptr.startsWith('gitdir:')) {
      return null
    }
    // A relative gitdir is resolved against the worktree root
    gitDir = resolve(worktreePath, ptr.slice('gitdir:'.length).trim())
  } catch {
    return null
  }
  const head = await readGitHead(gitDir)
  if (!head) {
    return null
  }
  if (head.type === 'branch') {
    return resolveRef(gitDir, `refs/heads/${head.name}`)
  }
  // readGitHead reports an unresolvable non-branch symref as a detached HEAD
  // with an empty SHA; report that as null instead of leaking ''.
  return head.sha || null
}
641  
/**
 * Read the `origin` remote URL for an arbitrary directory via the git config.
 * The git directory's config is consulted first; when a distinct common
 * directory exists (worktrees), its config is consulted second.
 *
 * @param cwd Directory inside the repo of interest.
 * @returns The configured URL, or `null` when there is no repo or no URL is
 *   found in the first config and there is no separate common directory.
 */
export async function getRemoteUrlForDir(cwd: string): Promise<string | null> {
  const gitDir = await resolveGitDir(cwd)
  if (!gitDir) {
    return null
  }

  const ownUrl = await parseGitConfigValue(gitDir, 'remote', 'origin', 'url')
  if (ownUrl) {
    return ownUrl
  }

  // In worktrees, the config with remote URLs is in the common dir
  const commonDir = await getCommonDir(gitDir)
  if (!commonDir || commonDir === gitDir) {
    return null
  }
  return parseGitConfigValue(commonDir, 'remote', 'origin', 'url')
}
661  
/**
 * Report whether the cwd repo is a shallow clone.
 * Per git's shallow.c, the mere existence of `<commonDir>/shallow` means
 * shallow; the file lives in commonDir, not the per-worktree gitDir.
 *
 * @returns `true` if the shallow file can be stat'ed, `false` otherwise
 *   (including when there is no repo).
 */
export async function isShallowClone(): Promise<boolean> {
  const gitDir = await resolveGitDir()
  if (gitDir) {
    const commonDir = (await getCommonDir(gitDir)) ?? gitDir
    try {
      await stat(join(commonDir, 'shallow'))
      return true
    } catch {
      // stat failed: treat as "no shallow file"
    }
  }
  return false
}
680  
/**
 * Count worktrees by listing `<commonDir>/worktrees/`.
 * That directory lives in commonDir, not the per-worktree gitDir, and does
 * not list the main worktree, so 1 is added to the entry count.
 *
 * @returns 0 when there is no repo; 1 when the worktrees directory cannot be
 *   read (only the main worktree) or any other step throws; otherwise the
 *   number of entries plus one.
 */
export async function getWorktreeCountFromFs(): Promise<number> {
  try {
    const gitDir = await resolveGitDir()
    if (gitDir) {
      const commonDir = (await getCommonDir(gitDir)) ?? gitDir
      const linked = await readdir(join(commonDir, 'worktrees'))
      return 1 + linked.length
    }
    return 0
  } catch {
    // No worktrees directory means only the main worktree
    return 1
  }
}
699  }