/ bridge / bridgePointer.ts
bridgePointer.ts
  1  import { mkdir, readFile, stat, unlink, writeFile } from 'fs/promises'
  2  import { dirname, join } from 'path'
  3  import { z } from 'zod/v4'
  4  import { logForDebugging } from '../utils/debug.js'
  5  import { isENOENT } from '../utils/errors.js'
  6  import { getWorktreePathsPortable } from '../utils/getWorktreePathsPortable.js'
  7  import { lazySchema } from '../utils/lazySchema.js'
  8  import {
  9    getProjectsDir,
 10    sanitizePath,
 11  } from '../utils/sessionStoragePortable.js'
 12  import { jsonParse, jsonStringify } from '../utils/slowOperations.js'
 13  
 14  /**
 15   * Upper bound on worktree fanout. git worktree list is naturally bounded
 16   * (50 is a LOT), but this caps the parallel stat() burst and guards against
 17   * pathological setups. Above this, --continue falls back to current-dir-only.
 18   */
 19  const MAX_WORKTREE_FANOUT = 50
 20  
 21  /**
 22   * Crash-recovery pointer for Remote Control sessions.
 23   *
 24   * Written immediately after a bridge session is created, periodically
 25   * refreshed during the session, and cleared on clean shutdown. If the
 26   * process dies unclean (crash, kill -9, terminal closed), the pointer
 27   * persists. On next startup, `claude remote-control` detects it and offers
 28   * to resume via the --session-id flow from #20460.
 29   *
 30   * Staleness is checked against the file's mtime (not an embedded timestamp)
 31   * so that a periodic re-write with the same content serves as a refresh —
 32   * matches the backend's rolling BRIDGE_LAST_POLL_TTL (4h) semantics. A
 33   * bridge that's been polling for 5+ hours and then crashes still has a
 34   * fresh pointer as long as the refresh ran within the window.
 35   *
 36   * Scoped per working directory (alongside transcript JSONL files) so two
 37   * concurrent bridges in different repos don't clobber each other.
 38   */
 39  
 40  export const BRIDGE_POINTER_TTL_MS = 4 * 60 * 60 * 1000
 41  
 42  const BridgePointerSchema = lazySchema(() =>
 43    z.object({
 44      sessionId: z.string(),
 45      environmentId: z.string(),
 46      source: z.enum(['standalone', 'repl']),
 47    }),
 48  )
 49  
 50  export type BridgePointer = z.infer<ReturnType<typeof BridgePointerSchema>>
 51  
 52  export function getBridgePointerPath(dir: string): string {
 53    return join(getProjectsDir(), sanitizePath(dir), 'bridge-pointer.json')
 54  }
 55  
 56  /**
 57   * Write the pointer. Also used to refresh mtime during long sessions —
 58   * calling with the same IDs is a cheap no-content-change write that bumps
 59   * the staleness clock. Best-effort — a crash-recovery file must never
 60   * itself cause a crash. Logs and swallows on error.
 61   */
 62  export async function writeBridgePointer(
 63    dir: string,
 64    pointer: BridgePointer,
 65  ): Promise<void> {
 66    const path = getBridgePointerPath(dir)
 67    try {
 68      await mkdir(dirname(path), { recursive: true })
 69      await writeFile(path, jsonStringify(pointer), 'utf8')
 70      logForDebugging(`[bridge:pointer] wrote ${path}`)
 71    } catch (err: unknown) {
 72      logForDebugging(`[bridge:pointer] write failed: ${err}`, { level: 'warn' })
 73    }
 74  }
 75  
 76  /**
 77   * Read the pointer and its age (ms since last write). Operates directly
 78   * and handles errors — no existence check (CLAUDE.md TOCTOU rule). Returns
 79   * null on any failure: missing file, corrupted JSON, schema mismatch, or
 80   * stale (mtime > 4h ago). Stale/invalid pointers are deleted so they don't
 81   * keep re-prompting after the backend has already GC'd the env.
 82   */
 83  export async function readBridgePointer(
 84    dir: string,
 85  ): Promise<(BridgePointer & { ageMs: number }) | null> {
 86    const path = getBridgePointerPath(dir)
 87    let raw: string
 88    let mtimeMs: number
 89    try {
 90      // stat for mtime (staleness anchor), then read. Two syscalls, but both
 91      // are needed — mtime IS the data we return, not a TOCTOU guard.
 92      mtimeMs = (await stat(path)).mtimeMs
 93      raw = await readFile(path, 'utf8')
 94    } catch {
 95      return null
 96    }
 97  
 98    const parsed = BridgePointerSchema().safeParse(safeJsonParse(raw))
 99    if (!parsed.success) {
100      logForDebugging(`[bridge:pointer] invalid schema, clearing: ${path}`)
101      await clearBridgePointer(dir)
102      return null
103    }
104  
105    const ageMs = Math.max(0, Date.now() - mtimeMs)
106    if (ageMs > BRIDGE_POINTER_TTL_MS) {
107      logForDebugging(`[bridge:pointer] stale (>4h mtime), clearing: ${path}`)
108      await clearBridgePointer(dir)
109      return null
110    }
111  
112    return { ...parsed.data, ageMs }
113  }
114  
/**
 * Worktree-aware pointer lookup used by `--continue`.
 *
 * The REPL bridge writes its pointer under `getOriginalCwd()`, which
 * EnterWorktreeTool/activeWorktreeSession can redirect to a worktree path,
 * whereas `claude remote-control --continue` starts from `resolve('.')`
 * (the shell CWD). To bridge that gap this looks in `dir` first and, only
 * when nothing is found there, asks git for the sibling worktrees and
 * reads each of their pointers in parallel, matching /resume's semantics.
 *
 * The common case (pointer in the launch dir) therefore costs one pointer
 * read and no git invocation. The fanout is skipped when git reports at
 * most one worktree or more than MAX_WORKTREE_FANOUT.
 *
 * @param dir Directory the command was launched from.
 * @returns The freshest pointer and the directory it belongs to (so the
 *   caller can clear the right file if resume fails), or null if none.
 */
export async function readBridgePointerAcrossWorktrees(
  dir: string,
): Promise<{ pointer: BridgePointer & { ageMs: number }; dir: string } | null> {
  type Found = { pointer: BridgePointer & { ageMs: number }; dir: string }

  // Fast path: the launch directory itself. Covers the standalone bridge
  // and the REPL bridge whenever no worktree switch took place.
  const local = await readBridgePointer(dir)
  if (local !== null) {
    return { pointer: local, dir }
  }

  // Slow path: enumerate worktrees. Per the original notes,
  // getWorktreePathsPortable has a 5s timeout and yields [] on any error
  // (not a git repo, git not installed).
  const worktrees = await getWorktreePathsPortable(dir)
  const worktreeCount = worktrees.length
  if (worktreeCount <= 1) {
    return null
  }
  if (worktreeCount > MAX_WORKTREE_FANOUT) {
    logForDebugging(
      `[bridge:pointer] ${worktreeCount} worktrees exceeds fanout cap ${MAX_WORKTREE_FANOUT}, skipping`,
    )
    return null
  }

  // `dir` was already checked above; drop it from the candidates. Keys are
  // compared in sanitized form so that differently spelled paths for the
  // same directory (e.g. C:/ vs c:/ on Windows) still match.
  const ownKey = sanitizePath(dir)
  const siblings = worktrees.filter(wt => sanitizePath(wt) !== ownKey)

  // All reads are started together; a worktree without a pointer costs a
  // single failed stat, so total latency is about that of the slowest one.
  const hits = await Promise.all(
    siblings.map(async (worktreeDir): Promise<Found | null> => {
      const pointer = await readBridgePointer(worktreeDir)
      return pointer === null ? null : { pointer, dir: worktreeDir }
    }),
  )

  // Keep the hit with the smallest ageMs; on a tie the earlier worktree
  // wins. The pointer carries environmentId, so resume reconnects to the
  // right env regardless of which worktree --continue was invoked from.
  const freshest = hits.reduce<Found | null>((best, hit) => {
    if (hit === null) {
      return best
    }
    if (best === null || hit.pointer.ageMs < best.pointer.ageMs) {
      return hit
    }
    return best
  }, null)

  if (freshest !== null) {
    logForDebugging(
      `[bridge:pointer] fanout found pointer in worktree ${freshest.dir} (ageMs=${freshest.pointer.ageMs})`,
    )
  }
  return freshest
}
185  
/**
 * Remove the pointer file for `dir`. Safe to call repeatedly: a missing
 * file (ENOENT) is the expected outcome after a previous clean shutdown
 * and is ignored silently. Any other failure is logged at warn level and
 * swallowed.
 *
 * @param dir Working directory the pointer is scoped to.
 */
export async function clearBridgePointer(dir: string): Promise<void> {
  const pointerFile = getBridgePointerPath(dir)
  try {
    await unlink(pointerFile)
    logForDebugging(`[bridge:pointer] cleared ${pointerFile}`)
  } catch (failure: unknown) {
    // Nothing to delete — not worth reporting.
    if (isENOENT(failure)) {
      return
    }
    logForDebugging(`[bridge:pointer] clear failed: ${failure}`, {
      level: 'warn',
    })
  }
}
203  
/**
 * Parse `raw` with `jsonParse`, converting any thrown error into a null
 * result so callers can hand the outcome directly to schema validation.
 *
 * @param raw Text expected to contain JSON.
 * @returns The parsed value, or null when parsing throws.
 */
function safeJsonParse(raw: string): unknown {
  let value: unknown = null
  try {
    value = jsonParse(raw)
  } catch {
    // Leave `value` as null; the caller treats that as invalid content.
  }
  return value
}