/ utils / teleport / gitBundle.ts
gitBundle.ts
/**
 * Git bundle creation + upload for CCR seed-bundle seeding.
 *
 * Flow:
 *   1. git stash create → update-ref refs/seed/stash (makes it reachable)
 *   2. git bundle create --all (packs refs/seed/stash + its objects); if the
 *      bundle exceeds the size cap, fall back to HEAD-only, then to a single
 *      squashed commit published at refs/seed/root
 *   3. Upload to /v1/files
 *   4. Cleanup refs/seed/stash and refs/seed/root (don't pollute user's repo)
 *   5. Caller sets seed_bundle_file_id on SessionContext
 */
 11  
 12  import { stat, unlink } from 'fs/promises'
 13  import {
 14    type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 15    logEvent,
 16  } from 'src/services/analytics/index.js'
 17  import { getFeatureValue_CACHED_MAY_BE_STALE } from '../../services/analytics/growthbook.js'
 18  import { type FilesApiConfig, uploadFile } from '../../services/api/filesApi.js'
 19  import { getCwd } from '../cwd.js'
 20  import { logForDebugging } from '../debug.js'
 21  import { execFileNoThrowWithCwd } from '../execFileNoThrow.js'
 22  import { findGitRoot, gitExe } from '../git.js'
 23  import { generateTempFilePath } from '../tempfile.js'
 24  
 25  // Tunable via tengu_ccr_bundle_max_bytes.
 26  const DEFAULT_BUNDLE_MAX_BYTES = 100 * 1024 * 1024
 27  
 28  type BundleScope = 'all' | 'head' | 'squashed'
 29  
 30  export type BundleUploadResult =
 31    | {
 32        success: true
 33        fileId: string
 34        bundleSizeBytes: number
 35        scope: BundleScope
 36        hasWip: boolean
 37      }
 38    | { success: false; error: string; failReason?: BundleFailReason }
 39  
 40  type BundleFailReason = 'git_error' | 'too_large' | 'empty_repo'
 41  
 42  type BundleCreateResult =
 43    | { ok: true; size: number; scope: BundleScope }
 44    | { ok: false; error: string; failReason: BundleFailReason }
 45  
 46  // Bundle --all → HEAD → squashed-root. HEAD drops side branches/tags but
 47  // keeps full current-branch history. Squashed-root is a single parentless
 48  // commit of HEAD's tree (or the stash tree if WIP exists) — no history,
 49  // just the snapshot. Receiver needs refs/seed/root handling for that tier.
 50  async function _bundleWithFallback(
 51    gitRoot: string,
 52    bundlePath: string,
 53    maxBytes: number,
 54    hasStash: boolean,
 55    signal: AbortSignal | undefined,
 56  ): Promise<BundleCreateResult> {
 57    // --all picks up refs/seed/stash; HEAD needs it explicit.
 58    const extra = hasStash ? ['refs/seed/stash'] : []
 59    const mkBundle = (base: string) =>
 60      execFileNoThrowWithCwd(
 61        gitExe(),
 62        ['bundle', 'create', bundlePath, base, ...extra],
 63        { cwd: gitRoot, abortSignal: signal },
 64      )
 65  
 66    const allResult = await mkBundle('--all')
 67    if (allResult.code !== 0) {
 68      return {
 69        ok: false,
 70        error: `git bundle create --all failed (${allResult.code}): ${allResult.stderr.slice(0, 200)}`,
 71        failReason: 'git_error',
 72      }
 73    }
 74  
 75    const { size: allSize } = await stat(bundlePath)
 76    if (allSize <= maxBytes) {
 77      return { ok: true, size: allSize, scope: 'all' }
 78    }
 79  
 80    // bundle create overwrites in place.
 81    logForDebugging(
 82      `[gitBundle] --all bundle is ${(allSize / 1024 / 1024).toFixed(1)}MB (> ${(maxBytes / 1024 / 1024).toFixed(0)}MB), retrying HEAD-only`,
 83    )
 84    const headResult = await mkBundle('HEAD')
 85    if (headResult.code !== 0) {
 86      return {
 87        ok: false,
 88        error: `git bundle create HEAD failed (${headResult.code}): ${headResult.stderr.slice(0, 200)}`,
 89        failReason: 'git_error',
 90      }
 91    }
 92  
 93    const { size: headSize } = await stat(bundlePath)
 94    if (headSize <= maxBytes) {
 95      return { ok: true, size: headSize, scope: 'head' }
 96    }
 97  
 98    // Last resort: squash to a single parentless commit. Uses the stash tree
 99    // when WIP exists (bakes uncommitted changes in — can't bundle the stash
100    // ref separately since its parents would drag history back).
101    logForDebugging(
102      `[gitBundle] HEAD bundle is ${(headSize / 1024 / 1024).toFixed(1)}MB, retrying squashed-root`,
103    )
104    const treeRef = hasStash ? 'refs/seed/stash^{tree}' : 'HEAD^{tree}'
105    const commitTree = await execFileNoThrowWithCwd(
106      gitExe(),
107      ['commit-tree', treeRef, '-m', 'seed'],
108      { cwd: gitRoot, abortSignal: signal },
109    )
110    if (commitTree.code !== 0) {
111      return {
112        ok: false,
113        error: `git commit-tree failed (${commitTree.code}): ${commitTree.stderr.slice(0, 200)}`,
114        failReason: 'git_error',
115      }
116    }
117    const squashedSha = commitTree.stdout.trim()
118    await execFileNoThrowWithCwd(
119      gitExe(),
120      ['update-ref', 'refs/seed/root', squashedSha],
121      { cwd: gitRoot },
122    )
123    const squashResult = await execFileNoThrowWithCwd(
124      gitExe(),
125      ['bundle', 'create', bundlePath, 'refs/seed/root'],
126      { cwd: gitRoot, abortSignal: signal },
127    )
128    if (squashResult.code !== 0) {
129      return {
130        ok: false,
131        error: `git bundle create refs/seed/root failed (${squashResult.code}): ${squashResult.stderr.slice(0, 200)}`,
132        failReason: 'git_error',
133      }
134    }
135    const { size: squashSize } = await stat(bundlePath)
136    if (squashSize <= maxBytes) {
137      return { ok: true, size: squashSize, scope: 'squashed' }
138    }
139  
140    return {
141      ok: false,
142      error:
143        'Repo is too large to bundle. Please setup GitHub on https://claude.ai/code',
144      failReason: 'too_large',
145    }
146  }
147  
// Best-effort removal of the temporary refs/seed/* refs this module creates.
// Exit codes are deliberately ignored; update-ref -d on a missing ref exits 0.
async function _deleteSeedRefs(gitRoot: string): Promise<void> {
  for (const ref of ['refs/seed/stash', 'refs/seed/root']) {
    await execFileNoThrowWithCwd(gitExe(), ['update-ref', '-d', ref], {
      cwd: gitRoot,
    })
  }
}

/**
 * Bundle the repo and upload it to the Files API; the returned file id is
 * meant for seed_bundle_file_id.
 *
 * Bundling follows the --all → HEAD → squashed-root fallback chain. Tracked
 * WIP is captured via `git stash create` and pinned at refs/seed/stash (or
 * baked into the squashed tree); untracked files are not captured. The temp
 * bundle file and the refs/seed/* refs are removed before returning.
 *
 * @param config - Files API configuration passed through to `uploadFile`.
 * @param opts - Optional `cwd` used to locate the git root (defaults to
 *   `getCwd()`) and an abort `signal` forwarded to git and the upload.
 * @returns On success: the uploaded file id, uploaded size, bundle scope and
 *   whether WIP was included. On failure: an error message, plus
 *   `failReason` when bundling itself failed or the repo has no refs.
 */
export async function createAndUploadGitBundle(
  config: FilesApiConfig,
  opts?: { cwd?: string; signal?: AbortSignal },
): Promise<BundleUploadResult> {
  const workdir = opts?.cwd ?? getCwd()
  const gitRoot = findGitRoot(workdir)
  if (!gitRoot) {
    return { success: false, error: 'Not in a git repository' }
  }

  // Sweep stale refs from a crashed prior run before --all bundles them.
  // Runs before the empty-repo check so it's never skipped by an early return.
  await _deleteSeedRefs(gitRoot)

  // `git bundle create` refuses to create an empty bundle (exit 128), and
  // `stash create` fails with "You do not have the initial commit yet".
  // Check for any refs (not just HEAD) so orphan branches with commits
  // elsewhere still bundle — `--all` packs those refs regardless of HEAD.
  const refCheck = await execFileNoThrowWithCwd(
    gitExe(),
    ['for-each-ref', '--count=1', 'refs/'],
    { cwd: gitRoot },
  )
  if (refCheck.code === 0 && refCheck.stdout.trim() === '') {
    logEvent('tengu_ccr_bundle_upload', {
      outcome:
        'empty_repo' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    return {
      success: false,
      error: 'Repository has no commits yet',
      failReason: 'empty_repo',
    }
  }

  // stash create writes a dangling commit — doesn't touch refs/stash or
  // the working tree. Untracked files intentionally excluded.
  const stashResult = await execFileNoThrowWithCwd(
    gitExe(),
    ['stash', 'create'],
    { cwd: gitRoot, abortSignal: opts?.signal },
  )
  // exit 0 + empty stdout = nothing to stash. Nonzero is rare; non-fatal.
  const wipStashSha = stashResult.code === 0 ? stashResult.stdout.trim() : ''
  // Only becomes true once the stash commit is actually reachable via
  // refs/seed/stash; the bundler relies on that ref existing.
  let hasWip = false
  if (stashResult.code !== 0) {
    logForDebugging(
      `[gitBundle] git stash create failed (${stashResult.code}), proceeding without WIP: ${stashResult.stderr.slice(0, 200)}`,
    )
  } else if (wipStashSha !== '') {
    // env-runner reads the SHA via bundle list-heads refs/seed/stash.
    const pinResult = await execFileNoThrowWithCwd(
      gitExe(),
      ['update-ref', 'refs/seed/stash', wipStashSha],
      { cwd: gitRoot },
    )
    if (pinResult.code === 0) {
      hasWip = true
      logForDebugging(`[gitBundle] Captured WIP as stash ${wipStashSha}`)
    } else {
      // Same policy as a failed stash create: losing WIP is non-fatal, but
      // claiming WIP that was never pinned would break every bundle tier.
      logForDebugging(
        `[gitBundle] git update-ref refs/seed/stash failed (${pinResult.code}), proceeding without WIP: ${pinResult.stderr.slice(0, 200)}`,
      )
    }
  }

  const bundlePath = generateTempFilePath('ccr-seed', '.bundle')

  // git leaves a partial file on nonzero exit (e.g. empty-repo 128), so the
  // temp file is removed in `finally` on every path.
  try {
    const maxBytes =
      getFeatureValue_CACHED_MAY_BE_STALE<number | null>(
        'tengu_ccr_bundle_max_bytes',
        null,
      ) ?? DEFAULT_BUNDLE_MAX_BYTES

    const bundle = await _bundleWithFallback(
      gitRoot,
      bundlePath,
      maxBytes,
      hasWip,
      opts?.signal,
    )

    if (!bundle.ok) {
      logForDebugging(`[gitBundle] ${bundle.error}`)
      logEvent('tengu_ccr_bundle_upload', {
        outcome:
          bundle.failReason as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        max_bytes: maxBytes,
      })
      return {
        success: false,
        error: bundle.error,
        failReason: bundle.failReason,
      }
    }

    // Fixed relativePath so CCR can locate it.
    const upload = await uploadFile(bundlePath, '_source_seed.bundle', config, {
      signal: opts?.signal,
    })

    if (!upload.success) {
      logEvent('tengu_ccr_bundle_upload', {
        outcome:
          'failed' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      })
      return { success: false, error: upload.error }
    }

    logForDebugging(
      `[gitBundle] Uploaded ${upload.size} bytes as file_id ${upload.fileId}`,
    )
    logEvent('tengu_ccr_bundle_upload', {
      outcome:
        'success' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      size_bytes: upload.size,
      scope:
        bundle.scope as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      has_wip: hasWip,
    })
    return {
      success: true,
      fileId: upload.fileId,
      bundleSizeBytes: upload.size,
      scope: bundle.scope,
      hasWip,
    }
  } finally {
    try {
      await unlink(bundlePath)
    } catch {
      logForDebugging(`[gitBundle] Could not delete ${bundlePath} (non-fatal)`)
    }
    // Always delete — also sweeps a stale ref from a crashed prior run.
    await _deleteSeedRefs(gitRoot)
  }
}