/ src / utils / commitAttribution.ts
commitAttribution.ts
  1  import { createHash, randomUUID, type UUID } from 'crypto'
  2  import { stat } from 'fs/promises'
  3  import { isAbsolute, join, relative, sep } from 'path'
  4  import { getOriginalCwd, getSessionId } from '../bootstrap/state.js'
  5  import type {
  6    AttributionSnapshotMessage,
  7    FileAttributionState,
  8  } from '../types/logs.js'
  9  import { getCwd } from './cwd.js'
 10  import { logForDebugging } from './debug.js'
 11  import { execFileNoThrowWithCwd } from './execFileNoThrow.js'
 12  import { getFsImplementation } from './fsOperations.js'
 13  import { isGeneratedFile } from './generatedFiles.js'
 14  import { getRemoteUrlForDir, resolveGitDir } from './git/gitFilesystem.js'
 15  import { findGitRoot, gitExe } from './git.js'
 16  import { logError } from './log.js'
 17  import { getCanonicalName, type ModelName } from './model/model.js'
 18  import { sequential } from './sequential.js'
 19  
 20  /**
 21   * List of repos where internal model names are allowed in trailers.
 22   * Includes both SSH and HTTPS URL formats.
 23   *
 24   * NOTE: This is intentionally a repo allowlist, not an org-wide check.
 25   * The anthropics and anthropic-experimental orgs contain PUBLIC repos
 26   * (e.g. anthropics/claude-code, anthropic-experimental/sandbox-runtime).
 27   * Undercover mode must stay ON in those to prevent codename leaks.
 28   * Only add repos here that are confirmed PRIVATE.
 29   */
 30  const INTERNAL_MODEL_REPOS = [
 31    'github.com:anthropics/claude-cli-internal',
 32    'github.com/anthropics/claude-cli-internal',
 33    'github.com:anthropics/anthropic',
 34    'github.com/anthropics/anthropic',
 35    'github.com:anthropics/apps',
 36    'github.com/anthropics/apps',
 37    'github.com:anthropics/casino',
 38    'github.com/anthropics/casino',
 39    'github.com:anthropics/dbt',
 40    'github.com/anthropics/dbt',
 41    'github.com:anthropics/dotfiles',
 42    'github.com/anthropics/dotfiles',
 43    'github.com:anthropics/terraform-config',
 44    'github.com/anthropics/terraform-config',
 45    'github.com:anthropics/hex-export',
 46    'github.com/anthropics/hex-export',
 47    'github.com:anthropics/feedback-v2',
 48    'github.com/anthropics/feedback-v2',
 49    'github.com:anthropics/labs',
 50    'github.com/anthropics/labs',
 51    'github.com:anthropics/argo-rollouts',
 52    'github.com/anthropics/argo-rollouts',
 53    'github.com:anthropics/starling-configs',
 54    'github.com/anthropics/starling-configs',
 55    'github.com:anthropics/ts-tools',
 56    'github.com/anthropics/ts-tools',
 57    'github.com:anthropics/ts-capsules',
 58    'github.com/anthropics/ts-capsules',
 59    'github.com:anthropics/feldspar-testing',
 60    'github.com/anthropics/feldspar-testing',
 61    'github.com:anthropics/trellis',
 62    'github.com/anthropics/trellis',
 63    'github.com:anthropics/claude-for-hiring',
 64    'github.com/anthropics/claude-for-hiring',
 65    'github.com:anthropics/forge-web',
 66    'github.com/anthropics/forge-web',
 67    'github.com:anthropics/infra-manifests',
 68    'github.com/anthropics/infra-manifests',
 69    'github.com:anthropics/mycro_manifests',
 70    'github.com/anthropics/mycro_manifests',
 71    'github.com:anthropics/mycro_configs',
 72    'github.com/anthropics/mycro_configs',
 73    'github.com:anthropics/mobile-apps',
 74    'github.com/anthropics/mobile-apps',
 75  ]
 76  
 77  /**
 78   * Get the repo root for attribution operations.
 79   * Uses getCwd() which respects agent worktree overrides (AsyncLocalStorage),
 80   * then resolves to git root to handle `cd subdir` case.
 81   * Falls back to getOriginalCwd() if git root can't be determined.
 82   */
 83  export function getAttributionRepoRoot(): string {
 84    const cwd = getCwd()
 85    return findGitRoot(cwd) ?? getOriginalCwd()
 86  }
 87  
 88  // Cache for repo classification result. Primed once per process.
 89  // 'internal' = remote matches INTERNAL_MODEL_REPOS allowlist
 90  // 'external' = has a remote, not on allowlist (public/open-source repo)
 91  // 'none'     = no remote URL (not a git repo, or no remote configured)
 92  let repoClassCache: 'internal' | 'external' | 'none' | null = null
 93  
 94  /**
 95   * Synchronously return the cached repo classification.
 96   * Returns null if the async check hasn't run yet.
 97   */
 98  export function getRepoClassCached(): 'internal' | 'external' | 'none' | null {
 99    return repoClassCache
100  }
101  
102  /**
103   * Synchronously return the cached result of isInternalModelRepo().
104   * Returns false if the check hasn't run yet (safe default: don't leak).
105   */
106  export function isInternalModelRepoCached(): boolean {
107    return repoClassCache === 'internal'
108  }
109  
/**
 * Decide whether `remoteUrl` names exactly the repo of one allowlist entry
 * (an entry looks like `github.com/anthropics/apps` or
 * `github.com:anthropics/apps`).
 *
 * A plain substring test is not sufficient: the entry
 * `github.com/anthropics/anthropic` is a prefix of any sibling repo whose
 * name merely starts with "anthropic" (for instance
 * `github.com/anthropics/anthropic-sdk-python`), and such a repo would be
 * misclassified as internal. The entry therefore has to be the tail of the
 * URL once an optional `.git` suffix and trailing slashes are removed.
 */
function remoteMatchesAllowlistedRepo(remoteUrl: string, repo: string): boolean {
  const withoutDecorations = remoteUrl
    .trim()
    .replace(/\/+$/, '')
    .replace(/\.git$/, '')
  return withoutDecorations.endsWith(repo)
}

/**
 * Check if the current repo is in the allowlist for internal model names.
 *
 * The answer is memoized in `repoClassCache`, so the remote URL is looked up
 * at most once per process; later calls are answered from the cache.
 * NOTE(review): the `sequential` wrapper presumably serializes concurrent
 * callers so the lookup cannot run twice - confirm against ./sequential.js.
 *
 * @returns true only when the repo root's remote URL matches an entry of
 *   INTERNAL_MODEL_REPOS; false when there is no remote or it is not listed.
 */
export const isInternalModelRepo = sequential(async (): Promise<boolean> => {
  if (repoClassCache !== null) {
    return repoClassCache === 'internal'
  }

  const cwd = getAttributionRepoRoot()
  const remoteUrl = await getRemoteUrlForDir(cwd)

  if (!remoteUrl) {
    repoClassCache = 'none'
    return false
  }

  // Require a whole-repo match; see remoteMatchesAllowlistedRepo for why a
  // substring match would leak internal names into non-allowlisted repos.
  const isInternal = INTERNAL_MODEL_REPOS.some(repo =>
    remoteMatchesAllowlistedRepo(remoteUrl, repo),
  )
  repoClassCache = isInternal ? 'internal' : 'external'
  return isInternal
})
130  
/**
 * Rewrite a "surface/model" key so that its model part uses a public name.
 *
 * The key is split at its LAST slash (e.g. "cli/opus-4-5-fast" becomes
 * "cli" + "opus-4-5-fast"); only the model part is passed through
 * sanitizeModelName(). A key without any slash is returned unchanged.
 */
export function sanitizeSurfaceKey(surfaceKey: string): string {
  const lastSlash = surfaceKey.lastIndexOf('/')
  if (lastSlash < 0) {
    // Nothing that looks like a model suffix - leave the key alone.
    return surfaceKey
  }

  const surfacePart = surfaceKey.substring(0, lastSlash)
  const modelPart = surfaceKey.substring(lastSlash + 1)
  return [surfacePart, sanitizeModelName(modelPart)].join('/')
}
148  
// @[MODEL LAUNCH]: Add a mapping for the new model ID so git commit trailers show the public name.
/**
 * Translate a (possibly internal) model short name into its public name.
 *
 * The name is matched by substring against known model families. The table
 * is ordered so that more specific families come before the generic ones
 * they contain (e.g. "opus-4-6" before "opus-4"); the first hit wins.
 * Names that match no family collapse to the generic "claude".
 */
export function sanitizeModelName(shortName: string): string {
  const familyToPublicName: ReadonlyArray<readonly [string, string]> = [
    ['opus-4-6', 'claude-opus-4-6'],
    ['opus-4-5', 'claude-opus-4-5'],
    ['opus-4-1', 'claude-opus-4-1'],
    ['opus-4', 'claude-opus-4'],
    ['sonnet-4-6', 'claude-sonnet-4-6'],
    ['sonnet-4-5', 'claude-sonnet-4-5'],
    ['sonnet-4', 'claude-sonnet-4'],
    ['sonnet-3-7', 'claude-sonnet-3-7'],
    ['haiku-4-5', 'claude-haiku-4-5'],
    ['haiku-3-5', 'claude-haiku-3-5'],
  ]

  for (const [family, publicName] of familyToPublicName) {
    if (shortName.includes(family)) {
      return publicName
    }
  }

  // Unknown models get a generic name
  return 'claude'
}
169  
/**
 * Attribution state for tracking Claude's contributions to files.
 *
 * The track* helpers in this module do not mutate the state they are given:
 * they return a new object whose `fileStates` is a fresh copy of the Map.
 */
export type AttributionState = {
  // File states keyed by the result of normalizeFilePath(): a forward-slash
  // path relative to the attribution repo root when the file lives inside it
  fileStates: Map<string, FileAttributionState>
  // Session baseline states for net change calculation.
  // Not part of the snapshot produced by stateToSnapshotMessage().
  sessionBaselines: Map<string, { contentHash: string; mtime: number }>
  // Surface from which edits were made (initialised from getClientSurface())
  surface: string
  // HEAD SHA at session start (for detecting external commits); null in a
  // freshly created state. Not part of the snapshot either.
  startingHeadSha: string | null
  // Total prompts in session (for steer count calculation)
  promptCount: number
  // Prompts at last commit (to calculate steers for current commit)
  promptCountAtLastCommit: number
  // Permission prompt tracking
  permissionPromptCount: number
  permissionPromptCountAtLastCommit: number
  // ESC press tracking (user cancelled permission prompt)
  escapeCount: number
  escapeCountAtLastCommit: number
}
193  
/**
 * Summary of Claude's contribution for a commit.
 */
export type AttributionSummary = {
  // Claude's share of all counted characters, rounded to a whole percent
  // (0 when nothing was counted)
  claudePercent: number
  // Characters attributed to Claude, summed over the reported files
  claudeChars: number
  // Characters attributed to humans, summed over the reported files
  humanChars: number
  // Distinct surfaces of the session states that were merged
  surfaces: string[]
}
203  
/**
 * Per-file attribution details for git notes.
 */
export type FileAttribution = {
  // Characters attributed to Claude for this file
  claudeChars: number
  // Characters attributed to humans for this file
  humanChars: number
  // claudeChars as a share of claudeChars + humanChars, rounded to a whole
  // percent (0 when both are 0)
  percent: number
  // Surface the file's changes are attributed to
  surface: string
}
213  
/**
 * Full attribution data for git notes JSON.
 */
export type AttributionData = {
  // Format version of this payload
  version: 1
  // Totals across all reported files
  summary: AttributionSummary
  // Per-file details, keyed by the staged file path
  files: Record<string, FileAttribution>
  // Keyed by surface: Claude's characters from that surface and their share
  // of the total (Claude + human) characters, as a whole percent
  surfaceBreakdown: Record<string, { claudeChars: number; percent: number }>
  // Staged paths left out of the numbers because isGeneratedFile() matched
  excludedGenerated: string[]
  // Session IDs; calculateCommitAttribution() records the current session ID
  sessions: string[]
}
225  
/**
 * Identify the client surface this process runs under.
 *
 * Reads the CLAUDE_CODE_ENTRYPOINT environment variable and falls back to
 * 'cli' only when the variable is not set at all (an empty string is kept).
 */
export function getClientSurface(): string {
  const entrypoint = process.env.CLAUDE_CODE_ENTRYPOINT
  if (entrypoint === undefined) {
    return 'cli'
  }
  return entrypoint
}
232  
/**
 * Combine a surface and a model into a single "surface/model" key
 * (e.g., "cli/claude-sonnet"). The model part is whatever
 * getCanonicalName() yields for `model`.
 */
export function buildSurfaceKey(surface: string, model: ModelName): string {
  const canonicalModel = getCanonicalName(model)
  return `${surface}/${canonicalModel}`
}
240  
/**
 * Hash `content` with SHA-256 and return the digest as a lowercase hex string.
 */
export function computeContentHash(content: string): string {
  const hasher = createHash('sha256')
  hasher.update(content)
  return hasher.digest('hex')
}
247  
/**
 * Turn a file path into the key used for attribution tracking.
 *
 * - A relative path is returned untouched.
 * - An absolute path inside the attribution repo root becomes a path
 *   relative to that root, with forward slashes so keys match git diff
 *   output on Windows. Symlinks are resolved on both sides first
 *   (e.g. /tmp -> /private/tmp on macOS); if that comparison fails, the
 *   unresolved paths are compared as a fallback.
 * - An absolute path outside the root is returned untouched.
 */
export function normalizeFilePath(filePath: string): string {
  const fsImpl = getFsImplementation()
  const repoRoot = getAttributionRepoRoot()

  if (!isAbsolute(filePath)) {
    return filePath
  }

  // realpath that falls back to its input, e.g. when the file does not
  // exist yet.
  const realpathOrSelf = (target: string): string => {
    try {
      return fsImpl.realpathSync(target)
    } catch {
      return target
    }
  }

  // Key for `target` relative to `root`, or null when target is not the root
  // itself or something below it.
  const keyWithin = (root: string, target: string): string | null =>
    target === root || target.startsWith(root + sep)
      ? relative(root, target).replaceAll(sep, '/')
      : null

  const realTarget = realpathOrSelf(filePath)
  const realRoot = realpathOrSelf(repoRoot)

  return (
    keyWithin(realRoot, realTarget) ?? keyWithin(repoRoot, filePath) ?? filePath
  )
}
292  
/**
 * Make a path absolute: absolute inputs pass through unchanged, relative
 * ones are joined onto the attribution repo root.
 */
export function expandFilePath(filePath: string): string {
  return isAbsolute(filePath)
    ? filePath
    : join(getAttributionRepoRoot(), filePath)
}
302  
/**
 * Build the starting attribution state of a new session: no tracked files,
 * no baselines, no known HEAD, all counters at zero, and the surface taken
 * from getClientSurface().
 */
export function createEmptyAttributionState(): AttributionState {
  const freshState: AttributionState = {
    fileStates: new Map<string, FileAttributionState>(),
    sessionBaselines: new Map<string, { contentHash: string; mtime: number }>(),
    surface: getClientSurface(),
    startingHeadSha: null,
    promptCount: 0,
    promptCountAtLastCommit: 0,
    permissionPromptCount: 0,
    permissionPromptCountAtLastCommit: 0,
    escapeCount: 0,
    escapeCountAtLastCommit: 0,
  }
  return freshState
}
320  
/**
 * Work out the file state to store after Claude changed a file.
 *
 * The size of the change is measured in characters:
 * - if either side is empty (new file or full deletion), it is the length of
 *   the non-empty side;
 * - otherwise the common prefix and common suffix of old and new content are
 *   stripped and the longer of the two remaining middles is counted. This
 *   also captures same-length replacements (e.g., "Esc" -> "esc"), where a
 *   plain length difference would be 0.
 *
 * The result adds that size to any contribution already recorded for the
 * file in `existingFileStates`.
 *
 * @returns the new state for the file, or null if the computation threw
 *   (the error is reported through logError).
 */
function computeFileModificationState(
  existingFileStates: Map<string, FileAttributionState>,
  filePath: string,
  oldContent: string,
  newContent: string,
  mtime: number,
): FileAttributionState | null {
  const key = normalizeFilePath(filePath)

  try {
    let changedChars: number

    if (oldContent === '') {
      // File created from nothing (also covers empty -> empty, which is 0)
      changedChars = newContent.length
    } else if (newContent === '') {
      // Entire content removed
      changedChars = oldContent.length
    } else {
      const oldLen = oldContent.length
      const newLen = newContent.length
      const shorterLen = Math.min(oldLen, newLen)

      // Length of the shared leading run
      let prefixLen = 0
      for (
        ;
        prefixLen < shorterLen &&
        oldContent.charCodeAt(prefixLen) === newContent.charCodeAt(prefixLen);
        prefixLen++
      ) {
        // advancing in the loop header
      }

      // Length of the shared trailing run; capped so that it never overlaps
      // the prefix in the shorter string
      const suffixCap = shorterLen - prefixLen
      let suffixLen = 0
      for (
        ;
        suffixLen < suffixCap &&
        oldContent.charCodeAt(oldLen - 1 - suffixLen) ===
          newContent.charCodeAt(newLen - 1 - suffixLen);
        suffixLen++
      ) {
        // advancing in the loop header
      }

      const removedSpan = oldLen - prefixLen - suffixLen
      const insertedSpan = newLen - prefixLen - suffixLen
      changedChars = Math.max(removedSpan, insertedSpan)
    }

    // Accumulate on top of whatever was already tracked for this file
    const previousTotal = existingFileStates.get(key)?.claudeContribution ?? 0

    return {
      contentHash: computeContentHash(newContent),
      claudeContribution: previousTotal + changedChars,
      mtime,
    }
  } catch (error) {
    logError(error as Error)
    return null
  }
}
381  
/**
 * Look up a file's modification time (mtimeMs).
 *
 * The path is normalized and re-expanded against the attribution repo root
 * before it is stat'ed. If stat fails for any reason (e.g. the file doesn't
 * exist), Date.now() is returned instead. Being async, this can be
 * precomputed before entering a sync setAppState callback.
 */
export async function getFileMtime(filePath: string): Promise<number> {
  const absolutePath = expandFilePath(normalizeFilePath(filePath))
  try {
    const { mtimeMs } = await stat(absolutePath)
    return mtimeMs
  } catch {
    return Date.now()
  }
}
397  
/**
 * Record a modification Claude made to a file (called after the Edit/Write
 * tool completes).
 *
 * @param state - Current attribution state; it is not mutated.
 * @param filePath - Path of the modified file.
 * @param oldContent - File content before the change.
 * @param newContent - File content after the change.
 * @param _userModified - Not used by this function.
 * @param mtime - Modification time to store; defaults to Date.now().
 * @returns A new state with the file's entry updated, or the input state
 *   itself when the contribution could not be computed.
 */
export function trackFileModification(
  state: AttributionState,
  filePath: string,
  oldContent: string,
  newContent: string,
  _userModified: boolean,
  mtime: number = Date.now(),
): AttributionState {
  const key = normalizeFilePath(filePath)
  const updatedEntry = computeFileModificationState(
    state.fileStates,
    filePath,
    oldContent,
    newContent,
    mtime,
  )
  if (updatedEntry === null) {
    return state
  }

  // Copy-then-set keeps the caller's Map untouched
  const fileStates = new Map(state.fileStates).set(key, updatedEntry)

  logForDebugging(
    `Attribution: Tracked ${updatedEntry.claudeContribution} chars for ${key}`,
  )

  return { ...state, fileStates }
}
434  
/**
 * Record a file Claude created through a non-tracked mechanism (e.g., a
 * bash command). Implemented as a modification whose previous content is
 * the empty string.
 */
export function trackFileCreation(
  state: AttributionState,
  filePath: string,
  content: string,
  mtime: number = Date.now(),
): AttributionState {
  const previousContent = ''
  const userModified = false
  return trackFileModification(
    state,
    filePath,
    previousContent,
    content,
    userModified,
    mtime,
  )
}
448  
/**
 * Record a file Claude deleted through a non-tracked mechanism (e.g., a
 * bash rm command).
 *
 * The length of the deleted content is added to the file's existing
 * contribution, and the entry is stored with an empty content hash and the
 * current time as mtime. The input state is not mutated.
 */
export function trackFileDeletion(
  state: AttributionState,
  filePath: string,
  oldContent: string,
): AttributionState {
  const key = normalizeFilePath(filePath)
  const previousTotal = state.fileStates.get(key)?.claudeContribution ?? 0
  const removedChars = oldContent.length

  const deletedEntry: FileAttributionState = {
    contentHash: '', // Empty hash marks a deleted file
    claudeContribution: previousTotal + removedChars,
    mtime: Date.now(),
  }

  const fileStates = new Map(state.fileStates)
  fileStates.set(key, deletedEntry)

  logForDebugging(
    `Attribution: Tracked deletion of ${key} (${removedChars} chars removed, total contribution: ${deletedEntry.claudeContribution})`,
  )

  return { ...state, fileStates }
}
481  
482  // --
483  
/**
 * Apply many file changes to the attribution state in one pass.
 *
 * The fileStates Map is copied exactly once and then updated in place for
 * every change, which avoids the O(n²) cost of copying the Map per file when
 * processing large git diffs (e.g., jj operations that touch hundreds of
 * thousands of files). Later changes to the same path build on the result of
 * earlier ones. The input state is not mutated.
 *
 * @param state - Current attribution state.
 * @param changes - Changes to apply, in order. A change without `mtime`
 *   is stamped with Date.now().
 * @returns A new state carrying the updated fileStates Map.
 */
export function trackBulkFileChanges(
  state: AttributionState,
  changes: ReadonlyArray<{
    path: string
    type: 'modified' | 'created' | 'deleted'
    oldContent: string
    newContent: string
    mtime?: number
  }>,
): AttributionState {
  // The single working copy shared by all changes
  const workingStates = new Map(state.fileStates)

  for (const {
    path,
    type,
    oldContent,
    newContent,
    mtime: providedMtime,
  } of changes) {
    const mtime = providedMtime ?? Date.now()

    if (type !== 'deleted') {
      // 'modified' and 'created' share the diff-based computation
      const updatedEntry = computeFileModificationState(
        workingStates,
        path,
        oldContent,
        newContent,
        mtime,
      )
      if (!updatedEntry) {
        continue
      }

      const key = normalizeFilePath(path)
      workingStates.set(key, updatedEntry)

      logForDebugging(
        `Attribution: Tracked ${updatedEntry.claudeContribution} chars for ${key}`,
      )
      continue
    }

    // Deletion: the whole old content counts, and the hash is cleared
    const key = normalizeFilePath(path)
    const previousTotal = workingStates.get(key)?.claudeContribution ?? 0
    const removedChars = oldContent.length
    const newTotal = previousTotal + removedChars

    workingStates.set(key, {
      contentHash: '',
      claudeContribution: newTotal,
      mtime,
    })

    logForDebugging(
      `Attribution: Tracked deletion of ${key} (${removedChars} chars removed, total contribution: ${newTotal})`,
    )
  }

  return { ...state, fileStates: workingStates }
}
543  
/**
 * Return `source` as a Map. Attribution maps may arrive as plain objects
 * after serialization, so both shapes (and a missing value) are accepted.
 */
function toAttributionMap<V>(
  source: Map<string, V> | Record<string, V> | null | undefined,
): Map<string, V> {
  if (source instanceof Map) {
    return source
  }
  const record: Record<string, V> = source ?? {}
  return new Map(Object.entries(record))
}

/**
 * Calculate final attribution for staged files.
 *
 * All session states are merged first: for baselines the earliest state
 * that knows a path wins; for file states the Claude contributions of all
 * states are added up. Each staged file is then classified:
 * - generated files (isGeneratedFile) are only listed in `excludedGenerated`;
 * - a staged deletion counts as Claude's when the file has a tracked state,
 *   otherwise as a human change sized by getGitDiffSize() (at least 100);
 * - an existing file with a tracked state counts entirely as Claude's;
 * - an existing file with only a baseline counts as a human change sized by
 *   getGitDiffSize(), falling back to the file size;
 * - any other existing file counts as a human-created file of its size;
 * - files that cannot be stat'ed are skipped.
 *
 * @param states - Attribution states of the sessions to merge. May be empty,
 *   in which case every file is attributed to the surface reported by
 *   getClientSurface().
 * @param stagedFiles - Staged paths, relative to the attribution repo root.
 * @returns The attribution payload; `sessions` holds the current session ID.
 */
export async function calculateCommitAttribution(
  states: AttributionState[],
  stagedFiles: string[],
): Promise<AttributionData> {
  const cwd = getAttributionRepoRoot()
  const sessionId = getSessionId()

  const files: Record<string, FileAttribution> = {}
  const excludedGenerated: string[] = []
  const surfaces = new Set<string>()
  const surfaceCounts: Record<string, number> = {}

  let totalClaudeChars = 0
  let totalHumanChars = 0

  // Merge file states from all sessions
  const mergedFileStates = new Map<string, FileAttributionState>()
  const mergedBaselines = new Map<
    string,
    { contentHash: string; mtime: number }
  >()

  for (const state of states) {
    surfaces.add(state.surface)

    // Merge baselines (earliest baseline wins)
    for (const [path, baseline] of toAttributionMap(state.sessionBaselines)) {
      if (!mergedBaselines.has(path)) {
        mergedBaselines.set(path, baseline)
      }
    }

    // Merge file states (accumulate contributions)
    for (const [path, fileState] of toAttributionMap(state.fileStates)) {
      const existing = mergedFileStates.get(path)
      if (existing) {
        mergedFileStates.set(path, {
          ...fileState,
          claudeContribution:
            existing.claudeContribution + fileState.claudeContribution,
        })
      } else {
        mergedFileStates.set(path, fileState)
      }
    }
  }

  // Every file is attributed to the first state's surface. With no states at
  // all there is no first element, so fall back to the current client surface
  // instead of dereferencing undefined (which would reject the whole call).
  const fileSurface = states[0]?.surface ?? getClientSurface()

  // Process files in parallel
  const fileResults = await Promise.all(
    stagedFiles.map(async file => {
      // Skip generated files
      if (isGeneratedFile(file)) {
        return { type: 'generated' as const, file }
      }

      const absPath = join(cwd, file)
      const fileState = mergedFileStates.get(file)
      const baseline = mergedBaselines.get(file)

      let claudeChars = 0
      let humanChars = 0

      // Check if file was deleted
      const deleted = await isFileDeleted(file)

      if (deleted) {
        if (fileState) {
          // Claude deleted this file (tracked deletion)
          claudeChars = fileState.claudeContribution
          humanChars = 0
        } else {
          // Human deleted this file (untracked deletion)
          // Use diff size to get the actual change size
          const diffSize = await getGitDiffSize(file)
          humanChars = diffSize > 0 ? diffSize : 100 // Minimum attribution for a deletion
        }
      } else {
        try {
          // Only need file size, not content - stat() avoids loading GB-scale
          // build artifacts into memory when they appear in the working tree.
          // stats.size (bytes) is an adequate proxy for char count here.
          const stats = await stat(absPath)

          if (fileState) {
            // We have tracked modifications for this file
            claudeChars = fileState.claudeContribution
            humanChars = 0
          } else if (baseline) {
            // File was modified but not tracked - human modification
            const diffSize = await getGitDiffSize(file)
            humanChars = diffSize > 0 ? diffSize : stats.size
          } else {
            // New file not created by Claude
            humanChars = stats.size
          }
        } catch {
          // File doesn't exist or stat failed - skip it
          return null
        }
      }

      // Ensure non-negative values
      claudeChars = Math.max(0, claudeChars)
      humanChars = Math.max(0, humanChars)

      const total = claudeChars + humanChars
      const percent = total > 0 ? Math.round((claudeChars / total) * 100) : 0

      return {
        type: 'file' as const,
        file,
        claudeChars,
        humanChars,
        percent,
        surface: fileSurface,
      }
    }),
  )

  // Aggregate results
  for (const result of fileResults) {
    if (!result) continue

    if (result.type === 'generated') {
      excludedGenerated.push(result.file)
      continue
    }

    files[result.file] = {
      claudeChars: result.claudeChars,
      humanChars: result.humanChars,
      percent: result.percent,
      surface: result.surface,
    }

    totalClaudeChars += result.claudeChars
    totalHumanChars += result.humanChars

    surfaceCounts[result.surface] =
      (surfaceCounts[result.surface] ?? 0) + result.claudeChars
  }

  const totalChars = totalClaudeChars + totalHumanChars
  const claudePercent =
    totalChars > 0 ? Math.round((totalClaudeChars / totalChars) * 100) : 0

  // Calculate surface breakdown (percentage of total content per surface)
  const surfaceBreakdown: Record<
    string,
    { claudeChars: number; percent: number }
  > = {}
  for (const [surface, chars] of Object.entries(surfaceCounts)) {
    // Calculate what percentage of TOTAL content this surface contributed
    const percent = totalChars > 0 ? Math.round((chars / totalChars) * 100) : 0
    surfaceBreakdown[surface] = { claudeChars: chars, percent }
  }

  return {
    version: 1,
    summary: {
      claudePercent,
      claudeChars: totalClaudeChars,
      humanChars: totalHumanChars,
      surfaces: Array.from(surfaces),
    },
    files,
    surfaceBreakdown,
    excludedGenerated,
    sessions: [sessionId],
  }
}
744  
/**
 * Estimate the size of a file's staged change, in characters.
 *
 * The value is an approximation, not an exact character count: the number
 * of added plus deleted LINES reported by git for the staged diff is
 * multiplied by an assumed average of 40 characters per line.
 *
 * Line counts are read from `git diff --cached --numstat`, whose
 * tab-separated "<added>\t<deleted>\t<path>" records are meant for machine
 * consumption. The human-readable `--stat` summary is avoided because its
 * wording ("insertions(+)", "deletions(-)") depends on the user's locale.
 *
 * @param filePath - Path of the file, as accepted by git relative to the
 *   attribution repo root.
 * @returns The estimate, or 0 when git fails, reports nothing, or reports a
 *   binary file (numstat prints "-" instead of counts for those).
 */
export async function getGitDiffSize(filePath: string): Promise<number> {
  // Rough average line length used to turn line counts into characters
  const approxCharsPerLine = 40
  const cwd = getAttributionRepoRoot()

  try {
    const result = await execFileNoThrowWithCwd(
      gitExe(),
      ['diff', '--cached', '--numstat', '--', filePath],
      { cwd, timeout: 5000 },
    )

    if (result.code !== 0 || !result.stdout) {
      return 0
    }

    let changedLines = 0
    for (const record of result.stdout.split('\n')) {
      if (!record) continue

      const [added = '', deleted = ''] = record.split('\t')
      const insertions = Number.parseInt(added, 10)
      const deletions = Number.parseInt(deleted, 10)

      // "-" (binary file) parses to NaN and contributes nothing
      changedLines += Number.isNaN(insertions) ? 0 : insertions
      changedLines += Number.isNaN(deletions) ? 0 : deletions
    }

    return changedLines * approxCharsPerLine
  } catch {
    return 0
  }
}
789  
/**
 * Tell whether the staged change for `filePath` is a deletion.
 *
 * Runs `git diff --cached --name-status` for the path and checks that the
 * output starts with the "D\t" status marker. Any failure (non-zero exit,
 * empty output, thrown error) is treated as "not deleted".
 */
export async function isFileDeleted(filePath: string): Promise<boolean> {
  const repoRoot = getAttributionRepoRoot()

  try {
    const { code, stdout } = await execFileNoThrowWithCwd(
      gitExe(),
      ['diff', '--cached', '--name-status', '--', filePath],
      { cwd: repoRoot, timeout: 5000 },
    )

    if (code !== 0 || !stdout) {
      return false
    }

    // Format: "D\tfilename" for deleted files
    return stdout.trim().startsWith('D\t')
  } catch {
    // Best effort: errors mean we cannot confirm a deletion
    return false
  }
}
813  
/**
 * Get staged files from git.
 *
 * Runs `git diff --cached --name-only -z` in the attribution repo root.
 * The `-z` flag makes git emit each path verbatim, terminated by a NUL byte.
 * Without it, paths containing non-ASCII or special characters are printed
 * C-style quoted (see `core.quotePath`), and such quoted names would neither
 * match the keys tracked in the attribution state nor exist on disk.
 *
 * @returns The staged paths as reported by git, or an empty array when git
 *   fails or nothing is staged. Thrown errors are reported through logError.
 */
export async function getStagedFiles(): Promise<string[]> {
  const cwd = getAttributionRepoRoot()

  try {
    const result = await execFileNoThrowWithCwd(
      gitExe(),
      ['diff', '--cached', '--name-only', '-z'],
      { cwd, timeout: 5000 },
    )

    if (result.code === 0 && result.stdout) {
      // NUL-terminated records; the trailing terminator yields an empty
      // string that filter(Boolean) drops
      return result.stdout.split('\0').filter(Boolean)
    }
  } catch (error) {
    logError(error as Error)
  }

  return []
}
836  
837  // formatAttributionTrailer moved to attributionTrailer.ts for tree-shaking
838  // (contains excluded strings that should not be in external builds)
839  
/**
 * Detect whether the repository is in the middle of a multi-step git
 * operation (rebase, merge, cherry-pick or bisect).
 *
 * The check looks inside the resolved git directory for the marker entries
 * those operations leave behind; finding any one of them is enough.
 * Returns false when the git directory cannot be resolved.
 */
export async function isGitTransientState(): Promise<boolean> {
  const gitDir = await resolveGitDir(getAttributionRepoRoot())
  if (!gitDir) {
    return false
  }

  const markers = [
    'rebase-merge',
    'rebase-apply',
    'MERGE_HEAD',
    'CHERRY_PICK_HEAD',
    'BISECT_LOG',
  ]

  // stat() succeeding is the existence test; any failure counts as absent
  const markerExists = async (marker: string): Promise<boolean> => {
    try {
      await stat(join(gitDir, marker))
      return true
    } catch {
      return false
    }
  }

  const found = await Promise.all(markers.map(marker => markerExists(marker)))
  return found.includes(true)
}
868  
/**
 * Serialize an attribution state into a snapshot message for persistence.
 *
 * The fileStates Map is flattened into a plain object keyed by path; the
 * surface and all counters are copied as they are. `sessionBaselines` and
 * `startingHeadSha` are not included in the snapshot.
 */
export function stateToSnapshotMessage(
  state: AttributionState,
  messageId: UUID,
): AttributionSnapshotMessage {
  const fileStatesByPath: Record<string, FileAttributionState> = {}
  state.fileStates.forEach((fileState, path) => {
    fileStatesByPath[path] = fileState
  })

  const {
    surface,
    promptCount,
    promptCountAtLastCommit,
    permissionPromptCount,
    permissionPromptCountAtLastCommit,
    escapeCount,
    escapeCountAtLastCommit,
  } = state

  return {
    type: 'attribution-snapshot',
    messageId,
    surface,
    fileStates: fileStatesByPath,
    promptCount,
    promptCountAtLastCommit,
    permissionPromptCount,
    permissionPromptCountAtLastCommit,
    escapeCount,
    escapeCountAtLastCommit,
  }
}
895  
/**
 * Rebuild an attribution state from persisted snapshot messages.
 *
 * Only the LAST snapshot is used. Snapshots are full-state dumps (see
 * stateToSnapshotMessage), not deltas, and fileStates never shrinks, so the
 * last one already holds the most recent count for every path. Summing
 * counts across snapshots instead causes quadratic growth on restore
 * (837 snapshots × 280 files → 1.15 quadrillion "chars" tracked for a 5KB
 * file over a 5-day session).
 *
 * @returns A fresh empty state when there are no snapshots; otherwise a
 *   state carrying the last snapshot's surface, file states and counters
 *   (missing counters default to 0).
 */
export function restoreAttributionStateFromSnapshots(
  snapshots: AttributionSnapshotMessage[],
): AttributionState {
  const restored = createEmptyAttributionState()

  const latest = snapshots[snapshots.length - 1]
  if (!latest) {
    return restored
  }

  restored.surface = latest.surface
  Object.entries(latest.fileStates).forEach(([path, fileState]) => {
    restored.fileStates.set(path, fileState)
  })

  // Counters come straight from the most recent snapshot
  restored.promptCount = latest.promptCount ?? 0
  restored.promptCountAtLastCommit = latest.promptCountAtLastCommit ?? 0
  restored.permissionPromptCount = latest.permissionPromptCount ?? 0
  restored.permissionPromptCountAtLastCommit =
    latest.permissionPromptCountAtLastCommit ?? 0
  restored.escapeCount = latest.escapeCount ?? 0
  restored.escapeCountAtLastCommit = latest.escapeCountAtLastCommit ?? 0

  return restored
}
930  
/**
 * On session resume, rebuild the attribution state from logged snapshots
 * and hand it to `onUpdateState`. The callback is always invoked exactly
 * once, with an empty state when there are no snapshots.
 */
export function attributionRestoreStateFromLog(
  attributionSnapshots: AttributionSnapshotMessage[],
  onUpdateState: (newState: AttributionState) => void,
): void {
  onUpdateState(restoreAttributionStateFromSnapshots(attributionSnapshots))
}
941  
/**
 * Bump promptCount by one and persist the result as an attribution snapshot,
 * so the prompt count survives compaction.
 *
 * @param attribution - Current attribution state; it is not mutated
 * @param saveSnapshot - Receives the snapshot of the updated state, tagged
 *   with a freshly generated UUID (allows async handling by caller)
 * @returns New attribution state with incremented promptCount
 */
export function incrementPromptCount(
  attribution: AttributionState,
  saveSnapshot: (snapshot: AttributionSnapshotMessage) => void,
): AttributionState {
  const bumped: AttributionState = {
    ...attribution,
    promptCount: attribution.promptCount + 1,
  }
  saveSnapshot(stateToSnapshotMessage(bumped, randomUUID()))
  return bumped
}