// services/api/promptCacheBreakDetection.ts
  1  import type { BetaToolUnion } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
  2  import type { TextBlockParam } from '@anthropic-ai/sdk/resources/index.mjs'
  3  import { createPatch } from 'diff'
  4  import { mkdir, writeFile } from 'fs/promises'
  5  import { join } from 'path'
  6  import type { AgentId } from 'src/types/ids.js'
  7  import type { Message } from 'src/types/message.js'
  8  import { logForDebugging } from 'src/utils/debug.js'
  9  import { djb2Hash } from 'src/utils/hash.js'
 10  import { logError } from 'src/utils/log.js'
 11  import { getClaudeTempDir } from 'src/utils/permissions/filesystem.js'
 12  import { jsonStringify } from 'src/utils/slowOperations.js'
 13  import type { QuerySource } from '../../constants/querySource.js'
 14  import {
 15    type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 16    logEvent,
 17  } from '../analytics/index.js'
 18  
 19  function getCacheBreakDiffPath(): string {
 20    const chars = 'abcdefghijklmnopqrstuvwxyz0123456789'
 21    let suffix = ''
 22    for (let i = 0; i < 4; i++) {
 23      suffix += chars[Math.floor(Math.random() * chars.length)]
 24    }
 25    return join(getClaudeTempDir(), `cache-break-${suffix}.diff`)
 26  }
 27  
/** Per-source snapshot of everything cache-relevant from the previous API
 *  call, plus bookkeeping used by phase 2 to explain an observed cache break. */
type PreviousState = {
  // Hash of system blocks with cache_control stripped (content-only changes).
  systemHash: number
  // Hash of tool schemas with cache_control stripped.
  toolsHash: number
  /** Hash of system blocks WITH cache_control intact. Catches scope/TTL flips
   *  (global↔org, 1h↔5m) that stripCacheControl erases from systemHash. */
  cacheControlHash: number
  // Tool names in request order — diffed to report added/removed tools.
  toolNames: string[]
  /** Per-tool schema hash. Diffed to name which tool's description changed
   *  when toolSchemasChanged but added=removed=0 (77% of tool breaks per
   *  BQ 2026-03-22). AgentTool/SkillTool embed dynamic agent/command lists. */
  perToolHashes: Record<string, number>
  // Total characters across system blocks — reported as a delta on change.
  systemCharCount: number
  model: string
  fastMode: boolean
  /** 'tool_based' | 'system_prompt' | 'none' — flips when MCP tools are
   *  discovered/removed. */
  globalCacheStrategy: string
  /** Sorted beta header list. Diffed to show which headers were added/removed. */
  betas: string[]
  /** AFK_MODE_BETA_HEADER presence — should NOT break cache anymore
   *  (sticky-on latched in claude.ts). Tracked to verify the fix. */
  autoModeActive: boolean
  /** Overage state flip — should NOT break cache anymore (eligibility is
   *  latched session-stable in should1hCacheTTL). Tracked to verify the fix. */
  isUsingOverage: boolean
  /** Cache-editing beta header presence — should NOT break cache anymore
   *  (sticky-on latched in claude.ts). Tracked to verify the fix. */
  cachedMCEnabled: boolean
  /** Resolved effort (env → options → model default). Goes into output_config
   *  or anthropic_internal.effort_override. */
  effortValue: string
  /** Hash of getExtraBodyParams() — catches CLAUDE_CODE_EXTRA_BODY and
   *  anthropic_internal changes. */
  extraBodyHash: number
  // Number of recordPromptState calls observed for this key (starts at 1).
  callCount: number
  // Diff detected by phase 1; consumed (and cleared) by phase 2.
  pendingChanges: PendingChanges | null
  // Cache read tokens from the previous response; null until first response
  // (and reset to null after compaction to rebase the comparison).
  prevCacheReadTokens: number | null
  /** Set when cached microcompact sends cache_edits deletions. Cache reads
   *  will legitimately drop — this is expected, not a break. */
  cacheDeletionsPending: boolean
  // Lazily serializes the previous prompt state for diffing on a break.
  buildDiffableContent: () => string
}
 70  
/** What phase 1 observed changing between two consecutive calls for the same
 *  tracking key. Stored on PreviousState and used by phase 2 to explain a
 *  cache break if one materializes in the response token counts. */
type PendingChanges = {
  // Per-dimension change flags (hash/value comparisons against prior call).
  systemPromptChanged: boolean
  toolSchemasChanged: boolean
  modelChanged: boolean
  fastModeChanged: boolean
  cacheControlChanged: boolean
  globalCacheStrategyChanged: boolean
  betasChanged: boolean
  autoModeChanged: boolean
  overageChanged: boolean
  cachedMCChanged: boolean
  effortChanged: boolean
  extraBodyChanged: boolean
  // Set-difference details backing the flags above.
  addedToolCount: number
  removedToolCount: number
  systemCharDelta: number
  addedTools: string[]
  removedTools: string[]
  // Tools present in both calls whose per-tool hash differs.
  changedToolSchemas: string[]
  previousModel: string
  newModel: string
  prevGlobalCacheStrategy: string
  newGlobalCacheStrategy: string
  addedBetas: string[]
  removedBetas: string[]
  prevEffortValue: string
  newEffortValue: string
  // Lazily serializes the PREVIOUS call's prompt state (before the change)
  // so phase 2 can write a before/after diff file.
  buildPrevDiffableContent: () => string
}
100  
// Tracking state per source key (see getTrackingKey), in insertion order.
const previousStateBySource = new Map<string, PreviousState>()

// Cap the number of tracked sources to prevent unbounded memory growth.
// Each entry stores a ~300KB+ diffableContent string (serialized system prompt
// + tool schemas). Without a cap, spawning many subagents (each with a unique
// agentId key) causes the map to grow indefinitely.
// NOTE(review): eviction (in recordPromptState) is insertion-order, not LRU —
// once the cap is hit, the longest-lived key (e.g. repl_main_thread) is the
// first to be evicted even if it is the most active. Confirm this is intended.
const MAX_TRACKED_SOURCES = 10

// querySource prefixes eligible for tracking; everything else is ignored.
const TRACKED_SOURCE_PREFIXES = [
  'repl_main_thread',
  'sdk',
  'agent:custom',
  'agent:default',
  'agent:builtin',
]

// Minimum absolute token drop required to trigger a cache break warning.
// Small drops (e.g., a few thousand tokens) can happen due to normal variation
// and aren't worth alerting on.
const MIN_CACHE_MISS_TOKENS = 2_000

// Anthropic's server-side prompt cache TTL thresholds to test.
// Cache breaks after these durations are likely due to TTL expiration
// rather than client-side changes.
const CACHE_TTL_5MIN_MS = 5 * 60 * 1000
export const CACHE_TTL_1HOUR_MS = 60 * 60 * 1000
127  
128  // Models to exclude from cache break detection (e.g., haiku has different caching behavior)
129  function isExcludedModel(model: string): boolean {
130    return model.includes('haiku')
131  }
132  
133  /**
134   * Returns the tracking key for a querySource, or null if untracked.
135   * Compact shares the same server-side cache as repl_main_thread
136   * (same cacheSafeParams), so they share tracking state.
137   *
138   * For subagents with a tracked querySource, uses the unique agentId to
139   * isolate tracking state. This prevents false positive cache break
140   * notifications when multiple instances of the same agent type run
141   * concurrently.
142   *
143   * Untracked sources (speculation, session_memory, prompt_suggestion, etc.)
144   * are short-lived forked agents where cache break detection provides no
145   * value — they run 1-3 turns with a fresh agentId each time, so there's
146   * nothing meaningful to compare against. Their cache metrics are still
147   * logged via tengu_api_success for analytics.
148   */
149  function getTrackingKey(
150    querySource: QuerySource,
151    agentId?: AgentId,
152  ): string | null {
153    if (querySource === 'compact') return 'repl_main_thread'
154    for (const prefix of TRACKED_SOURCE_PREFIXES) {
155      if (querySource.startsWith(prefix)) return agentId || querySource
156    }
157    return null
158  }
159  
160  function stripCacheControl(
161    items: ReadonlyArray<Record<string, unknown>>,
162  ): unknown[] {
163    return items.map(item => {
164      if (!('cache_control' in item)) return item
165      const { cache_control: _, ...rest } = item
166      return rest
167    })
168  }
169  
170  function computeHash(data: unknown): number {
171    const str = jsonStringify(data)
172    if (typeof Bun !== 'undefined') {
173      const hash = Bun.hash(str)
174      // Bun.hash can return bigint for large inputs; convert to number safely
175      return typeof hash === 'bigint' ? Number(hash & 0xffffffffn) : hash
176    }
177    // Fallback for non-Bun runtimes (e.g. Node.js via npm global install)
178    return djb2Hash(str)
179  }
180  
181  /** MCP tool names are user-controlled (server config) and may leak filepaths.
182   *  Collapse them to 'mcp'; built-in names are a fixed vocabulary. */
183  function sanitizeToolName(name: string): string {
184    return name.startsWith('mcp__') ? 'mcp' : name
185  }
186  
187  function computePerToolHashes(
188    strippedTools: ReadonlyArray<unknown>,
189    names: string[],
190  ): Record<string, number> {
191    const hashes: Record<string, number> = {}
192    for (let i = 0; i < strippedTools.length; i++) {
193      hashes[names[i] ?? `__idx_${i}`] = computeHash(strippedTools[i])
194    }
195    return hashes
196  }
197  
198  function getSystemCharCount(system: TextBlockParam[]): number {
199    let total = 0
200    for (const block of system) {
201      total += block.text.length
202    }
203    return total
204  }
205  
206  function buildDiffableContent(
207    system: TextBlockParam[],
208    tools: BetaToolUnion[],
209    model: string,
210  ): string {
211    const systemText = system.map(b => b.text).join('\n\n')
212    const toolDetails = tools
213      .map(t => {
214        if (!('name' in t)) return 'unknown'
215        const desc = 'description' in t ? t.description : ''
216        const schema = 'input_schema' in t ? jsonStringify(t.input_schema) : ''
217        return `${t.name}\n  description: ${desc}\n  input_schema: ${schema}`
218      })
219      .sort()
220      .join('\n\n')
221    return `Model: ${model}\n\n=== System Prompt ===\n\n${systemText}\n\n=== Tools (${tools.length}) ===\n\n${toolDetails}\n`
222  }
223  
/** Extended tracking snapshot — everything that could affect the server-side
 *  cache key that we can observe from the client. All fields are optional so
 *  the call site can add incrementally; undefined fields compare as stable. */
export type PromptStateSnapshot = {
  system: TextBlockParam[]
  toolSchemas: BetaToolUnion[]
  querySource: QuerySource
  model: string
  // Unique per subagent instance; used to isolate tracking state.
  agentId?: AgentId
  fastMode?: boolean
  globalCacheStrategy?: string
  // Beta header names; compared order-insensitively (sorted before diffing).
  betas?: readonly string[]
  autoModeActive?: boolean
  isUsingOverage?: boolean
  cachedMCEnabled?: boolean
  // Stringified before comparison, so 2 and '2' compare equal.
  effortValue?: string | number
  // Hashed opaquely; only "changed / unchanged" is tracked.
  extraBodyParams?: unknown
}
242  
/**
 * Phase 1 (pre-call): Record the current prompt/tool state and detect what changed.
 * Does NOT fire events — just stores pending changes for phase 2 to use.
 *
 * Never throws: any internal failure is logged and swallowed so prompt
 * tracking can never break the actual API request path.
 */
export function recordPromptState(snapshot: PromptStateSnapshot): void {
  try {
    const {
      system,
      toolSchemas,
      querySource,
      model,
      agentId,
      fastMode,
      globalCacheStrategy = '',
      betas = [],
      autoModeActive = false,
      isUsingOverage = false,
      cachedMCEnabled = false,
      effortValue,
      extraBodyParams,
    } = snapshot
    // Untracked sources get no state at all — bail before any hashing work.
    const key = getTrackingKey(querySource, agentId)
    if (!key) return

    const strippedSystem = stripCacheControl(
      system as unknown as ReadonlyArray<Record<string, unknown>>,
    )
    const strippedTools = stripCacheControl(
      toolSchemas as unknown as ReadonlyArray<Record<string, unknown>>,
    )

    const systemHash = computeHash(strippedSystem)
    const toolsHash = computeHash(strippedTools)
    // Hash the full system array INCLUDING cache_control — this catches
    // scope flips (global↔org/none) and TTL flips (1h↔5m) that the stripped
    // hash can't see because the text content is identical.
    const cacheControlHash = computeHash(
      system.map(b => ('cache_control' in b ? b.cache_control : null)),
    )
    const toolNames = toolSchemas.map(t => ('name' in t ? t.name : 'unknown'))
    // Only compute per-tool hashes when the aggregate changed — common case
    // (tools unchanged) skips N extra jsonStringify calls.
    const computeToolHashes = () =>
      computePerToolHashes(strippedTools, toolNames)
    const systemCharCount = getSystemCharCount(system)
    // Deferred serialization: only materialized if phase 2 writes a diff.
    // NOTE: the closure retains `system`/`toolSchemas` until the next call.
    const lazyDiffableContent = () =>
      buildDiffableContent(system, toolSchemas, model)
    const isFastMode = fastMode ?? false
    // Sorted so beta comparison is order-insensitive.
    const sortedBetas = [...betas].sort()
    const effortStr = effortValue === undefined ? '' : String(effortValue)
    const extraBodyHash =
      extraBodyParams === undefined ? 0 : computeHash(extraBodyParams)

    const prev = previousStateBySource.get(key)

    if (!prev) {
      // Evict oldest entries if map is at capacity
      // (insertion order — see NOTE(review) on MAX_TRACKED_SOURCES).
      while (previousStateBySource.size >= MAX_TRACKED_SOURCES) {
        const oldest = previousStateBySource.keys().next().value
        if (oldest !== undefined) previousStateBySource.delete(oldest)
      }

      // First call for this key: establish the baseline, nothing to diff.
      previousStateBySource.set(key, {
        systemHash,
        toolsHash,
        cacheControlHash,
        toolNames,
        systemCharCount,
        model,
        fastMode: isFastMode,
        globalCacheStrategy,
        betas: sortedBetas,
        autoModeActive,
        isUsingOverage,
        cachedMCEnabled,
        effortValue: effortStr,
        extraBodyHash,
        callCount: 1,
        pendingChanges: null,
        prevCacheReadTokens: null,
        cacheDeletionsPending: false,
        buildDiffableContent: lazyDiffableContent,
        // Eagerly hashed here so the second call has a baseline to diff.
        perToolHashes: computeToolHashes(),
      })
      return
    }

    prev.callCount++

    // Compare every tracked dimension against the previous call.
    const systemPromptChanged = systemHash !== prev.systemHash
    const toolSchemasChanged = toolsHash !== prev.toolsHash
    const modelChanged = model !== prev.model
    const fastModeChanged = isFastMode !== prev.fastMode
    const cacheControlChanged = cacheControlHash !== prev.cacheControlHash
    const globalCacheStrategyChanged =
      globalCacheStrategy !== prev.globalCacheStrategy
    // Both sides are sorted, so element-wise comparison is sufficient.
    const betasChanged =
      sortedBetas.length !== prev.betas.length ||
      sortedBetas.some((b, i) => b !== prev.betas[i])
    const autoModeChanged = autoModeActive !== prev.autoModeActive
    const overageChanged = isUsingOverage !== prev.isUsingOverage
    const cachedMCChanged = cachedMCEnabled !== prev.cachedMCEnabled
    const effortChanged = effortStr !== prev.effortValue
    const extraBodyChanged = extraBodyHash !== prev.extraBodyHash

    if (
      systemPromptChanged ||
      toolSchemasChanged ||
      modelChanged ||
      fastModeChanged ||
      cacheControlChanged ||
      globalCacheStrategyChanged ||
      betasChanged ||
      autoModeChanged ||
      overageChanged ||
      cachedMCChanged ||
      effortChanged ||
      extraBodyChanged
    ) {
      // Something changed — compute the detailed diff for phase 2.
      const prevToolSet = new Set(prev.toolNames)
      const newToolSet = new Set(toolNames)
      const prevBetaSet = new Set(prev.betas)
      const newBetaSet = new Set(sortedBetas)
      const addedTools = toolNames.filter(n => !prevToolSet.has(n))
      const removedTools = prev.toolNames.filter(n => !newToolSet.has(n))
      const changedToolSchemas: string[] = []
      if (toolSchemasChanged) {
        const newHashes = computeToolHashes()
        // Name tools present in both calls whose schema hash differs.
        for (const name of toolNames) {
          if (!prevToolSet.has(name)) continue
          if (newHashes[name] !== prev.perToolHashes[name]) {
            changedToolSchemas.push(name)
          }
        }
        // Advance the per-tool baseline only when it actually changed.
        prev.perToolHashes = newHashes
      }
      prev.pendingChanges = {
        systemPromptChanged,
        toolSchemasChanged,
        modelChanged,
        fastModeChanged,
        cacheControlChanged,
        globalCacheStrategyChanged,
        betasChanged,
        autoModeChanged,
        overageChanged,
        cachedMCChanged,
        effortChanged,
        extraBodyChanged,
        addedToolCount: addedTools.length,
        removedToolCount: removedTools.length,
        addedTools,
        removedTools,
        changedToolSchemas,
        systemCharDelta: systemCharCount - prev.systemCharCount,
        previousModel: prev.model,
        newModel: model,
        prevGlobalCacheStrategy: prev.globalCacheStrategy,
        newGlobalCacheStrategy: globalCacheStrategy,
        addedBetas: sortedBetas.filter(b => !prevBetaSet.has(b)),
        removedBetas: prev.betas.filter(b => !newBetaSet.has(b)),
        prevEffortValue: prev.effortValue,
        newEffortValue: effortStr,
        // Capture the PREVIOUS call's serializer before it is replaced below.
        buildPrevDiffableContent: prev.buildDiffableContent,
      }
    } else {
      // No change this call; clear any stale diff from an earlier call.
      prev.pendingChanges = null
    }

    // Advance the baseline to the current call's state.
    prev.systemHash = systemHash
    prev.toolsHash = toolsHash
    prev.cacheControlHash = cacheControlHash
    prev.toolNames = toolNames
    prev.systemCharCount = systemCharCount
    prev.model = model
    prev.fastMode = isFastMode
    prev.globalCacheStrategy = globalCacheStrategy
    prev.betas = sortedBetas
    prev.autoModeActive = autoModeActive
    prev.isUsingOverage = isUsingOverage
    prev.cachedMCEnabled = cachedMCEnabled
    prev.effortValue = effortStr
    prev.extraBodyHash = extraBodyHash
    prev.buildDiffableContent = lazyDiffableContent
  } catch (e: unknown) {
    logError(e)
  }
}
431  
/**
 * Phase 2 (post-call): Check the API response's cache tokens to determine
 * if a cache break actually occurred. If it did, use the pending changes
 * from phase 1 to explain why.
 *
 * A break is flagged when cache reads drop more than 5% AND by at least
 * MIN_CACHE_MISS_TOKENS versus the previous response. Fires the
 * tengu_prompt_cache_break event and (in debug) writes a before/after diff.
 * Never throws — failures are logged and swallowed.
 *
 * @param cacheReadTokens     cache_read_input_tokens from the response usage.
 * @param cacheCreationTokens cache_creation_input_tokens from the response usage.
 * @param messages            conversation so far; used only to estimate the gap
 *                            since the last assistant turn (TTL heuristic).
 * @param requestId           opaque server request ID for cross-referencing.
 */
export async function checkResponseForCacheBreak(
  querySource: QuerySource,
  cacheReadTokens: number,
  cacheCreationTokens: number,
  messages: Message[],
  agentId?: AgentId,
  requestId?: string | null,
): Promise<void> {
  try {
    const key = getTrackingKey(querySource, agentId)
    if (!key) return

    // No phase-1 baseline for this key — nothing to compare.
    const state = previousStateBySource.get(key)
    if (!state) return

    // Skip excluded models (e.g., haiku has different caching behavior)
    if (isExcludedModel(state.model)) return

    // Read the old baseline, then advance it unconditionally so the next
    // response compares against this one even if we early-return below.
    const prevCacheRead = state.prevCacheReadTokens
    state.prevCacheReadTokens = cacheReadTokens

    // Calculate time since last call for TTL detection by finding the most recent
    // assistant message timestamp in the messages array (before the current response)
    const lastAssistantMessage = messages.findLast(m => m.type === 'assistant')
    const timeSinceLastAssistantMsg = lastAssistantMessage
      ? Date.now() - new Date(lastAssistantMessage.timestamp).getTime()
      : null

    // Skip the first call — no previous value to compare against
    if (prevCacheRead === null) return

    const changes = state.pendingChanges

    // Cache deletions via cached microcompact intentionally reduce the cached
    // prefix. The drop in cache read tokens is expected — reset the baseline
    // so we don't false-positive on the next call.
    if (state.cacheDeletionsPending) {
      state.cacheDeletionsPending = false
      logForDebugging(
        `[PROMPT CACHE] cache deletion applied, cache read: ${prevCacheRead} → ${cacheReadTokens} (expected drop)`,
      )
      // Don't flag as a break — the remaining state is still valid
      state.pendingChanges = null
      return
    }

    // Detect a cache break: cache read dropped >5% from previous AND
    // the absolute drop exceeds the minimum threshold.
    const tokenDrop = prevCacheRead - cacheReadTokens
    if (
      cacheReadTokens >= prevCacheRead * 0.95 ||
      tokenDrop < MIN_CACHE_MISS_TOKENS
    ) {
      // Not a break — discard any pending diff so it can't mislabel a
      // later, unrelated drop.
      state.pendingChanges = null
      return
    }

    // Build explanation from pending changes (if any)
    const parts: string[] = []
    if (changes) {
      if (changes.modelChanged) {
        parts.push(
          `model changed (${changes.previousModel} → ${changes.newModel})`,
        )
      }
      if (changes.systemPromptChanged) {
        const charDelta = changes.systemCharDelta
        const charInfo =
          charDelta === 0
            ? ''
            : charDelta > 0
              ? ` (+${charDelta} chars)`
              : ` (${charDelta} chars)`
        parts.push(`system prompt changed${charInfo}`)
      }
      if (changes.toolSchemasChanged) {
        const toolDiff =
          changes.addedToolCount > 0 || changes.removedToolCount > 0
            ? ` (+${changes.addedToolCount}/-${changes.removedToolCount} tools)`
            : ' (tool prompt/schema changed, same tool set)'
        parts.push(`tools changed${toolDiff}`)
      }
      if (changes.fastModeChanged) {
        parts.push('fast mode toggled')
      }
      if (changes.globalCacheStrategyChanged) {
        parts.push(
          `global cache strategy changed (${changes.prevGlobalCacheStrategy || 'none'} → ${changes.newGlobalCacheStrategy || 'none'})`,
        )
      }
      if (
        changes.cacheControlChanged &&
        !changes.globalCacheStrategyChanged &&
        !changes.systemPromptChanged
      ) {
        // Only report as standalone cause if nothing else explains it —
        // otherwise the scope/TTL flip is a consequence, not the root cause.
        parts.push('cache_control changed (scope or TTL)')
      }
      if (changes.betasChanged) {
        const added = changes.addedBetas.length
          ? `+${changes.addedBetas.join(',')}`
          : ''
        const removed = changes.removedBetas.length
          ? `-${changes.removedBetas.join(',')}`
          : ''
        const diff = [added, removed].filter(Boolean).join(' ')
        parts.push(`betas changed${diff ? ` (${diff})` : ''}`)
      }
      if (changes.autoModeChanged) {
        parts.push('auto mode toggled')
      }
      if (changes.overageChanged) {
        parts.push('overage state changed (TTL latched, no flip)')
      }
      if (changes.cachedMCChanged) {
        parts.push('cached microcompact toggled')
      }
      if (changes.effortChanged) {
        parts.push(
          `effort changed (${changes.prevEffortValue || 'default'} → ${changes.newEffortValue || 'default'})`,
        )
      }
      if (changes.extraBodyChanged) {
        parts.push('extra body params changed')
      }
    }

    // Check if time gap suggests TTL expiration
    const lastAssistantMsgOver5minAgo =
      timeSinceLastAssistantMsg !== null &&
      timeSinceLastAssistantMsg > CACHE_TTL_5MIN_MS
    const lastAssistantMsgOver1hAgo =
      timeSinceLastAssistantMsg !== null &&
      timeSinceLastAssistantMsg > CACHE_TTL_1HOUR_MS

    // Post PR #19823 BQ analysis (bq-queries/prompt-caching/cache_break_pr19823_analysis.sql):
    // when all client-side flags are false and the gap is under TTL, ~90% of breaks
    // are server-side routing/eviction or billed/inference disagreement. Label
    // accordingly instead of implying a CC bug hunt.
    let reason: string
    if (parts.length > 0) {
      reason = parts.join(', ')
    } else if (lastAssistantMsgOver1hAgo) {
      reason = 'possible 1h TTL expiry (prompt unchanged)'
    } else if (lastAssistantMsgOver5minAgo) {
      reason = 'possible 5min TTL expiry (prompt unchanged)'
    } else if (timeSinceLastAssistantMsg !== null) {
      reason = 'likely server-side (prompt unchanged, <5min gap)'
    } else {
      reason = 'unknown cause'
    }

    logEvent('tengu_prompt_cache_break', {
      systemPromptChanged: changes?.systemPromptChanged ?? false,
      toolSchemasChanged: changes?.toolSchemasChanged ?? false,
      modelChanged: changes?.modelChanged ?? false,
      fastModeChanged: changes?.fastModeChanged ?? false,
      cacheControlChanged: changes?.cacheControlChanged ?? false,
      globalCacheStrategyChanged: changes?.globalCacheStrategyChanged ?? false,
      betasChanged: changes?.betasChanged ?? false,
      autoModeChanged: changes?.autoModeChanged ?? false,
      overageChanged: changes?.overageChanged ?? false,
      cachedMCChanged: changes?.cachedMCChanged ?? false,
      effortChanged: changes?.effortChanged ?? false,
      extraBodyChanged: changes?.extraBodyChanged ?? false,
      addedToolCount: changes?.addedToolCount ?? 0,
      removedToolCount: changes?.removedToolCount ?? 0,
      systemCharDelta: changes?.systemCharDelta ?? 0,
      // Tool names are sanitized: built-in names are a fixed vocabulary,
      // MCP tools collapse to 'mcp' (user-configured, could leak paths).
      addedTools: (changes?.addedTools ?? [])
        .map(sanitizeToolName)
        .join(
          ',',
        ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      removedTools: (changes?.removedTools ?? [])
        .map(sanitizeToolName)
        .join(
          ',',
        ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      changedToolSchemas: (changes?.changedToolSchemas ?? [])
        .map(sanitizeToolName)
        .join(
          ',',
        ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      // Beta header names and cache strategy are fixed enum-like values,
      // not code or filepaths. requestId is an opaque server-generated ID.
      addedBetas: (changes?.addedBetas ?? []).join(
        ',',
      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      removedBetas: (changes?.removedBetas ?? []).join(
        ',',
      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      prevGlobalCacheStrategy: (changes?.prevGlobalCacheStrategy ??
        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      newGlobalCacheStrategy: (changes?.newGlobalCacheStrategy ??
        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      callNumber: state.callCount,
      prevCacheReadTokens: prevCacheRead,
      cacheReadTokens,
      cacheCreationTokens,
      timeSinceLastAssistantMsg: timeSinceLastAssistantMsg ?? -1,
      lastAssistantMsgOver5minAgo,
      lastAssistantMsgOver1hAgo,
      requestId: (requestId ??
        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })

    // Write diff file for ant debugging via --debug. The path is included in
    // the summary log so ants can find it (DevBar UI removed — event data
    // flows reliably to BQ for analytics).
    let diffPath: string | undefined
    if (changes?.buildPrevDiffableContent) {
      diffPath = await writeCacheBreakDiff(
        changes.buildPrevDiffableContent(),
        state.buildDiffableContent(),
      )
    }

    const diffSuffix = diffPath ? `, diff: ${diffPath}` : ''
    const summary = `[PROMPT CACHE BREAK] ${reason} [source=${querySource}, call #${state.callCount}, cache read: ${prevCacheRead} → ${cacheReadTokens}, creation: ${cacheCreationTokens}${diffSuffix}]`

    logForDebugging(summary, { level: 'warn' })

    // Diff consumed — clear it so it can't explain a later, unrelated break.
    state.pendingChanges = null
  } catch (e: unknown) {
    logError(e)
  }
}
667  
668  /**
669   * Call when cached microcompact sends cache_edits deletions.
670   * The next API response will have lower cache read tokens — that's
671   * expected, not a cache break.
672   */
673  export function notifyCacheDeletion(
674    querySource: QuerySource,
675    agentId?: AgentId,
676  ): void {
677    const key = getTrackingKey(querySource, agentId)
678    const state = key ? previousStateBySource.get(key) : undefined
679    if (state) {
680      state.cacheDeletionsPending = true
681    }
682  }
683  
684  /**
685   * Call after compaction to reset the cache read baseline.
686   * Compaction legitimately reduces message count, so cache read tokens
687   * will naturally drop on the next call — that's not a break.
688   */
689  export function notifyCompaction(
690    querySource: QuerySource,
691    agentId?: AgentId,
692  ): void {
693    const key = getTrackingKey(querySource, agentId)
694    const state = key ? previousStateBySource.get(key) : undefined
695    if (state) {
696      state.prevCacheReadTokens = null
697    }
698  }
699  
700  export function cleanupAgentTracking(agentId: AgentId): void {
701    previousStateBySource.delete(agentId)
702  }
703  
704  export function resetPromptCacheBreakDetection(): void {
705    previousStateBySource.clear()
706  }
707  
708  async function writeCacheBreakDiff(
709    prevContent: string,
710    newContent: string,
711  ): Promise<string | undefined> {
712    try {
713      const diffPath = getCacheBreakDiffPath()
714      await mkdir(getClaudeTempDir(), { recursive: true })
715      const patch = createPatch(
716        'prompt-state',
717        prevContent,
718        newContent,
719        'before',
720        'after',
721      )
722      await writeFile(diffPath, patch)
723      return diffPath
724    } catch {
725      return undefined
726    }
727  }