/ services / compact / autoCompact.ts
autoCompact.ts
  1  import { feature } from 'bun:bundle'
  2  import { markPostCompaction } from 'src/bootstrap/state.js'
  3  import { getSdkBetas } from '../../bootstrap/state.js'
  4  import type { QuerySource } from '../../constants/querySource.js'
  5  import type { ToolUseContext } from '../../Tool.js'
  6  import type { Message } from '../../types/message.js'
  7  import { getGlobalConfig } from '../../utils/config.js'
  8  import { getContextWindowForModel } from '../../utils/context.js'
  9  import { logForDebugging } from '../../utils/debug.js'
 10  import { isEnvTruthy } from '../../utils/envUtils.js'
 11  import { hasExactErrorMessage } from '../../utils/errors.js'
 12  import type { CacheSafeParams } from '../../utils/forkedAgent.js'
 13  import { logError } from '../../utils/log.js'
 14  import { tokenCountWithEstimation } from '../../utils/tokens.js'
 15  import { getFeatureValue_CACHED_MAY_BE_STALE } from '../analytics/growthbook.js'
 16  import { getMaxOutputTokensForModel } from '../api/claude.js'
 17  import { notifyCompaction } from '../api/promptCacheBreakDetection.js'
 18  import { setLastSummarizedMessageId } from '../SessionMemory/sessionMemoryUtils.js'
 19  import {
 20    type CompactionResult,
 21    compactConversation,
 22    ERROR_MESSAGE_USER_ABORT,
 23    type RecompactionInfo,
 24  } from './compact.js'
 25  import { runPostCompactCleanup } from './postCompactCleanup.js'
 26  import { trySessionMemoryCompaction } from './sessionMemoryCompact.js'
 27  
 28  // Reserve this many tokens for output during compaction
 29  // Based on p99.99 of compact summary output being 17,387 tokens.
 30  const MAX_OUTPUT_TOKENS_FOR_SUMMARY = 20_000
 31  
 32  // Returns the context window size minus the max output tokens for the model
 33  export function getEffectiveContextWindowSize(model: string): number {
 34    const reservedTokensForSummary = Math.min(
 35      getMaxOutputTokensForModel(model),
 36      MAX_OUTPUT_TOKENS_FOR_SUMMARY,
 37    )
 38    let contextWindow = getContextWindowForModel(model, getSdkBetas())
 39  
 40    const autoCompactWindow = process.env.CLAUDE_CODE_AUTO_COMPACT_WINDOW
 41    if (autoCompactWindow) {
 42      const parsed = parseInt(autoCompactWindow, 10)
 43      if (!isNaN(parsed) && parsed > 0) {
 44        contextWindow = Math.min(contextWindow, parsed)
 45      }
 46    }
 47  
 48    return contextWindow - reservedTokensForSummary
 49  }
 50  
 51  export type AutoCompactTrackingState = {
 52    compacted: boolean
 53    turnCounter: number
 54    // Unique ID per turn
 55    turnId: string
 56    // Consecutive autocompact failures. Reset on success.
 57    // Used as a circuit breaker to stop retrying when the context is
 58    // irrecoverably over the limit (e.g., prompt_too_long).
 59    consecutiveFailures?: number
 60  }
 61  
 62  export const AUTOCOMPACT_BUFFER_TOKENS = 13_000
 63  export const WARNING_THRESHOLD_BUFFER_TOKENS = 20_000
 64  export const ERROR_THRESHOLD_BUFFER_TOKENS = 20_000
 65  export const MANUAL_COMPACT_BUFFER_TOKENS = 3_000
 66  
 67  // Stop trying autocompact after this many consecutive failures.
 68  // BQ 2026-03-10: 1,279 sessions had 50+ consecutive failures (up to 3,272)
 69  // in a single session, wasting ~250K API calls/day globally.
 70  const MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES = 3
 71  
 72  export function getAutoCompactThreshold(model: string): number {
 73    const effectiveContextWindow = getEffectiveContextWindowSize(model)
 74  
 75    const autocompactThreshold =
 76      effectiveContextWindow - AUTOCOMPACT_BUFFER_TOKENS
 77  
 78    // Override for easier testing of autocompact
 79    const envPercent = process.env.CLAUDE_AUTOCOMPACT_PCT_OVERRIDE
 80    if (envPercent) {
 81      const parsed = parseFloat(envPercent)
 82      if (!isNaN(parsed) && parsed > 0 && parsed <= 100) {
 83        const percentageThreshold = Math.floor(
 84          effectiveContextWindow * (parsed / 100),
 85        )
 86        return Math.min(percentageThreshold, autocompactThreshold)
 87      }
 88    }
 89  
 90    return autocompactThreshold
 91  }
 92  
 93  export function calculateTokenWarningState(
 94    tokenUsage: number,
 95    model: string,
 96  ): {
 97    percentLeft: number
 98    isAboveWarningThreshold: boolean
 99    isAboveErrorThreshold: boolean
100    isAboveAutoCompactThreshold: boolean
101    isAtBlockingLimit: boolean
102  } {
103    const autoCompactThreshold = getAutoCompactThreshold(model)
104    const threshold = isAutoCompactEnabled()
105      ? autoCompactThreshold
106      : getEffectiveContextWindowSize(model)
107  
108    const percentLeft = Math.max(
109      0,
110      Math.round(((threshold - tokenUsage) / threshold) * 100),
111    )
112  
113    const warningThreshold = threshold - WARNING_THRESHOLD_BUFFER_TOKENS
114    const errorThreshold = threshold - ERROR_THRESHOLD_BUFFER_TOKENS
115  
116    const isAboveWarningThreshold = tokenUsage >= warningThreshold
117    const isAboveErrorThreshold = tokenUsage >= errorThreshold
118  
119    const isAboveAutoCompactThreshold =
120      isAutoCompactEnabled() && tokenUsage >= autoCompactThreshold
121  
122    const actualContextWindow = getEffectiveContextWindowSize(model)
123    const defaultBlockingLimit =
124      actualContextWindow - MANUAL_COMPACT_BUFFER_TOKENS
125  
126    // Allow override for testing
127    const blockingLimitOverride = process.env.CLAUDE_CODE_BLOCKING_LIMIT_OVERRIDE
128    const parsedOverride = blockingLimitOverride
129      ? parseInt(blockingLimitOverride, 10)
130      : NaN
131    const blockingLimit =
132      !isNaN(parsedOverride) && parsedOverride > 0
133        ? parsedOverride
134        : defaultBlockingLimit
135  
136    const isAtBlockingLimit = tokenUsage >= blockingLimit
137  
138    return {
139      percentLeft,
140      isAboveWarningThreshold,
141      isAboveErrorThreshold,
142      isAboveAutoCompactThreshold,
143      isAtBlockingLimit,
144    }
145  }
146  
147  export function isAutoCompactEnabled(): boolean {
148    if (isEnvTruthy(process.env.DISABLE_COMPACT)) {
149      return false
150    }
151    // Allow disabling just auto-compact (keeps manual /compact working)
152    if (isEnvTruthy(process.env.DISABLE_AUTO_COMPACT)) {
153      return false
154    }
155    // Check if user has disabled auto-compact in their settings
156    const userConfig = getGlobalConfig()
157    return userConfig.autoCompactEnabled
158  }
159  
160  export async function shouldAutoCompact(
161    messages: Message[],
162    model: string,
163    querySource?: QuerySource,
164    // Snip removes messages but the surviving assistant's usage still reflects
165    // pre-snip context, so tokenCountWithEstimation can't see the savings.
166    // Subtract the rough-delta that snip already computed.
167    snipTokensFreed = 0,
168  ): Promise<boolean> {
169    // Recursion guards. session_memory and compact are forked agents that
170    // would deadlock.
171    if (querySource === 'session_memory' || querySource === 'compact') {
172      return false
173    }
174    // marble_origami is the ctx-agent — if ITS context blows up and
175    // autocompact fires, runPostCompactCleanup calls resetContextCollapse()
176    // which destroys the MAIN thread's committed log (module-level state
177    // shared across forks). Inside feature() so the string DCEs from
178    // external builds (it's in excluded-strings.txt).
179    if (feature('CONTEXT_COLLAPSE')) {
180      if (querySource === 'marble_origami') {
181        return false
182      }
183    }
184  
185    if (!isAutoCompactEnabled()) {
186      return false
187    }
188  
189    // Reactive-only mode: suppress proactive autocompact, let reactive compact
190    // catch the API's prompt-too-long. feature() wrapper keeps the flag string
191    // out of external builds (REACTIVE_COMPACT is ant-only).
192    // Note: returning false here also means autoCompactIfNeeded never reaches
193    // trySessionMemoryCompaction in the query loop — the /compact call site
194    // still tries session memory first. Revisit if reactive-only graduates.
195    if (feature('REACTIVE_COMPACT')) {
196      if (getFeatureValue_CACHED_MAY_BE_STALE('tengu_cobalt_raccoon', false)) {
197        return false
198      }
199    }
200  
201    // Context-collapse mode: same suppression. Collapse IS the context
202    // management system when it's on — the 90% commit / 95% blocking-spawn
203    // flow owns the headroom problem. Autocompact firing at effective-13k
204    // (~93% of effective) sits right between collapse's commit-start (90%)
205    // and blocking (95%), so it would race collapse and usually win, nuking
206    // granular context that collapse was about to save. Gating here rather
207    // than in isAutoCompactEnabled() keeps reactiveCompact alive as the 413
208    // fallback (it consults isAutoCompactEnabled directly) and leaves
209    // sessionMemory + manual /compact working.
210    //
211    // Consult isContextCollapseEnabled (not the raw gate) so the
212    // CLAUDE_CONTEXT_COLLAPSE env override is honored here too. require()
213    // inside the block breaks the init-time cycle (this file exports
214    // getEffectiveContextWindowSize which collapse's index imports).
215    if (feature('CONTEXT_COLLAPSE')) {
216      /* eslint-disable @typescript-eslint/no-require-imports */
217      const { isContextCollapseEnabled } =
218        require('../contextCollapse/index.js') as typeof import('../contextCollapse/index.js')
219      /* eslint-enable @typescript-eslint/no-require-imports */
220      if (isContextCollapseEnabled()) {
221        return false
222      }
223    }
224  
225    const tokenCount = tokenCountWithEstimation(messages) - snipTokensFreed
226    const threshold = getAutoCompactThreshold(model)
227    const effectiveWindow = getEffectiveContextWindowSize(model)
228  
229    logForDebugging(
230      `autocompact: tokens=${tokenCount} threshold=${threshold} effectiveWindow=${effectiveWindow}${snipTokensFreed > 0 ? ` snipFreed=${snipTokensFreed}` : ''}`,
231    )
232  
233    const { isAboveAutoCompactThreshold } = calculateTokenWarningState(
234      tokenCount,
235      model,
236    )
237  
238    return isAboveAutoCompactThreshold
239  }
240  
241  export async function autoCompactIfNeeded(
242    messages: Message[],
243    toolUseContext: ToolUseContext,
244    cacheSafeParams: CacheSafeParams,
245    querySource?: QuerySource,
246    tracking?: AutoCompactTrackingState,
247    snipTokensFreed?: number,
248  ): Promise<{
249    wasCompacted: boolean
250    compactionResult?: CompactionResult
251    consecutiveFailures?: number
252  }> {
253    if (isEnvTruthy(process.env.DISABLE_COMPACT)) {
254      return { wasCompacted: false }
255    }
256  
257    // Circuit breaker: stop retrying after N consecutive failures.
258    // Without this, sessions where context is irrecoverably over the limit
259    // hammer the API with doomed compaction attempts on every turn.
260    if (
261      tracking?.consecutiveFailures !== undefined &&
262      tracking.consecutiveFailures >= MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES
263    ) {
264      return { wasCompacted: false }
265    }
266  
267    const model = toolUseContext.options.mainLoopModel
268    const shouldCompact = await shouldAutoCompact(
269      messages,
270      model,
271      querySource,
272      snipTokensFreed,
273    )
274  
275    if (!shouldCompact) {
276      return { wasCompacted: false }
277    }
278  
279    const recompactionInfo: RecompactionInfo = {
280      isRecompactionInChain: tracking?.compacted === true,
281      turnsSincePreviousCompact: tracking?.turnCounter ?? -1,
282      previousCompactTurnId: tracking?.turnId,
283      autoCompactThreshold: getAutoCompactThreshold(model),
284      querySource,
285    }
286  
287    // EXPERIMENT: Try session memory compaction first
288    const sessionMemoryResult = await trySessionMemoryCompaction(
289      messages,
290      toolUseContext.agentId,
291      recompactionInfo.autoCompactThreshold,
292    )
293    if (sessionMemoryResult) {
294      // Reset lastSummarizedMessageId since session memory compaction prunes messages
295      // and the old message UUID will no longer exist after the REPL replaces messages
296      setLastSummarizedMessageId(undefined)
297      runPostCompactCleanup(querySource)
298      // Reset cache read baseline so the post-compact drop isn't flagged as a
299      // break. compactConversation does this internally; SM-compact doesn't.
300      // BQ 2026-03-01: missing this made 20% of tengu_prompt_cache_break events
301      // false positives (systemPromptChanged=true, timeSinceLastAssistantMsg=-1).
302      if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
303        notifyCompaction(querySource ?? 'compact', toolUseContext.agentId)
304      }
305      markPostCompaction()
306      return {
307        wasCompacted: true,
308        compactionResult: sessionMemoryResult,
309      }
310    }
311  
312    try {
313      const compactionResult = await compactConversation(
314        messages,
315        toolUseContext,
316        cacheSafeParams,
317        true, // Suppress user questions for autocompact
318        undefined, // No custom instructions for autocompact
319        true, // isAutoCompact
320        recompactionInfo,
321      )
322  
323      // Reset lastSummarizedMessageId since legacy compaction replaces all messages
324      // and the old message UUID will no longer exist in the new messages array
325      setLastSummarizedMessageId(undefined)
326      runPostCompactCleanup(querySource)
327  
328      return {
329        wasCompacted: true,
330        compactionResult,
331        // Reset failure count on success
332        consecutiveFailures: 0,
333      }
334    } catch (error) {
335      if (!hasExactErrorMessage(error, ERROR_MESSAGE_USER_ABORT)) {
336        logError(error)
337      }
338      // Increment consecutive failure count for circuit breaker.
339      // The caller threads this through autoCompactTracking so the
340      // next query loop iteration can skip futile retry attempts.
341      const prevFailures = tracking?.consecutiveFailures ?? 0
342      const nextFailures = prevFailures + 1
343      if (nextFailures >= MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES) {
344        logForDebugging(
345          `autocompact: circuit breaker tripped after ${nextFailures} consecutive failures — skipping future attempts this session`,
346          { level: 'warn' },
347        )
348      }
349      return { wasCompacted: false, consecutiveFailures: nextFailures }
350    }
351  }