// services/compact/microCompact.ts
  1  import { feature } from 'bun:bundle'
  2  import type { ToolResultBlockParam } from '@anthropic-ai/sdk/resources/index.mjs'
  3  import type { QuerySource } from '../../constants/querySource.js'
  4  import type { ToolUseContext } from '../../Tool.js'
  5  import { FILE_EDIT_TOOL_NAME } from '../../tools/FileEditTool/constants.js'
  6  import { FILE_READ_TOOL_NAME } from '../../tools/FileReadTool/prompt.js'
  7  import { FILE_WRITE_TOOL_NAME } from '../../tools/FileWriteTool/prompt.js'
  8  import { GLOB_TOOL_NAME } from '../../tools/GlobTool/prompt.js'
  9  import { GREP_TOOL_NAME } from '../../tools/GrepTool/prompt.js'
 10  import { WEB_FETCH_TOOL_NAME } from '../../tools/WebFetchTool/prompt.js'
 11  import { WEB_SEARCH_TOOL_NAME } from '../../tools/WebSearchTool/prompt.js'
 12  import type { Message } from '../../types/message.js'
 13  import { logForDebugging } from '../../utils/debug.js'
 14  import { getMainLoopModel } from '../../utils/model/model.js'
 15  import { SHELL_TOOL_NAMES } from '../../utils/shell/shellToolUtils.js'
 16  import { jsonStringify } from '../../utils/slowOperations.js'
 17  import {
 18    type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 19    logEvent,
 20  } from '../analytics/index.js'
 21  import { notifyCacheDeletion } from '../api/promptCacheBreakDetection.js'
 22  import { roughTokenCountEstimation } from '../tokenEstimation.js'
 23  import {
 24    clearCompactWarningSuppression,
 25    suppressCompactWarning,
 26  } from './compactWarningState.js'
 27  import {
 28    getTimeBasedMCConfig,
 29    type TimeBasedMCConfig,
 30  } from './timeBasedMCConfig.js'
 31  
// Inline from utils/toolResultStorage.ts — importing that file pulls in
// sessionStorage → utils/messages → services/api/errors, completing a
// circular-deps loop back through this file via promptCacheBreakDetection.
// Drift is caught by a test asserting equality with the source-of-truth.
export const TIME_BASED_MC_CLEARED_MESSAGE = '[Old tool result content cleared]'

// Flat per-block token estimate for image/document content, which has no
// text to count (used by calculateToolResultTokens / estimateMessageTokens).
const IMAGE_MAX_TOKEN_SIZE = 2000

// Only compact these tools — tool_use blocks whose name is in this set are
// the only ones eligible for deletion/clearing (see collectCompactableToolIds).
const COMPACTABLE_TOOLS = new Set<string>([
  FILE_READ_TOOL_NAME,
  ...SHELL_TOOL_NAMES,
  GREP_TOOL_NAME,
  GLOB_TOOL_NAME,
  WEB_SEARCH_TOOL_NAME,
  WEB_FETCH_TOOL_NAME,
  FILE_EDIT_TOOL_NAME,
  FILE_WRITE_TOOL_NAME,
])
 51  
// --- Cached microcompact state (ant-only, gated by feature('CACHED_MICROCOMPACT')) ---

// Lazy-initialized cached MC module and state to avoid importing in external builds.
// The imports and state live inside feature() checks for dead code elimination.
let cachedMCModule: typeof import('./cachedMicrocompact.js') | null = null
let cachedMCState: import('./cachedMicrocompact.js').CachedMCState | null = null
// Cache edits queued by cachedMicrocompactPath; handed to the API layer
// exactly once via consumePendingCacheEdits().
let pendingCacheEdits:
  | import('./cachedMicrocompact.js').CacheEditsBlock
  | null = null
 61  
 62  async function getCachedMCModule(): Promise<
 63    typeof import('./cachedMicrocompact.js')
 64  > {
 65    if (!cachedMCModule) {
 66      cachedMCModule = await import('./cachedMicrocompact.js')
 67    }
 68    return cachedMCModule
 69  }
 70  
 71  function ensureCachedMCState(): import('./cachedMicrocompact.js').CachedMCState {
 72    if (!cachedMCState && cachedMCModule) {
 73      cachedMCState = cachedMCModule.createCachedMCState()
 74    }
 75    if (!cachedMCState) {
 76      throw new Error(
 77        'cachedMCState not initialized — getCachedMCModule() must be called first',
 78      )
 79    }
 80    return cachedMCState
 81  }
 82  
 83  /**
 84   * Get new pending cache edits to be included in the next API request.
 85   * Returns null if there are no new pending edits.
 86   * Clears the pending state (caller must pin them after insertion).
 87   */
 88  export function consumePendingCacheEdits():
 89    | import('./cachedMicrocompact.js').CacheEditsBlock
 90    | null {
 91    const edits = pendingCacheEdits
 92    pendingCacheEdits = null
 93    return edits
 94  }
 95  
 96  /**
 97   * Get all previously-pinned cache edits that must be re-sent at their
 98   * original positions for cache hits.
 99   */
100  export function getPinnedCacheEdits(): import('./cachedMicrocompact.js').PinnedCacheEdits[] {
101    if (!cachedMCState) {
102      return []
103    }
104    return cachedMCState.pinnedEdits
105  }
106  
107  /**
108   * Pin a new cache_edits block to a specific user message position.
109   * Called after inserting new edits so they are re-sent in subsequent calls.
110   */
111  export function pinCacheEdits(
112    userMessageIndex: number,
113    block: import('./cachedMicrocompact.js').CacheEditsBlock,
114  ): void {
115    if (cachedMCState) {
116      cachedMCState.pinnedEdits.push({ userMessageIndex, block })
117    }
118  }
119  
120  /**
121   * Marks all registered tools as sent to the API.
122   * Called after a successful API response.
123   */
124  export function markToolsSentToAPIState(): void {
125    if (cachedMCState && cachedMCModule) {
126      cachedMCModule.markToolsSentToAPI(cachedMCState)
127    }
128  }
129  
130  export function resetMicrocompactState(): void {
131    if (cachedMCState && cachedMCModule) {
132      cachedMCModule.resetCachedMCState(cachedMCState)
133    }
134    pendingCacheEdits = null
135  }
136  
137  // Helper to calculate tool result tokens
138  function calculateToolResultTokens(block: ToolResultBlockParam): number {
139    if (!block.content) {
140      return 0
141    }
142  
143    if (typeof block.content === 'string') {
144      return roughTokenCountEstimation(block.content)
145    }
146  
147    // Array of TextBlockParam | ImageBlockParam | DocumentBlockParam
148    return block.content.reduce((sum, item) => {
149      if (item.type === 'text') {
150        return sum + roughTokenCountEstimation(item.text)
151      } else if (item.type === 'image' || item.type === 'document') {
152        // Images/documents are approximately 2000 tokens regardless of format
153        return sum + IMAGE_MAX_TOKEN_SIZE
154      }
155      return sum
156    }, 0)
157  }
158  
159  /**
160   * Estimate token count for messages by extracting text content
161   * Used for rough token estimation when we don't have accurate API counts
162   * Pads estimate by 4/3 to be conservative since we're approximating
163   */
164  export function estimateMessageTokens(messages: Message[]): number {
165    let totalTokens = 0
166  
167    for (const message of messages) {
168      if (message.type !== 'user' && message.type !== 'assistant') {
169        continue
170      }
171  
172      if (!Array.isArray(message.message.content)) {
173        continue
174      }
175  
176      for (const block of message.message.content) {
177        if (block.type === 'text') {
178          totalTokens += roughTokenCountEstimation(block.text)
179        } else if (block.type === 'tool_result') {
180          totalTokens += calculateToolResultTokens(block)
181        } else if (block.type === 'image' || block.type === 'document') {
182          totalTokens += IMAGE_MAX_TOKEN_SIZE
183        } else if (block.type === 'thinking') {
184          // Match roughTokenCountEstimationForBlock: count only the thinking
185          // text, not the JSON wrapper or signature (signature is metadata,
186          // not model-tokenized content).
187          totalTokens += roughTokenCountEstimation(block.thinking)
188        } else if (block.type === 'redacted_thinking') {
189          totalTokens += roughTokenCountEstimation(block.data)
190        } else if (block.type === 'tool_use') {
191          // Match roughTokenCountEstimationForBlock: count name + input,
192          // not the JSON wrapper or id field.
193          totalTokens += roughTokenCountEstimation(
194            block.name + jsonStringify(block.input ?? {}),
195          )
196        } else {
197          // server_tool_use, web_search_tool_result, etc.
198          totalTokens += roughTokenCountEstimation(jsonStringify(block))
199        }
200      }
201    }
202  
203    // Pad estimate by 4/3 to be conservative since we're approximating
204    return Math.ceil(totalTokens * (4 / 3))
205  }
206  
// Metadata returned alongside a cached-MC compaction so the API layer can
// reconcile the queued cache edits with the eventual API response.
export type PendingCacheEdits = {
  // Only the automatic count-based trigger exists today.
  trigger: 'auto'
  // tool_use ids whose results were queued for server-side deletion.
  deletedToolIds: string[]
  // Baseline cumulative cache_deleted_input_tokens from the previous API response,
  // used to compute the per-operation delta (the API value is sticky/cumulative)
  baselineCacheDeletedTokens: number
}

// Result of one microcompact pass: the (possibly rewritten) messages, plus
// optional info about cache edits still to be applied at the API layer.
export type MicrocompactResult = {
  messages: Message[]
  compactionInfo?: {
    pendingCacheEdits?: PendingCacheEdits
  }
}
221  
222  /**
223   * Walk messages and collect tool_use IDs whose tool name is in
224   * COMPACTABLE_TOOLS, in encounter order. Shared by both microcompact paths.
225   */
226  function collectCompactableToolIds(messages: Message[]): string[] {
227    const ids: string[] = []
228    for (const message of messages) {
229      if (
230        message.type === 'assistant' &&
231        Array.isArray(message.message.content)
232      ) {
233        for (const block of message.message.content) {
234          if (block.type === 'tool_use' && COMPACTABLE_TOOLS.has(block.name)) {
235            ids.push(block.id)
236          }
237        }
238      }
239    }
240    return ids
241  }
242  
243  // Prefix-match because promptCategory.ts sets the querySource to
244  // 'repl_main_thread:outputStyle:<style>' when a non-default output style
245  // is active. The bare 'repl_main_thread' is only used for the default style.
246  // query.ts:350/1451 use the same startsWith pattern; the pre-existing
247  // cached-MC `=== 'repl_main_thread'` check was a latent bug — users with a
248  // non-default output style were silently excluded from cached MC.
249  function isMainThreadSource(querySource: QuerySource | undefined): boolean {
250    return !querySource || querySource.startsWith('repl_main_thread')
251  }
252  
253  export async function microcompactMessages(
254    messages: Message[],
255    toolUseContext?: ToolUseContext,
256    querySource?: QuerySource,
257  ): Promise<MicrocompactResult> {
258    // Clear suppression flag at start of new microcompact attempt
259    clearCompactWarningSuppression()
260  
261    // Time-based trigger runs first and short-circuits. If the gap since the
262    // last assistant message exceeds the threshold, the server cache has expired
263    // and the full prefix will be rewritten regardless — so content-clear old
264    // tool results now, before the request, to shrink what gets rewritten.
265    // Cached MC (cache-editing) is skipped when this fires: editing assumes a
266    // warm cache, and we just established it's cold.
267    const timeBasedResult = maybeTimeBasedMicrocompact(messages, querySource)
268    if (timeBasedResult) {
269      return timeBasedResult
270    }
271  
272    // Only run cached MC for the main thread to prevent forked agents
273    // (session_memory, prompt_suggestion, etc.) from registering their
274    // tool_results in the global cachedMCState, which would cause the main
275    // thread to try deleting tools that don't exist in its own conversation.
276    if (feature('CACHED_MICROCOMPACT')) {
277      const mod = await getCachedMCModule()
278      const model = toolUseContext?.options.mainLoopModel ?? getMainLoopModel()
279      if (
280        mod.isCachedMicrocompactEnabled() &&
281        mod.isModelSupportedForCacheEditing(model) &&
282        isMainThreadSource(querySource)
283      ) {
284        return await cachedMicrocompactPath(messages, querySource)
285      }
286    }
287  
288    // Legacy microcompact path removed — tengu_cache_plum_violet is always true.
289    // For contexts where cached microcompact is not available (external builds,
290    // non-ant users, unsupported models, sub-agents), no compaction happens here;
291    // autocompact handles context pressure instead.
292    return { messages }
293  }
294  
295  /**
296   * Cached microcompact path - uses cache editing API to remove tool results
297   * without invalidating the cached prefix.
298   *
299   * Key differences from regular microcompact:
300   * - Does NOT modify local message content (cache_reference and cache_edits are added at API layer)
301   * - Uses count-based trigger/keep thresholds from GrowthBook config
302   * - Takes precedence over regular microcompact (no disk persistence)
303   * - Tracks tool results and queues cache edits for the API layer
304   */
305  async function cachedMicrocompactPath(
306    messages: Message[],
307    querySource: QuerySource | undefined,
308  ): Promise<MicrocompactResult> {
309    const mod = await getCachedMCModule()
310    const state = ensureCachedMCState()
311    const config = mod.getCachedMCConfig()
312  
313    const compactableToolIds = new Set(collectCompactableToolIds(messages))
314    // Second pass: register tool results grouped by user message
315    for (const message of messages) {
316      if (message.type === 'user' && Array.isArray(message.message.content)) {
317        const groupIds: string[] = []
318        for (const block of message.message.content) {
319          if (
320            block.type === 'tool_result' &&
321            compactableToolIds.has(block.tool_use_id) &&
322            !state.registeredTools.has(block.tool_use_id)
323          ) {
324            mod.registerToolResult(state, block.tool_use_id)
325            groupIds.push(block.tool_use_id)
326          }
327        }
328        mod.registerToolMessage(state, groupIds)
329      }
330    }
331  
332    const toolsToDelete = mod.getToolResultsToDelete(state)
333  
334    if (toolsToDelete.length > 0) {
335      // Create and queue the cache_edits block for the API layer
336      const cacheEdits = mod.createCacheEditsBlock(state, toolsToDelete)
337      if (cacheEdits) {
338        pendingCacheEdits = cacheEdits
339      }
340  
341      logForDebugging(
342        `Cached MC deleting ${toolsToDelete.length} tool(s): ${toolsToDelete.join(', ')}`,
343      )
344  
345      // Log the event
346      logEvent('tengu_cached_microcompact', {
347        toolsDeleted: toolsToDelete.length,
348        deletedToolIds: toolsToDelete.join(
349          ',',
350        ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
351        activeToolCount: state.toolOrder.length - state.deletedRefs.size,
352        triggerType:
353          'auto' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
354        threshold: config.triggerThreshold,
355        keepRecent: config.keepRecent,
356      })
357  
358      // Suppress warning after successful compaction
359      suppressCompactWarning()
360  
361      // Notify cache break detection that cache reads will legitimately drop
362      if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
363        // Pass the actual querySource — isMainThreadSource now prefix-matches
364        // so output-style variants enter here, and getTrackingKey keys on the
365        // full source string, not the 'repl_main_thread' prefix.
366        notifyCacheDeletion(querySource ?? 'repl_main_thread')
367      }
368  
369      // Return messages unchanged - cache_reference and cache_edits are added at API layer
370      // Boundary message is deferred until after API response so we can use
371      // actual cache_deleted_input_tokens from the API instead of client-side estimates
372      // Capture the baseline cumulative cache_deleted_input_tokens from the last
373      // assistant message so we can compute a per-operation delta after the API call
374      const lastAsst = messages.findLast(m => m.type === 'assistant')
375      const baseline =
376        lastAsst?.type === 'assistant'
377          ? ((
378              lastAsst.message.usage as unknown as Record<
379                string,
380                number | undefined
381              >
382            )?.cache_deleted_input_tokens ?? 0)
383          : 0
384  
385      return {
386        messages,
387        compactionInfo: {
388          pendingCacheEdits: {
389            trigger: 'auto',
390            deletedToolIds: toolsToDelete,
391            baselineCacheDeletedTokens: baseline,
392          },
393        },
394      }
395    }
396  
397    // No compaction needed, return messages unchanged
398    return { messages }
399  }
400  
/*
 * Time-based microcompact (implemented by maybeTimeBasedMicrocompact below):
 * when the gap since the last main-loop assistant message exceeds the
 * configured threshold, content-clear all but the most recent N compactable
 * tool results.
 *
 * It returns null when the trigger doesn't fire (disabled, wrong source, gap
 * under threshold, nothing to clear) — the caller falls through to other paths.
 *
 * Unlike cached MC, it mutates message content directly. The cache is cold,
 * so there's no cached prefix to preserve via cache_edits.
 */
412  /**
413   * Check whether the time-based trigger should fire for this request.
414   *
415   * Returns the measured gap (minutes since last assistant message) when the
416   * trigger fires, or null when it doesn't (disabled, wrong source, under
417   * threshold, no prior assistant, unparseable timestamp).
418   *
419   * Extracted so other pre-request paths (e.g. snip force-apply) can consult
420   * the same predicate without coupling to the tool-result clearing action.
421   */
422  export function evaluateTimeBasedTrigger(
423    messages: Message[],
424    querySource: QuerySource | undefined,
425  ): { gapMinutes: number; config: TimeBasedMCConfig } | null {
426    const config = getTimeBasedMCConfig()
427    // Require an explicit main-thread querySource. isMainThreadSource treats
428    // undefined as main-thread (for cached-MC backward-compat), but several
429    // callers (/context, /compact, analyzeContext) invoke microcompactMessages
430    // without a source for analysis-only purposes — they should not trigger.
431    if (!config.enabled || !querySource || !isMainThreadSource(querySource)) {
432      return null
433    }
434    const lastAssistant = messages.findLast(m => m.type === 'assistant')
435    if (!lastAssistant) {
436      return null
437    }
438    const gapMinutes =
439      (Date.now() - new Date(lastAssistant.timestamp).getTime()) / 60_000
440    if (!Number.isFinite(gapMinutes) || gapMinutes < config.gapThresholdMinutes) {
441      return null
442    }
443    return { gapMinutes, config }
444  }
445  
446  function maybeTimeBasedMicrocompact(
447    messages: Message[],
448    querySource: QuerySource | undefined,
449  ): MicrocompactResult | null {
450    const trigger = evaluateTimeBasedTrigger(messages, querySource)
451    if (!trigger) {
452      return null
453    }
454    const { gapMinutes, config } = trigger
455  
456    const compactableIds = collectCompactableToolIds(messages)
457  
458    // Floor at 1: slice(-0) returns the full array (paradoxically keeps
459    // everything), and clearing ALL results leaves the model with zero working
460    // context. Neither degenerate is sensible — always keep at least the last.
461    const keepRecent = Math.max(1, config.keepRecent)
462    const keepSet = new Set(compactableIds.slice(-keepRecent))
463    const clearSet = new Set(compactableIds.filter(id => !keepSet.has(id)))
464  
465    if (clearSet.size === 0) {
466      return null
467    }
468  
469    let tokensSaved = 0
470    const result: Message[] = messages.map(message => {
471      if (message.type !== 'user' || !Array.isArray(message.message.content)) {
472        return message
473      }
474      let touched = false
475      const newContent = message.message.content.map(block => {
476        if (
477          block.type === 'tool_result' &&
478          clearSet.has(block.tool_use_id) &&
479          block.content !== TIME_BASED_MC_CLEARED_MESSAGE
480        ) {
481          tokensSaved += calculateToolResultTokens(block)
482          touched = true
483          return { ...block, content: TIME_BASED_MC_CLEARED_MESSAGE }
484        }
485        return block
486      })
487      if (!touched) return message
488      return {
489        ...message,
490        message: { ...message.message, content: newContent },
491      }
492    })
493  
494    if (tokensSaved === 0) {
495      return null
496    }
497  
498    logEvent('tengu_time_based_microcompact', {
499      gapMinutes: Math.round(gapMinutes),
500      gapThresholdMinutes: config.gapThresholdMinutes,
501      toolsCleared: clearSet.size,
502      toolsKept: keepSet.size,
503      keepRecent: config.keepRecent,
504      tokensSaved,
505    })
506  
507    logForDebugging(
508      `[TIME-BASED MC] gap ${Math.round(gapMinutes)}min > ${config.gapThresholdMinutes}min, cleared ${clearSet.size} tool results (~${tokensSaved} tokens), kept last ${keepSet.size}`,
509    )
510  
511    suppressCompactWarning()
512    // Cached-MC state (module-level) holds tool IDs registered on prior turns.
513    // We just content-cleared some of those tools AND invalidated the server
514    // cache by changing prompt content. If cached-MC runs next turn with the
515    // stale state, it would try to cache_edit tools whose server-side entries
516    // no longer exist. Reset it.
517    resetMicrocompactState()
518    // We just changed the prompt content — the next response's cache read will
519    // be low, but that's us, not a break. Tell the detector to expect a drop.
520    // notifyCacheDeletion (not notifyCompaction) because it's already imported
521    // here and achieves the same false-positive suppression — adding the second
522    // symbol to the import was flagged by the circular-deps check.
523    // Pass the actual querySource: getTrackingKey returns the full source string
524    // (e.g. 'repl_main_thread:outputStyle:custom'), not just the prefix.
525    if (feature('PROMPT_CACHE_BREAK_DETECTION') && querySource) {
526      notifyCacheDeletion(querySource)
527    }
528  
529    return { messages: result }
530  }