/ services / compact / sessionMemoryCompact.ts
sessionMemoryCompact.ts
  1  /**
  2   * EXPERIMENT: Session memory compaction
  3   */
  4  
  5  import type { AgentId } from '../../types/ids.js'
  6  import type { HookResultMessage, Message } from '../../types/message.js'
  7  import { logForDebugging } from '../../utils/debug.js'
  8  import { isEnvTruthy } from '../../utils/envUtils.js'
  9  import { errorMessage } from '../../utils/errors.js'
 10  import {
 11    createCompactBoundaryMessage,
 12    createUserMessage,
 13    isCompactBoundaryMessage,
 14  } from '../../utils/messages.js'
 15  import { getMainLoopModel } from '../../utils/model/model.js'
 16  import { getSessionMemoryPath } from '../../utils/permissions/filesystem.js'
 17  import { processSessionStartHooks } from '../../utils/sessionStart.js'
 18  import { getTranscriptPath } from '../../utils/sessionStorage.js'
 19  import { tokenCountFromLastAPIResponse } from '../../utils/tokens.js'
 20  import { extractDiscoveredToolNames } from '../../utils/toolSearch.js'
 21  import {
 22    getDynamicConfig_BLOCKS_ON_INIT,
 23    getFeatureValue_CACHED_MAY_BE_STALE,
 24  } from '../analytics/growthbook.js'
 25  import { logEvent } from '../analytics/index.js'
 26  import {
 27    isSessionMemoryEmpty,
 28    truncateSessionMemoryForCompact,
 29  } from '../SessionMemory/prompts.js'
 30  import {
 31    getLastSummarizedMessageId,
 32    getSessionMemoryContent,
 33    waitForSessionMemoryExtraction,
 34  } from '../SessionMemory/sessionMemoryUtils.js'
 35  import {
 36    annotateBoundaryWithPreservedSegment,
 37    buildPostCompactMessages,
 38    type CompactionResult,
 39    createPlanAttachmentIfNeeded,
 40  } from './compact.js'
 41  import { estimateMessageTokens } from './microCompact.js'
 42  import { getCompactUserSummaryMessage } from './prompt.js'
 43  
 44  /**
 45   * Configuration for session memory compaction thresholds
 46   */
 47  export type SessionMemoryCompactConfig = {
 48    /** Minimum tokens to preserve after compaction */
 49    minTokens: number
 50    /** Minimum number of messages with text blocks to keep */
 51    minTextBlockMessages: number
 52    /** Maximum tokens to preserve after compaction (hard cap) */
 53    maxTokens: number
 54  }
 55  
 56  // Default configuration values (exported for use in tests)
 57  export const DEFAULT_SM_COMPACT_CONFIG: SessionMemoryCompactConfig = {
 58    minTokens: 10_000,
 59    minTextBlockMessages: 5,
 60    maxTokens: 40_000,
 61  }
 62  
 63  // Current configuration (starts with defaults)
 64  let smCompactConfig: SessionMemoryCompactConfig = {
 65    ...DEFAULT_SM_COMPACT_CONFIG,
 66  }
 67  
 68  // Track whether config has been initialized from remote
 69  let configInitialized = false
 70  
 71  /**
 72   * Set the session memory compact configuration
 73   */
 74  export function setSessionMemoryCompactConfig(
 75    config: Partial<SessionMemoryCompactConfig>,
 76  ): void {
 77    smCompactConfig = {
 78      ...smCompactConfig,
 79      ...config,
 80    }
 81  }
 82  
 83  /**
 84   * Get the current session memory compact configuration
 85   */
 86  export function getSessionMemoryCompactConfig(): SessionMemoryCompactConfig {
 87    return { ...smCompactConfig }
 88  }
 89  
 90  /**
 91   * Reset config state (useful for testing)
 92   */
 93  export function resetSessionMemoryCompactConfig(): void {
 94    smCompactConfig = { ...DEFAULT_SM_COMPACT_CONFIG }
 95    configInitialized = false
 96  }
 97  
 98  /**
 99   * Initialize configuration from remote config (GrowthBook).
100   * Only fetches once per session - subsequent calls return immediately.
101   */
102  async function initSessionMemoryCompactConfig(): Promise<void> {
103    if (configInitialized) {
104      return
105    }
106    configInitialized = true
107  
108    // Load config from GrowthBook, merging with defaults
109    const remoteConfig = await getDynamicConfig_BLOCKS_ON_INIT<
110      Partial<SessionMemoryCompactConfig>
111    >('tengu_sm_compact_config', {})
112  
113    // Only use remote values if they are explicitly set (positive numbers)
114    // This ensures sensible defaults aren't overridden by zero values
115    const config: SessionMemoryCompactConfig = {
116      minTokens:
117        remoteConfig.minTokens && remoteConfig.minTokens > 0
118          ? remoteConfig.minTokens
119          : DEFAULT_SM_COMPACT_CONFIG.minTokens,
120      minTextBlockMessages:
121        remoteConfig.minTextBlockMessages && remoteConfig.minTextBlockMessages > 0
122          ? remoteConfig.minTextBlockMessages
123          : DEFAULT_SM_COMPACT_CONFIG.minTextBlockMessages,
124      maxTokens:
125        remoteConfig.maxTokens && remoteConfig.maxTokens > 0
126          ? remoteConfig.maxTokens
127          : DEFAULT_SM_COMPACT_CONFIG.maxTokens,
128    }
129    setSessionMemoryCompactConfig(config)
130  }
131  
/**
 * Tell whether a message carries text content.
 *
 * - assistant: true when any content block has type 'text'
 * - user: true for a non-empty string, or for an array containing a block
 *   of type 'text'
 * - any other message type: false
 */
export function hasTextBlocks(message: Message): boolean {
  switch (message.type) {
    case 'assistant':
      return message.message.content.some(block => block.type === 'text')
    case 'user': {
      const body = message.message.content
      if (typeof body === 'string') {
        return body.length > 0
      }
      return Array.isArray(body) && body.some(block => block.type === 'text')
    }
    default:
      return false
  }
}
151  
/**
 * Collect the tool_use_id of every tool_result block in a user message,
 * in block order. Non-user messages and user messages whose content is not
 * an array yield an empty list.
 */
function getToolResultIds(message: Message): string[] {
  const blocks = message.type === 'user' ? message.message.content : undefined
  if (!Array.isArray(blocks)) {
    return []
  }
  return blocks.flatMap(block =>
    block.type === 'tool_result' ? [block.tool_use_id] : [],
  )
}
171  
/**
 * Tell whether an assistant message holds at least one tool_use block whose
 * id is in `toolUseIds`. Always false for non-assistant messages and for
 * content that is not an array.
 */
function hasToolUseWithIds(message: Message, toolUseIds: Set<string>): boolean {
  if (message.type !== 'assistant') {
    return false
  }
  const blocks = message.message.content
  if (!Array.isArray(blocks)) {
    return false
  }
  for (const block of blocks) {
    if (block.type === 'tool_use' && toolUseIds.has(block.id)) {
      return true
    }
  }
  return false
}
187  
/**
 * Run a single widening pass over the kept range
 * `[startIndex, messages.length)`.
 *
 * Step 1 moves the index back to cover assistant messages whose tool_use
 * blocks are referenced by tool_result blocks inside the range.
 * Step 2 moves it back further to cover assistant messages that share a
 * message.id with an assistant message inside the range.
 *
 * @returns An index less than or equal to `startIndex`. `startIndex` is
 *   returned unchanged when it is out of bounds (<= 0 or >= messages.length)
 *   or when nothing has to be pulled in.
 */
function widenKeptRangeOnce(messages: Message[], startIndex: number): number {
  if (startIndex <= 0 || startIndex >= messages.length) {
    return startIndex
  }

  let adjustedIndex = startIndex

  // Step 1: Handle tool_use/tool_result pairs.
  // Gather, from ALL messages in the kept range, the tool_result ids and the
  // tool_use ids that are already present.
  const toolResultIds: string[] = []
  const toolUseIdsInKeptRange = new Set<string>()
  for (let i = startIndex; i < messages.length; i++) {
    const msg = messages[i]!
    toolResultIds.push(...getToolResultIds(msg))
    if (msg.type === 'assistant' && Array.isArray(msg.message.content)) {
      for (const block of msg.message.content) {
        if (block.type === 'tool_use') {
          toolUseIdsInKeptRange.add(block.id)
        }
      }
    }
  }

  // Only tool_uses that are NOT already in the kept range need to be found
  const neededToolUseIds = new Set(
    toolResultIds.filter(id => !toolUseIdsInKeptRange.has(id)),
  )

  // Walk backwards until every needed tool_use has been located (or the
  // start of the transcript is reached)
  for (let i = adjustedIndex - 1; i >= 0 && neededToolUseIds.size > 0; i--) {
    const message = messages[i]!
    if (!hasToolUseWithIds(message, neededToolUseIds)) {
      continue
    }
    adjustedIndex = i
    // Mark this message's tool_use ids as satisfied
    if (message.type === 'assistant' && Array.isArray(message.message.content)) {
      for (const block of message.message.content) {
        if (block.type === 'tool_use') {
          neededToolUseIds.delete(block.id)
        }
      }
    }
  }

  // Step 2: Handle thinking blocks that share message.id with kept assistant
  // messages. Collect all message.ids from assistant messages in the kept range.
  const messageIdsInKeptRange = new Set<string>()
  for (let i = adjustedIndex; i < messages.length; i++) {
    const msg = messages[i]!
    if (msg.type === 'assistant' && msg.message.id) {
      messageIdsInKeptRange.add(msg.message.id)
    }
  }

  // Look backwards for assistant messages with the same message.id that are
  // not in the kept range. These may contain thinking blocks that need to be
  // merged by normalizeMessagesForAPI.
  for (let i = adjustedIndex - 1; i >= 0; i--) {
    const message = messages[i]!
    if (
      message.type === 'assistant' &&
      message.message.id &&
      messageIdsInKeptRange.has(message.message.id)
    ) {
      adjustedIndex = i
    }
  }

  return adjustedIndex
}

/**
 * Adjust the start index to ensure we don't split tool_use/tool_result pairs
 * or thinking blocks that share the same message.id with kept assistant messages.
 *
 * If ANY message we're keeping contains tool_result blocks, we need to
 * include the preceding assistant message(s) that contain the matching tool_use blocks.
 *
 * Additionally, if ANY assistant message in the kept range has the same message.id
 * as a preceding assistant message (which may contain thinking blocks), we need to
 * include those messages so they can be properly merged by normalizeMessagesForAPI.
 *
 * This handles the case where streaming yields separate messages per content block
 * (thinking, tool_use, etc.) with the same message.id but different uuids. If the
 * startIndex lands on one of these streaming messages, we need to look at ALL kept
 * messages for tool_results, not just the first one.
 *
 * Moving the index back brings additional messages into the kept range, and
 * those messages can carry tool_results or message.ids of their own. The
 * widening pass is therefore repeated until it no longer moves the index, so
 * the invariants hold for the final range and not only for the initial one.
 *
 * Example scenarios:
 *
 * Tool pair scenario:
 *   Session storage (before compaction):
 *     Index N:   assistant, message.id: X, content: [thinking]
 *     Index N+1: assistant, message.id: X, content: [tool_use: ORPHAN_ID]
 *     Index N+2: assistant, message.id: X, content: [tool_use: VALID_ID]
 *     Index N+3: user, content: [tool_result: ORPHAN_ID, tool_result: VALID_ID]
 *
 *   If startIndex = N+2:
 *     - Checking only message N+2 for tool_results finds none and keeps N+2
 *     - After slicing and normalizeMessagesForAPI merging by message.id:
 *       msg[1]: assistant with [tool_use: VALID_ID]  (ORPHAN tool_use was excluded!)
 *       msg[2]: user with [tool_result: ORPHAN_ID, tool_result: VALID_ID]
 *     - API error: orphan tool_result references non-existent tool_use
 *     - This function scans the whole kept range and returns N instead
 *
 * Thinking block scenario:
 *   Session storage (before compaction):
 *     Index N:   assistant, message.id: X, content: [thinking]
 *     Index N+1: assistant, message.id: X, content: [tool_use: ID]
 *     Index N+2: user, content: [tool_result: ID]
 *
 *   If startIndex = N+1:
 *     - Without adjustment: thinking block at N is excluded
 *     - After normalizeMessagesForAPI: thinking block is lost (no message to merge with)
 *     - This function detects that message N+1 has the same message.id as N
 *       and adjusts to N
 *
 * @param messages - Full message list being compacted.
 * @param startIndex - Proposed first index of the kept range.
 * @returns An index less than or equal to `startIndex`; `startIndex` itself
 *   when it is <= 0 or >= messages.length.
 */
export function adjustIndexToPreserveAPIInvariants(
  messages: Message[],
  startIndex: number,
): number {
  let adjustedIndex = startIndex
  // Each pass can only lower the index, and a pass starting at 0 returns
  // immediately, so this loop always terminates.
  for (;;) {
    const widened = widenKeptRangeOnce(messages, adjustedIndex)
    if (widened === adjustedIndex) {
      return adjustedIndex
    }
    adjustedIndex = widened
  }
}
315  
/**
 * Work out where the preserved tail of the conversation should begin.
 *
 * The tail initially starts right after `lastSummarizedIndex` and is then
 * grown backwards one message at a time until both minimums are satisfied:
 * - at least config.minTokens tokens
 * - at least config.minTextBlockMessages messages with text blocks
 * Growth also stops as soon as config.maxTokens is reached, and never goes
 * past the most recent compact boundary message.
 * The resulting index is finally passed through
 * adjustIndexToPreserveAPIInvariants so tool_use/tool_result pairs are not split.
 *
 * @param messages - Full message list being compacted.
 * @param lastSummarizedIndex - Index of the last summarized message. A
 *   negative value means the tail starts out empty.
 * @returns Index of the first message to keep (0 for an empty list).
 */
export function calculateMessagesToKeepIndex(
  messages: Message[],
  lastSummarizedIndex: number,
): number {
  if (messages.length === 0) {
    return 0
  }

  const { minTokens, minTextBlockMessages, maxTokens } =
    getSessionMemoryCompactConfig()

  // A negative index yields an empty tail (keepFrom === messages.length)
  let keepFrom =
    lastSummarizedIndex < 0 ? messages.length : lastSummarizedIndex + 1

  let tokenTotal = 0
  let textMessageTotal = 0

  // Add one message to the running totals
  const account = (msg: Message): void => {
    tokenTotal += estimateMessageTokens([msg])
    if (hasTextBlocks(msg)) {
      textMessageTotal++
    }
  }

  // True once the cap is hit or both minimums are met
  const isDone = (): boolean =>
    tokenTotal >= maxTokens ||
    (tokenTotal >= minTokens && textMessageTotal >= minTextBlockMessages)

  // Totals for the tail as it stands before any expansion
  for (let i = keepFrom; i < messages.length; i++) {
    account(messages[i]!)
  }

  if (!isDone()) {
    // Expand backwards, but floor at the last boundary: the preserved-segment
    // chain has a disk discontinuity there (att[0]→summary shortcut from
    // dedup-skip), which would let the loader's tail→head walk bypass inner
    // preserved messages and then prune them. Reactive compact already slices
    // at the boundary via getMessagesAfterCompactBoundary; this is the same
    // invariant.
    const lastBoundaryIndex = messages.findLastIndex(m =>
      isCompactBoundaryMessage(m),
    )
    const floor = lastBoundaryIndex === -1 ? 0 : lastBoundaryIndex + 1
    for (let i = keepFrom - 1; i >= floor; i--) {
      account(messages[i]!)
      keepFrom = i
      if (isDone()) {
        break
      }
    }
  }

  // Adjust for tool pairs
  return adjustIndexToPreserveAPIInvariants(messages, keepFrom)
}
398  
/**
 * Decide whether compaction should go through session memory.
 *
 * Environment overrides are consulted first, with the enable variable taking
 * precedence over the disable variable. Otherwise both feature flags must be
 * on; they are read from cached (possibly stale) values rather than awaited.
 */
export function shouldUseSessionMemoryCompaction(): boolean {
  // Env var overrides for eval runs and testing
  if (isEnvTruthy(process.env.ENABLE_CLAUDE_CODE_SM_COMPACT)) {
    return true
  }
  if (isEnvTruthy(process.env.DISABLE_CLAUDE_CODE_SM_COMPACT)) {
    return false
  }

  const memoryEnabled = getFeatureValue_CACHED_MAY_BE_STALE(
    'tengu_session_memory',
    false,
  )
  const compactEnabled = getFeatureValue_CACHED_MAY_BE_STALE(
    'tengu_sm_compact',
    false,
  )
  const enabled = memoryEnabled && compactEnabled

  // Report the flag states for debugging (ant-only to avoid noise in external logs)
  if (process.env.USER_TYPE === 'ant') {
    logEvent('tengu_sm_compact_flag_check', {
      tengu_session_memory: memoryEnabled,
      tengu_sm_compact: compactEnabled,
      should_use: enabled,
    })
  }

  return enabled
}
433  
/**
 * Create a CompactionResult whose summary is the session memory text.
 *
 * @param messages - Pre-compaction messages; used for the pre-compact token
 *   count, the boundary's last-message uuid and the discovered tool names.
 * @param sessionMemory - Raw session memory content; truncated before use.
 * @param messagesToKeep - Messages preserved verbatim after the summary.
 * @param hookResults - Session-start hook output to carry in the result.
 * @param transcriptPath - Transcript path passed to the summary message builder.
 * @param agentId - Optional agent id used when looking up a plan attachment.
 * @returns The compaction result. Both post-compact token counts are the
 *   estimate for the summary message alone.
 */
function createCompactionResultFromSessionMemory(
  messages: Message[],
  sessionMemory: string,
  messagesToKeep: Message[],
  hookResults: HookResultMessage[],
  transcriptPath: string,
  agentId?: AgentId,
): CompactionResult {
  const preCompactTokenCount = tokenCountFromLastAPIResponse(messages)

  const boundaryMarker = createCompactBoundaryMessage(
    'auto',
    preCompactTokenCount ?? 0,
    messages[messages.length - 1]?.uuid,
  )
  const preCompactDiscovered = extractDiscoveredToolNames(messages)
  if (preCompactDiscovered.size > 0) {
    boundaryMarker.compactMetadata.preCompactDiscoveredTools = [
      ...preCompactDiscovered,
    ].sort()
  }

  // Truncate oversized sections to prevent session memory from consuming
  // the entire post-compact token budget
  const { truncatedContent, wasTruncated } =
    truncateSessionMemoryForCompact(sessionMemory)

  let summaryContent = getCompactUserSummaryMessage(
    truncatedContent,
    true,
    transcriptPath,
    true,
  )

  if (wasTruncated) {
    // Point the reader at the untruncated file
    const memoryPath = getSessionMemoryPath()
    summaryContent += `\n\nSome session memory sections were truncated for length. The full session memory can be viewed at: ${memoryPath}`
  }

  // Exactly one summary message is produced; naming it avoids indexing the
  // array with a non-null assertion later.
  const summaryMessage = createUserMessage({
    content: summaryContent,
    isCompactSummary: true,
    isVisibleInTranscriptOnly: true,
  })
  const summaryMessages = [summaryMessage]

  const planAttachment = createPlanAttachmentIfNeeded(agentId)
  const attachments = planAttachment ? [planAttachment] : []

  const annotatedBoundary = annotateBoundaryWithPreservedSegment(
    boundaryMarker,
    summaryMessage.uuid,
    messagesToKeep,
  )

  // SM-compact has no compact-API-call, so postCompactTokenCount (kept for
  // event continuity) and truePostCompactTokenCount converge to the same
  // value; estimate it once and reuse it.
  const summaryTokenCount = estimateMessageTokens(summaryMessages)

  return {
    boundaryMarker: annotatedBoundary,
    summaryMessages,
    attachments,
    hookResults,
    messagesToKeep,
    preCompactTokenCount,
    postCompactTokenCount: summaryTokenCount,
    truePostCompactTokenCount: summaryTokenCount,
  }
}
504  
/**
 * Try to use session memory for compaction instead of traditional compaction.
 * Returns null if session memory compaction cannot be used, including when
 * any step of the attempt throws.
 *
 * Handles two scenarios:
 * 1. Normal case: lastSummarizedMessageId is set, so the kept tail starts
 *    after that message and is expanded to meet the configured minimums.
 * 2. Resumed session: lastSummarizedMessageId is not set but session memory
 *    has content. The kept tail starts out empty and is expanded to meet the
 *    configured minimums, with session memory used as the summary.
 *
 * @param messages - Current conversation messages.
 * @param agentId - Optional agent id forwarded to the result builder.
 * @param autoCompactThreshold - When provided, the attempt is abandoned
 *   (null) if the estimated post-compact token count reaches this value.
 * @returns The compaction result, or null to signal that the caller should
 *   fall back to another compaction strategy.
 */
export async function trySessionMemoryCompaction(
  messages: Message[],
  agentId?: AgentId,
  autoCompactThreshold?: number,
): Promise<CompactionResult | null> {
  if (!shouldUseSessionMemoryCompaction()) {
    return null
  }

  // Everything below runs inside the try so that a failure in any step
  // (config load, extraction wait, memory read, hooks, ...) produces the
  // documented null result instead of an exception.
  try {
    // Initialize config from remote (only fetches once)
    await initSessionMemoryCompactConfig()

    // Wait for session memory extraction before reading the memory
    await waitForSessionMemoryExtraction()

    const lastSummarizedMessageId = getLastSummarizedMessageId()
    const sessionMemory = await getSessionMemoryContent()

    // No session memory content available at all
    if (!sessionMemory) {
      logEvent('tengu_sm_compact_no_session_memory', {})
      return null
    }

    // Session memory exists but matches the template (no actual content extracted)
    // Fall back to legacy compact behavior
    if (await isSessionMemoryEmpty(sessionMemory)) {
      logEvent('tengu_sm_compact_empty_template', {})
      return null
    }

    let lastSummarizedIndex: number

    if (lastSummarizedMessageId) {
      // Normal case: we know exactly which messages have been summarized
      lastSummarizedIndex = messages.findIndex(
        msg => msg.uuid === lastSummarizedMessageId,
      )

      if (lastSummarizedIndex === -1) {
        // The summarized message ID doesn't exist in current messages.
        // This can happen if messages were modified - fall back to legacy
        // compact since we can't determine the boundary between summarized
        // and unsummarized messages
        logEvent('tengu_sm_compact_summarized_id_not_found', {})
        return null
      }
    } else {
      // Resumed session case: session memory has content but we don't know
      // the boundary. Point at the last message so the kept range starts at
      // messages.length (no messages kept initially).
      lastSummarizedIndex = messages.length - 1
      logEvent('tengu_sm_compact_resumed_session', {})
    }

    // Calculate the starting index for messages to keep.
    // This starts from lastSummarizedIndex, expands to meet minimums,
    // and adjusts to not split tool_use/tool_result pairs
    const startIndex = calculateMessagesToKeepIndex(
      messages,
      lastSummarizedIndex,
    )
    // Filter out old compact boundary messages from messagesToKeep.
    // After REPL pruning, old boundaries re-yielded from messagesToKeep would
    // trigger an unwanted second prune (isCompactBoundaryMessage returns true),
    // discarding the new boundary and summary.
    const messagesToKeep = messages
      .slice(startIndex)
      .filter(m => !isCompactBoundaryMessage(m))

    // Run session start hooks to restore CLAUDE.md and other context
    const hookResults = await processSessionStartHooks('compact', {
      model: getMainLoopModel(),
    })

    // Get transcript path for the summary message
    const transcriptPath = getTranscriptPath()

    const compactionResult = createCompactionResultFromSessionMemory(
      messages,
      sessionMemory,
      messagesToKeep,
      hookResults,
      transcriptPath,
      agentId,
    )

    // Re-estimate over the full post-compact message list; this replaces the
    // summary-only estimate stored by the result builder.
    const postCompactMessages = buildPostCompactMessages(compactionResult)
    const postCompactTokenCount = estimateMessageTokens(postCompactMessages)

    // Only check threshold if one was provided (for autocompact)
    if (
      autoCompactThreshold !== undefined &&
      postCompactTokenCount >= autoCompactThreshold
    ) {
      logEvent('tengu_sm_compact_threshold_exceeded', {
        postCompactTokenCount,
        autoCompactThreshold,
      })
      return null
    }

    return {
      ...compactionResult,
      postCompactTokenCount,
      truePostCompactTokenCount: postCompactTokenCount,
    }
  } catch (error) {
    // Use logEvent instead of logError since errors here are expected
    // (e.g., file not found, path issues) and shouldn't go to error logs
    logEvent('tengu_sm_compact_error', {})
    if (process.env.USER_TYPE === 'ant') {
      logForDebugging(`Session memory compaction error: ${errorMessage(error)}`)
    }
    return null
  }
}