// services/compact/compact.ts
   1  import { feature } from 'bun:bundle'
   2  import type { UUID } from 'crypto'
   3  import uniqBy from 'lodash-es/uniqBy.js'
   4  
   5  /* eslint-disable @typescript-eslint/no-require-imports */
   6  const sessionTranscriptModule = feature('KAIROS')
   7    ? (require('../sessionTranscript/sessionTranscript.js') as typeof import('../sessionTranscript/sessionTranscript.js'))
   8    : null
   9  
  10  import { APIUserAbortError } from '@anthropic-ai/sdk'
  11  import { markPostCompaction } from 'src/bootstrap/state.js'
  12  import { getInvokedSkillsForAgent } from '../../bootstrap/state.js'
  13  import type { QuerySource } from '../../constants/querySource.js'
  14  import type { CanUseToolFn } from '../../hooks/useCanUseTool.js'
  15  import type { Tool, ToolUseContext } from '../../Tool.js'
  16  import type { LocalAgentTaskState } from '../../tasks/LocalAgentTask/LocalAgentTask.js'
  17  import { FileReadTool } from '../../tools/FileReadTool/FileReadTool.js'
  18  import {
  19    FILE_READ_TOOL_NAME,
  20    FILE_UNCHANGED_STUB,
  21  } from '../../tools/FileReadTool/prompt.js'
  22  import { ToolSearchTool } from '../../tools/ToolSearchTool/ToolSearchTool.js'
  23  import type { AgentId } from '../../types/ids.js'
  24  import type {
  25    AssistantMessage,
  26    AttachmentMessage,
  27    HookResultMessage,
  28    Message,
  29    PartialCompactDirection,
  30    SystemCompactBoundaryMessage,
  31    SystemMessage,
  32    UserMessage,
  33  } from '../../types/message.js'
  34  import {
  35    createAttachmentMessage,
  36    generateFileAttachment,
  37    getAgentListingDeltaAttachment,
  38    getDeferredToolsDeltaAttachment,
  39    getMcpInstructionsDeltaAttachment,
  40  } from '../../utils/attachments.js'
  41  import { getMemoryPath } from '../../utils/config.js'
  42  import { COMPACT_MAX_OUTPUT_TOKENS } from '../../utils/context.js'
  43  import {
  44    analyzeContext,
  45    tokenStatsToStatsigMetrics,
  46  } from '../../utils/contextAnalysis.js'
  47  import { logForDebugging } from '../../utils/debug.js'
  48  import { hasExactErrorMessage } from '../../utils/errors.js'
  49  import { cacheToObject } from '../../utils/fileStateCache.js'
  50  import {
  51    type CacheSafeParams,
  52    runForkedAgent,
  53  } from '../../utils/forkedAgent.js'
  54  import {
  55    executePostCompactHooks,
  56    executePreCompactHooks,
  57  } from '../../utils/hooks.js'
  58  import { logError } from '../../utils/log.js'
  59  import { MEMORY_TYPE_VALUES } from '../../utils/memory/types.js'
  60  import {
  61    createCompactBoundaryMessage,
  62    createUserMessage,
  63    getAssistantMessageText,
  64    getLastAssistantMessage,
  65    getMessagesAfterCompactBoundary,
  66    isCompactBoundaryMessage,
  67    normalizeMessagesForAPI,
  68  } from '../../utils/messages.js'
  69  import { expandPath } from '../../utils/path.js'
  70  import { getPlan, getPlanFilePath } from '../../utils/plans.js'
  71  import {
  72    isSessionActivityTrackingActive,
  73    sendSessionActivitySignal,
  74  } from '../../utils/sessionActivity.js'
  75  import { processSessionStartHooks } from '../../utils/sessionStart.js'
  76  import {
  77    getTranscriptPath,
  78    reAppendSessionMetadata,
  79  } from '../../utils/sessionStorage.js'
  80  import { sleep } from '../../utils/sleep.js'
  81  import { jsonStringify } from '../../utils/slowOperations.js'
  82  /* eslint-enable @typescript-eslint/no-require-imports */
  83  import { asSystemPrompt } from '../../utils/systemPromptType.js'
  84  import { getTaskOutputPath } from '../../utils/task/diskOutput.js'
  85  import {
  86    getTokenUsage,
  87    tokenCountFromLastAPIResponse,
  88    tokenCountWithEstimation,
  89  } from '../../utils/tokens.js'
  90  import {
  91    extractDiscoveredToolNames,
  92    isToolSearchEnabled,
  93  } from '../../utils/toolSearch.js'
  94  import { getFeatureValue_CACHED_MAY_BE_STALE } from '../analytics/growthbook.js'
  95  import {
  96    type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  97    logEvent,
  98  } from '../analytics/index.js'
  99  import {
 100    getMaxOutputTokensForModel,
 101    queryModelWithStreaming,
 102  } from '../api/claude.js'
 103  import {
 104    getPromptTooLongTokenGap,
 105    PROMPT_TOO_LONG_ERROR_MESSAGE,
 106    startsWithApiErrorPrefix,
 107  } from '../api/errors.js'
 108  import { notifyCompaction } from '../api/promptCacheBreakDetection.js'
 109  import { getRetryDelay } from '../api/withRetry.js'
 110  import { logPermissionContextForAnts } from '../internalLogging.js'
 111  import {
 112    roughTokenCountEstimation,
 113    roughTokenCountEstimationForMessages,
 114  } from '../tokenEstimation.js'
 115  import { groupMessagesByApiRound } from './grouping.js'
 116  import {
 117    getCompactPrompt,
 118    getCompactUserSummaryMessage,
 119    getPartialCompactPrompt,
 120  } from './prompt.js'
 121  
// Max number of previously-read files re-injected as attachments after a
// compaction (passed to createPostCompactFileAttachments in compactConversation).
export const POST_COMPACT_MAX_FILES_TO_RESTORE = 5
// Token budgets for post-compact file restoration. NOTE(review): the consumers
// are outside this chunk (presumably createPostCompactFileAttachments) — confirm.
export const POST_COMPACT_TOKEN_BUDGET = 50_000
export const POST_COMPACT_MAX_TOKENS_PER_FILE = 5_000
// Skills can be large (verify=18.7KB, claude-api=20.1KB). Previously re-injected
// unbounded on every compact → 5-10K tok/compact. Per-skill truncation beats
// dropping — instructions at the top of a skill file are usually the critical
// part. Budget sized to hold ~5 skills at the per-skill cap.
export const POST_COMPACT_MAX_TOKENS_PER_SKILL = 5_000
export const POST_COMPACT_SKILLS_TOKEN_BUDGET = 25_000
// NOTE(review): not referenced in this chunk — presumably caps stream retries
// in streamCompactSummary; confirm before removing.
const MAX_COMPACT_STREAMING_RETRIES = 2
 132  
 133  /**
 134   * Strip image blocks from user messages before sending for compaction.
 135   * Images are not needed for generating a conversation summary and can
 136   * cause the compaction API call itself to hit the prompt-too-long limit,
 137   * especially in CCD sessions where users frequently attach images.
 138   * Replaces image blocks with a text marker so the summary still notes
 139   * that an image was shared.
 140   *
 141   * Note: Only user messages contain images (either directly attached or within
 142   * tool_result content from tools). Assistant messages contain text, tool_use,
 143   * and thinking blocks but not images.
 144   */
 145  export function stripImagesFromMessages(messages: Message[]): Message[] {
 146    return messages.map(message => {
 147      if (message.type !== 'user') {
 148        return message
 149      }
 150  
 151      const content = message.message.content
 152      if (!Array.isArray(content)) {
 153        return message
 154      }
 155  
 156      let hasMediaBlock = false
 157      const newContent = content.flatMap(block => {
 158        if (block.type === 'image') {
 159          hasMediaBlock = true
 160          return [{ type: 'text' as const, text: '[image]' }]
 161        }
 162        if (block.type === 'document') {
 163          hasMediaBlock = true
 164          return [{ type: 'text' as const, text: '[document]' }]
 165        }
 166        // Also strip images/documents nested inside tool_result content arrays
 167        if (block.type === 'tool_result' && Array.isArray(block.content)) {
 168          let toolHasMedia = false
 169          const newToolContent = block.content.map(item => {
 170            if (item.type === 'image') {
 171              toolHasMedia = true
 172              return { type: 'text' as const, text: '[image]' }
 173            }
 174            if (item.type === 'document') {
 175              toolHasMedia = true
 176              return { type: 'text' as const, text: '[document]' }
 177            }
 178            return item
 179          })
 180          if (toolHasMedia) {
 181            hasMediaBlock = true
 182            return [{ ...block, content: newToolContent }]
 183          }
 184        }
 185        return [block]
 186      })
 187  
 188      if (!hasMediaBlock) {
 189        return message
 190      }
 191  
 192      return {
 193        ...message,
 194        message: {
 195          ...message.message,
 196          content: newContent,
 197        },
 198      } as typeof message
 199    })
 200  }
 201  
 202  /**
 203   * Strip attachment types that are re-injected post-compaction anyway.
 204   * skill_discovery/skill_listing are re-surfaced by resetSentSkillNames()
 205   * + the next turn's discovery signal, so feeding them to the summarizer
 206   * wastes tokens and pollutes the summary with stale skill suggestions.
 207   *
 208   * No-op when EXPERIMENTAL_SKILL_SEARCH is off (the attachment types
 209   * don't exist on external builds).
 210   */
 211  export function stripReinjectedAttachments(messages: Message[]): Message[] {
 212    if (feature('EXPERIMENTAL_SKILL_SEARCH')) {
 213      return messages.filter(
 214        m =>
 215          !(
 216            m.type === 'attachment' &&
 217            (m.attachment.type === 'skill_discovery' ||
 218              m.attachment.type === 'skill_listing')
 219          ),
 220      )
 221    }
 222    return messages
 223  }
 224  
// Thrown by compactConversation when called with an empty messages array.
export const ERROR_MESSAGE_NOT_ENOUGH_MESSAGES =
  'Not enough messages to compact.'
// Max head-truncation retries when the compact request itself returns
// prompt-too-long (see the PTL retry loop in compactConversation).
const MAX_PTL_RETRIES = 3
// Content of the synthetic user message truncateHeadForPTLRetry prepends when
// dropping group 0 leaves an assistant-first sequence; also detected and
// stripped again on subsequent retries so it can't stall progress.
const PTL_RETRY_MARKER = '[earlier conversation truncated for compaction retry]'
 229  
 230  /**
 231   * Drops the oldest API-round groups from messages until tokenGap is covered.
 232   * Falls back to dropping 20% of groups when the gap is unparseable (some
 233   * Vertex/Bedrock error formats). Returns null when nothing can be dropped
 234   * without leaving an empty summarize set.
 235   *
 236   * This is the last-resort escape hatch for CC-1180 — when the compact request
 237   * itself hits prompt-too-long, the user is otherwise stuck. Dropping the
 238   * oldest context is lossy but unblocks them. The reactive-compact path
 239   * (compactMessages.ts) has the proper retry loop that peels from the tail;
 240   * this helper is the dumb-but-safe fallback for the proactive/manual path
 241   * that wasn't migrated in bfdb472f's unification.
 242   */
 243  export function truncateHeadForPTLRetry(
 244    messages: Message[],
 245    ptlResponse: AssistantMessage,
 246  ): Message[] | null {
 247    // Strip our own synthetic marker from a previous retry before grouping.
 248    // Otherwise it becomes its own group 0 and the 20% fallback stalls
 249    // (drops only the marker, re-adds it, zero progress on retry 2+).
 250    const input =
 251      messages[0]?.type === 'user' &&
 252      messages[0].isMeta &&
 253      messages[0].message.content === PTL_RETRY_MARKER
 254        ? messages.slice(1)
 255        : messages
 256  
 257    const groups = groupMessagesByApiRound(input)
 258    if (groups.length < 2) return null
 259  
 260    const tokenGap = getPromptTooLongTokenGap(ptlResponse)
 261    let dropCount: number
 262    if (tokenGap !== undefined) {
 263      let acc = 0
 264      dropCount = 0
 265      for (const g of groups) {
 266        acc += roughTokenCountEstimationForMessages(g)
 267        dropCount++
 268        if (acc >= tokenGap) break
 269      }
 270    } else {
 271      dropCount = Math.max(1, Math.floor(groups.length * 0.2))
 272    }
 273  
 274    // Keep at least one group so there's something to summarize.
 275    dropCount = Math.min(dropCount, groups.length - 1)
 276    if (dropCount < 1) return null
 277  
 278    const sliced = groups.slice(dropCount).flat()
 279    // groupMessagesByApiRound puts the preamble in group 0 and starts every
 280    // subsequent group with an assistant message. Dropping group 0 leaves an
 281    // assistant-first sequence which the API rejects (first message must be
 282    // role=user). Prepend a synthetic user marker — ensureToolResultPairing
 283    // already handles any orphaned tool_results this creates.
 284    if (sliced[0]?.type === 'assistant') {
 285      return [
 286        createUserMessage({ content: PTL_RETRY_MARKER, isMeta: true }),
 287        ...sliced,
 288      ]
 289    }
 290    return sliced
 291  }
 292  
// Thrown by compactConversation once PTL head-truncation retries are exhausted.
export const ERROR_MESSAGE_PROMPT_TOO_LONG =
  'Conversation too long. Press esc twice to go up a few messages and try again.'
// NOTE(review): the two constants below are not referenced in this chunk —
// presumably matched/surfaced by the streaming or error paths elsewhere in
// this file; confirm before changing their text (callers may compare exactly).
export const ERROR_MESSAGE_USER_ABORT = 'API Error: Request was aborted.'
export const ERROR_MESSAGE_INCOMPLETE_RESPONSE =
  'Compaction interrupted · This may be due to network issues — please try again.'
 298  
/**
 * Result of a compaction run, consumed by buildPostCompactMessages and the
 * callers that splice the new context into the conversation.
 */
export interface CompactionResult {
  // System message marking the compact boundary (carries compactMetadata).
  boundaryMarker: SystemMessage
  // Summary user message(s) produced by the compaction model call.
  summaryMessages: UserMessage[]
  // Post-compact attachments: restored files, plan/skill/tool re-announcements.
  attachments: AttachmentMessage[]
  // Output of SessionStart hooks executed after compaction.
  hookResults: HookResultMessage[]
  // Original messages preserved verbatim (partial/suffix-preserving paths only).
  messagesToKeep?: Message[]
  // Combined Pre/PostCompact hook display text, when any hook provided one.
  userDisplayMessage?: string
  // Estimated token size of the conversation before compaction.
  preCompactTokenCount?: number
  // Compact API call's total usage — NOT the resulting context size
  // (see the compactionCallTotalTokens comment in compactConversation).
  postCompactTokenCount?: number
  // Message-payload estimate of the resulting post-compact context.
  truePostCompactTokenCount?: number
  // Raw usage metrics from the compaction API response.
  compactionUsage?: ReturnType<typeof getTokenUsage>
}
 311  
 312  /**
 313   * Diagnosis context passed from autoCompactIfNeeded into compactConversation.
 314   * Lets the tengu_compact event disambiguate same-chain loops (H2) from
 315   * cross-agent (H1/H5) and manual-vs-auto (H3) compactions without joins.
 316   */
 317  export type RecompactionInfo = {
 318    isRecompactionInChain: boolean
 319    turnsSincePreviousCompact: number
 320    previousCompactTurnId?: string
 321    autoCompactThreshold: number
 322    querySource?: QuerySource
 323  }
 324  
 325  /**
 326   * Build the base post-compact messages array from a CompactionResult.
 327   * This ensures consistent ordering across all compaction paths.
 328   * Order: boundaryMarker, summaryMessages, messagesToKeep, attachments, hookResults
 329   */
 330  export function buildPostCompactMessages(result: CompactionResult): Message[] {
 331    return [
 332      result.boundaryMarker,
 333      ...result.summaryMessages,
 334      ...(result.messagesToKeep ?? []),
 335      ...result.attachments,
 336      ...result.hookResults,
 337    ]
 338  }
 339  
 340  /**
 341   * Annotate a compact boundary with relink metadata for messagesToKeep.
 342   * Preserved messages keep their original parentUuids on disk (dedup-skipped);
 343   * the loader uses this to patch head→anchor and anchor's-other-children→tail.
 344   *
 345   * `anchorUuid` = what sits immediately before keep[0] in the desired chain:
 346   *   - suffix-preserving (reactive/session-memory): last summary message
 347   *   - prefix-preserving (partial compact): the boundary itself
 348   */
 349  export function annotateBoundaryWithPreservedSegment(
 350    boundary: SystemCompactBoundaryMessage,
 351    anchorUuid: UUID,
 352    messagesToKeep: readonly Message[] | undefined,
 353  ): SystemCompactBoundaryMessage {
 354    const keep = messagesToKeep ?? []
 355    if (keep.length === 0) return boundary
 356    return {
 357      ...boundary,
 358      compactMetadata: {
 359        ...boundary.compactMetadata,
 360        preservedSegment: {
 361          headUuid: keep[0]!.uuid,
 362          anchorUuid,
 363          tailUuid: keep.at(-1)!.uuid,
 364        },
 365      },
 366    }
 367  }
 368  
 369  /**
 370   * Merges user-supplied custom instructions with hook-provided instructions.
 371   * User instructions come first; hook instructions are appended.
 372   * Empty strings normalize to undefined.
 373   */
 374  export function mergeHookInstructions(
 375    userInstructions: string | undefined,
 376    hookInstructions: string | undefined,
 377  ): string | undefined {
 378    if (!hookInstructions) return userInstructions || undefined
 379    if (!userInstructions) return hookInstructions
 380    return `${userInstructions}\n\n${hookInstructions}`
 381  }
 382  
 383  /**
 384   * Creates a compact version of a conversation by summarizing older messages
 385   * and preserving recent conversation history.
 386   */
 387  export async function compactConversation(
 388    messages: Message[],
 389    context: ToolUseContext,
 390    cacheSafeParams: CacheSafeParams,
 391    suppressFollowUpQuestions: boolean,
 392    customInstructions?: string,
 393    isAutoCompact: boolean = false,
 394    recompactionInfo?: RecompactionInfo,
 395  ): Promise<CompactionResult> {
 396    try {
 397      if (messages.length === 0) {
 398        throw new Error(ERROR_MESSAGE_NOT_ENOUGH_MESSAGES)
 399      }
 400  
 401      const preCompactTokenCount = tokenCountWithEstimation(messages)
 402  
 403      const appState = context.getAppState()
 404      void logPermissionContextForAnts(appState.toolPermissionContext, 'summary')
 405  
 406      context.onCompactProgress?.({
 407        type: 'hooks_start',
 408        hookType: 'pre_compact',
 409      })
 410  
 411      // Execute PreCompact hooks
 412      context.setSDKStatus?.('compacting')
 413      const hookResult = await executePreCompactHooks(
 414        {
 415          trigger: isAutoCompact ? 'auto' : 'manual',
 416          customInstructions: customInstructions ?? null,
 417        },
 418        context.abortController.signal,
 419      )
 420      customInstructions = mergeHookInstructions(
 421        customInstructions,
 422        hookResult.newCustomInstructions,
 423      )
 424      const userDisplayMessage = hookResult.userDisplayMessage
 425  
 426      // Show requesting mode with up arrow and custom message
 427      context.setStreamMode?.('requesting')
 428      context.setResponseLength?.(() => 0)
 429      context.onCompactProgress?.({ type: 'compact_start' })
 430  
 431      // 3P default: true — forked-agent path reuses main conversation's prompt cache.
 432      // Experiment (Jan 2026) confirmed: false path is 98% cache miss, costs ~0.76% of
 433      // fleet cache_creation (~38B tok/day), concentrated in ephemeral envs (CCR/GHA/SDK)
 434      // with cold GB cache and 3P providers where GB is disabled. GB gate kept as kill-switch.
 435      const promptCacheSharingEnabled = getFeatureValue_CACHED_MAY_BE_STALE(
 436        'tengu_compact_cache_prefix',
 437        true,
 438      )
 439  
 440      const compactPrompt = getCompactPrompt(customInstructions)
 441      const summaryRequest = createUserMessage({
 442        content: compactPrompt,
 443      })
 444  
 445      let messagesToSummarize = messages
 446      let retryCacheSafeParams = cacheSafeParams
 447      let summaryResponse: AssistantMessage
 448      let summary: string | null
 449      let ptlAttempts = 0
 450      for (;;) {
 451        summaryResponse = await streamCompactSummary({
 452          messages: messagesToSummarize,
 453          summaryRequest,
 454          appState,
 455          context,
 456          preCompactTokenCount,
 457          cacheSafeParams: retryCacheSafeParams,
 458        })
 459        summary = getAssistantMessageText(summaryResponse)
 460        if (!summary?.startsWith(PROMPT_TOO_LONG_ERROR_MESSAGE)) break
 461  
 462        // CC-1180: compact request itself hit prompt-too-long. Truncate the
 463        // oldest API-round groups and retry rather than leaving the user stuck.
 464        ptlAttempts++
 465        const truncated =
 466          ptlAttempts <= MAX_PTL_RETRIES
 467            ? truncateHeadForPTLRetry(messagesToSummarize, summaryResponse)
 468            : null
 469        if (!truncated) {
 470          logEvent('tengu_compact_failed', {
 471            reason:
 472              'prompt_too_long' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 473            preCompactTokenCount,
 474            promptCacheSharingEnabled,
 475            ptlAttempts,
 476          })
 477          throw new Error(ERROR_MESSAGE_PROMPT_TOO_LONG)
 478        }
 479        logEvent('tengu_compact_ptl_retry', {
 480          attempt: ptlAttempts,
 481          droppedMessages: messagesToSummarize.length - truncated.length,
 482          remainingMessages: truncated.length,
 483        })
 484        messagesToSummarize = truncated
 485        // The forked-agent path reads from cacheSafeParams.forkContextMessages,
 486        // not the messages param — thread the truncated set through both paths.
 487        retryCacheSafeParams = {
 488          ...retryCacheSafeParams,
 489          forkContextMessages: truncated,
 490        }
 491      }
 492  
 493      if (!summary) {
 494        logForDebugging(
 495          `Compact failed: no summary text in response. Response: ${jsonStringify(summaryResponse)}`,
 496          { level: 'error' },
 497        )
 498        logEvent('tengu_compact_failed', {
 499          reason:
 500            'no_summary' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 501          preCompactTokenCount,
 502          promptCacheSharingEnabled,
 503        })
 504        throw new Error(
 505          `Failed to generate conversation summary - response did not contain valid text content`,
 506        )
 507      } else if (startsWithApiErrorPrefix(summary)) {
 508        logEvent('tengu_compact_failed', {
 509          reason:
 510            'api_error' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 511          preCompactTokenCount,
 512          promptCacheSharingEnabled,
 513        })
 514        throw new Error(summary)
 515      }
 516  
 517      // Store the current file state before clearing
 518      const preCompactReadFileState = cacheToObject(context.readFileState)
 519  
 520      // Clear the cache
 521      context.readFileState.clear()
 522      context.loadedNestedMemoryPaths?.clear()
 523  
 524      // Intentionally NOT resetting sentSkillNames: re-injecting the full
 525      // skill_listing (~4K tokens) post-compact is pure cache_creation with
 526      // marginal benefit. The model still has SkillTool in its schema and
 527      // invoked_skills attachment (below) preserves used-skill content. Ants
 528      // with EXPERIMENTAL_SKILL_SEARCH already skip re-injection via the
 529      // early-return in getSkillListingAttachments.
 530  
 531      // Run async attachment generation in parallel
 532      const [fileAttachments, asyncAgentAttachments] = await Promise.all([
 533        createPostCompactFileAttachments(
 534          preCompactReadFileState,
 535          context,
 536          POST_COMPACT_MAX_FILES_TO_RESTORE,
 537        ),
 538        createAsyncAgentAttachmentsIfNeeded(context),
 539      ])
 540  
 541      const postCompactFileAttachments: AttachmentMessage[] = [
 542        ...fileAttachments,
 543        ...asyncAgentAttachments,
 544      ]
 545      const planAttachment = createPlanAttachmentIfNeeded(context.agentId)
 546      if (planAttachment) {
 547        postCompactFileAttachments.push(planAttachment)
 548      }
 549  
 550      // Add plan mode instructions if currently in plan mode, so the model
 551      // continues operating in plan mode after compaction
 552      const planModeAttachment = await createPlanModeAttachmentIfNeeded(context)
 553      if (planModeAttachment) {
 554        postCompactFileAttachments.push(planModeAttachment)
 555      }
 556  
 557      // Add skill attachment if skills were invoked in this session
 558      const skillAttachment = createSkillAttachmentIfNeeded(context.agentId)
 559      if (skillAttachment) {
 560        postCompactFileAttachments.push(skillAttachment)
 561      }
 562  
 563      // Compaction ate prior delta attachments. Re-announce from the current
 564      // state so the model has tool/instruction context on the first
 565      // post-compact turn. Empty message history → diff against nothing →
 566      // announces the full set.
 567      for (const att of getDeferredToolsDeltaAttachment(
 568        context.options.tools,
 569        context.options.mainLoopModel,
 570        [],
 571        { callSite: 'compact_full' },
 572      )) {
 573        postCompactFileAttachments.push(createAttachmentMessage(att))
 574      }
 575      for (const att of getAgentListingDeltaAttachment(context, [])) {
 576        postCompactFileAttachments.push(createAttachmentMessage(att))
 577      }
 578      for (const att of getMcpInstructionsDeltaAttachment(
 579        context.options.mcpClients,
 580        context.options.tools,
 581        context.options.mainLoopModel,
 582        [],
 583      )) {
 584        postCompactFileAttachments.push(createAttachmentMessage(att))
 585      }
 586  
 587      context.onCompactProgress?.({
 588        type: 'hooks_start',
 589        hookType: 'session_start',
 590      })
 591      // Execute SessionStart hooks after successful compaction
 592      const hookMessages = await processSessionStartHooks('compact', {
 593        model: context.options.mainLoopModel,
 594      })
 595  
 596      // Create the compact boundary marker and summary messages before the
 597      // event so we can compute the true resulting-context size.
 598      const boundaryMarker = createCompactBoundaryMessage(
 599        isAutoCompact ? 'auto' : 'manual',
 600        preCompactTokenCount ?? 0,
 601        messages.at(-1)?.uuid,
 602      )
 603      // Carry loaded-tool state — the summary doesn't preserve tool_reference
 604      // blocks, so the post-compact schema filter needs this to keep sending
 605      // already-loaded deferred tool schemas to the API.
 606      const preCompactDiscovered = extractDiscoveredToolNames(messages)
 607      if (preCompactDiscovered.size > 0) {
 608        boundaryMarker.compactMetadata.preCompactDiscoveredTools = [
 609          ...preCompactDiscovered,
 610        ].sort()
 611      }
 612  
 613      const transcriptPath = getTranscriptPath()
 614      const summaryMessages: UserMessage[] = [
 615        createUserMessage({
 616          content: getCompactUserSummaryMessage(
 617            summary,
 618            suppressFollowUpQuestions,
 619            transcriptPath,
 620          ),
 621          isCompactSummary: true,
 622          isVisibleInTranscriptOnly: true,
 623        }),
 624      ]
 625  
 626      // Previously "postCompactTokenCount" — renamed because this is the
 627      // compact API call's total usage (input_tokens ≈ preCompactTokenCount),
 628      // NOT the size of the resulting context. Kept for event-field continuity.
 629      const compactionCallTotalTokens = tokenCountFromLastAPIResponse([
 630        summaryResponse,
 631      ])
 632  
 633      // Message-payload estimate of the resulting context. The next iteration's
 634      // shouldAutoCompact will see this PLUS ~20-40K for system prompt + tools +
 635      // userContext (via API usage.input_tokens). So `willRetriggerNextTurn: true`
 636      // is a strong signal; `false` may still retrigger when this is close to threshold.
 637      const truePostCompactTokenCount = roughTokenCountEstimationForMessages([
 638        boundaryMarker,
 639        ...summaryMessages,
 640        ...postCompactFileAttachments,
 641        ...hookMessages,
 642      ])
 643  
 644      // Extract compaction API usage metrics
 645      const compactionUsage = getTokenUsage(summaryResponse)
 646  
 647      const querySourceForEvent =
 648        recompactionInfo?.querySource ?? context.options.querySource ?? 'unknown'
 649  
 650      logEvent('tengu_compact', {
 651        preCompactTokenCount,
 652        // Kept for continuity — semantically the compact API call's total usage
 653        postCompactTokenCount: compactionCallTotalTokens,
 654        truePostCompactTokenCount,
 655        autoCompactThreshold: recompactionInfo?.autoCompactThreshold ?? -1,
 656        willRetriggerNextTurn:
 657          recompactionInfo !== undefined &&
 658          truePostCompactTokenCount >= recompactionInfo.autoCompactThreshold,
 659        isAutoCompact,
 660        querySource:
 661          querySourceForEvent as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 662        queryChainId: (context.queryTracking?.chainId ??
 663          '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 664        queryDepth: context.queryTracking?.depth ?? -1,
 665        isRecompactionInChain: recompactionInfo?.isRecompactionInChain ?? false,
 666        turnsSincePreviousCompact:
 667          recompactionInfo?.turnsSincePreviousCompact ?? -1,
 668        previousCompactTurnId: (recompactionInfo?.previousCompactTurnId ??
 669          '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 670        compactionInputTokens: compactionUsage?.input_tokens,
 671        compactionOutputTokens: compactionUsage?.output_tokens,
 672        compactionCacheReadTokens: compactionUsage?.cache_read_input_tokens ?? 0,
 673        compactionCacheCreationTokens:
 674          compactionUsage?.cache_creation_input_tokens ?? 0,
 675        compactionTotalTokens: compactionUsage
 676          ? compactionUsage.input_tokens +
 677            (compactionUsage.cache_creation_input_tokens ?? 0) +
 678            (compactionUsage.cache_read_input_tokens ?? 0) +
 679            compactionUsage.output_tokens
 680          : 0,
 681        promptCacheSharingEnabled,
 682        // analyzeContext walks every content block (~11ms on a 4.5K-message
 683        // session) purely for this telemetry breakdown. Computed here, past
 684        // the compaction-API await, so the sync walk doesn't starve the
 685        // render loop before compaction even starts. Same deferral pattern
 686        // as reactiveCompact.ts.
 687        ...(() => {
 688          try {
 689            return tokenStatsToStatsigMetrics(analyzeContext(messages))
 690          } catch (error) {
 691            logError(error as Error)
 692            return {}
 693          }
 694        })(),
 695      })
 696  
 697      // Reset cache read baseline so the post-compact drop isn't flagged as a break
 698      if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
 699        notifyCompaction(
 700          context.options.querySource ?? 'compact',
 701          context.agentId,
 702        )
 703      }
 704      markPostCompaction()
 705  
 706      // Re-append session metadata (custom title, tag) so it stays within
 707      // the 16KB tail window that readLiteMetadata reads for --resume display.
 708      // Without this, enough post-compaction messages push the metadata entry
 709      // out of the window, causing --resume to show the auto-generated title
 710      // instead of the user-set session name.
 711      reAppendSessionMetadata()
 712  
 713      // Write a reduced transcript segment for the pre-compaction messages
 714      // (assistant mode only). Fire-and-forget — errors are logged internally.
 715      if (feature('KAIROS')) {
 716        void sessionTranscriptModule?.writeSessionTranscriptSegment(messages)
 717      }
 718  
 719      context.onCompactProgress?.({
 720        type: 'hooks_start',
 721        hookType: 'post_compact',
 722      })
 723      const postCompactHookResult = await executePostCompactHooks(
 724        {
 725          trigger: isAutoCompact ? 'auto' : 'manual',
 726          compactSummary: summary,
 727        },
 728        context.abortController.signal,
 729      )
 730  
 731      const combinedUserDisplayMessage = [
 732        userDisplayMessage,
 733        postCompactHookResult.userDisplayMessage,
 734      ]
 735        .filter(Boolean)
 736        .join('\n')
 737  
 738      return {
 739        boundaryMarker,
 740        summaryMessages,
 741        attachments: postCompactFileAttachments,
 742        hookResults: hookMessages,
 743        userDisplayMessage: combinedUserDisplayMessage || undefined,
 744        preCompactTokenCount,
 745        postCompactTokenCount: compactionCallTotalTokens,
 746        truePostCompactTokenCount,
 747        compactionUsage,
 748      }
 749    } catch (error) {
 750      // Only show the error notification for manual /compact.
 751      // Auto-compact failures are retried on the next turn and the
 752      // notification is confusing when compaction eventually succeeds.
 753      if (!isAutoCompact) {
 754        addErrorNotificationIfNeeded(error, context)
 755      }
 756      throw error
 757    } finally {
 758      context.setStreamMode?.('requesting')
 759      context.setResponseLength?.(() => 0)
 760      context.onCompactProgress?.({ type: 'compact_end' })
 761      context.setSDKStatus?.(null)
 762    }
 763  }
 764  
 765  /**
 766   * Performs a partial compaction around the selected message index.
 767   * Direction 'from': summarizes messages after the index, keeps earlier ones.
 768   *   Prompt cache for kept (earlier) messages is preserved.
 769   * Direction 'up_to': summarizes messages before the index, keeps later ones.
 770   *   Prompt cache is invalidated since the summary precedes the kept messages.
 771   */
export async function partialCompactConversation(
  allMessages: Message[],
  pivotIndex: number,
  context: ToolUseContext,
  cacheSafeParams: CacheSafeParams,
  userFeedback?: string,
  direction: PartialCompactDirection = 'from',
): Promise<CompactionResult> {
  try {
    // Partition the conversation at pivotIndex into the half that gets
    // summarized and the half that is kept verbatim.
    const messagesToSummarize =
      direction === 'up_to'
        ? allMessages.slice(0, pivotIndex)
        : allMessages.slice(pivotIndex)
    // 'up_to' must strip old compact boundaries/summaries: for 'up_to',
    // summary_B sits BEFORE kept, so a stale boundary_A in kept wins
    // findLastCompactBoundaryIndex's backward scan and drops summary_B.
    // 'from' keeps them: summary_B sits AFTER kept (backward scan still
    // works), and removing an old summary would lose its covered history.
    const messagesToKeep =
      direction === 'up_to'
        ? allMessages
            .slice(pivotIndex)
            .filter(
              m =>
                m.type !== 'progress' &&
                !isCompactBoundaryMessage(m) &&
                !(m.type === 'user' && m.isCompactSummary),
            )
        : allMessages.slice(0, pivotIndex).filter(m => m.type !== 'progress')

    if (messagesToSummarize.length === 0) {
      throw new Error(
        direction === 'up_to'
          ? 'Nothing to summarize before the selected message.'
          : 'Nothing to summarize after the selected message.',
      )
    }

    // Estimated size of the whole conversation — used for analytics and the
    // boundary marker below, not for the API call itself.
    const preCompactTokenCount = tokenCountWithEstimation(allMessages)

    context.onCompactProgress?.({
      type: 'hooks_start',
      hookType: 'pre_compact',
    })

    context.setSDKStatus?.('compacting')
    const hookResult = await executePreCompactHooks(
      {
        trigger: 'manual',
        customInstructions: null,
      },
      context.abortController.signal,
    )

    // Merge hook instructions with user feedback
    let customInstructions: string | undefined
    if (hookResult.newCustomInstructions && userFeedback) {
      customInstructions = `${hookResult.newCustomInstructions}\n\nUser context: ${userFeedback}`
    } else if (hookResult.newCustomInstructions) {
      customInstructions = hookResult.newCustomInstructions
    } else if (userFeedback) {
      customInstructions = `User context: ${userFeedback}`
    }

    context.setStreamMode?.('requesting')
    context.setResponseLength?.(() => 0)
    context.onCompactProgress?.({ type: 'compact_start' })

    const compactPrompt = getPartialCompactPrompt(customInstructions, direction)
    const summaryRequest = createUserMessage({
      content: compactPrompt,
    })

    // Common analytics payload attached to every failure event below.
    const failureMetadata = {
      preCompactTokenCount,
      direction:
        direction as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      messagesSummarized: messagesToSummarize.length,
    }

    // 'up_to' prefix hits cache directly; 'from' sends all (tail wouldn't cache).
    // PTL retry breaks the cache prefix but unblocks the user (CC-1180).
    let apiMessages = direction === 'up_to' ? messagesToSummarize : allMessages
    let retryCacheSafeParams =
      direction === 'up_to'
        ? { ...cacheSafeParams, forkContextMessages: messagesToSummarize }
        : cacheSafeParams
    let summaryResponse: AssistantMessage
    let summary: string | null
    let ptlAttempts = 0
    // Retry loop: when the model replies with a "prompt too long" error,
    // drop head messages and retry, up to MAX_PTL_RETRIES attempts.
    for (;;) {
      summaryResponse = await streamCompactSummary({
        messages: apiMessages,
        summaryRequest,
        appState: context.getAppState(),
        context,
        preCompactTokenCount,
        cacheSafeParams: retryCacheSafeParams,
      })
      summary = getAssistantMessageText(summaryResponse)
      if (!summary?.startsWith(PROMPT_TOO_LONG_ERROR_MESSAGE)) break

      ptlAttempts++
      const truncated =
        ptlAttempts <= MAX_PTL_RETRIES
          ? truncateHeadForPTLRetry(apiMessages, summaryResponse)
          : null
      if (!truncated) {
        logEvent('tengu_partial_compact_failed', {
          reason:
            'prompt_too_long' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          ...failureMetadata,
          ptlAttempts,
        })
        throw new Error(ERROR_MESSAGE_PROMPT_TOO_LONG)
      }
      logEvent('tengu_compact_ptl_retry', {
        attempt: ptlAttempts,
        droppedMessages: apiMessages.length - truncated.length,
        remainingMessages: truncated.length,
        path: 'partial' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      })
      apiMessages = truncated
      retryCacheSafeParams = {
        ...retryCacheSafeParams,
        forkContextMessages: truncated,
      }
    }
    if (!summary) {
      logEvent('tengu_partial_compact_failed', {
        reason:
          'no_summary' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        ...failureMetadata,
      })
      throw new Error(
        'Failed to generate conversation summary - response did not contain valid text content',
      )
    } else if (startsWithApiErrorPrefix(summary)) {
      logEvent('tengu_partial_compact_failed', {
        reason:
          'api_error' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        ...failureMetadata,
      })
      throw new Error(summary)
    }

    // Store the current file state before clearing
    const preCompactReadFileState = cacheToObject(context.readFileState)
    context.readFileState.clear()
    context.loadedNestedMemoryPaths?.clear()
    // Intentionally NOT resetting sentSkillNames — see compactConversation()
    // for rationale (~4K tokens saved per compact event).

    // Rebuild post-compact context: re-attach recently read files and
    // async-agent status so the model doesn't re-read or duplicate work.
    const [fileAttachments, asyncAgentAttachments] = await Promise.all([
      createPostCompactFileAttachments(
        preCompactReadFileState,
        context,
        POST_COMPACT_MAX_FILES_TO_RESTORE,
        messagesToKeep,
      ),
      createAsyncAgentAttachmentsIfNeeded(context),
    ])

    const postCompactFileAttachments: AttachmentMessage[] = [
      ...fileAttachments,
      ...asyncAgentAttachments,
    ]
    const planAttachment = createPlanAttachmentIfNeeded(context.agentId)
    if (planAttachment) {
      postCompactFileAttachments.push(planAttachment)
    }

    // Add plan mode instructions if currently in plan mode
    const planModeAttachment = await createPlanModeAttachmentIfNeeded(context)
    if (planModeAttachment) {
      postCompactFileAttachments.push(planModeAttachment)
    }

    const skillAttachment = createSkillAttachmentIfNeeded(context.agentId)
    if (skillAttachment) {
      postCompactFileAttachments.push(skillAttachment)
    }

    // Re-announce only what was in the summarized portion — messagesToKeep
    // is scanned, so anything already announced there is skipped.
    for (const att of getDeferredToolsDeltaAttachment(
      context.options.tools,
      context.options.mainLoopModel,
      messagesToKeep,
      { callSite: 'compact_partial' },
    )) {
      postCompactFileAttachments.push(createAttachmentMessage(att))
    }
    for (const att of getAgentListingDeltaAttachment(context, messagesToKeep)) {
      postCompactFileAttachments.push(createAttachmentMessage(att))
    }
    for (const att of getMcpInstructionsDeltaAttachment(
      context.options.mcpClients,
      context.options.tools,
      context.options.mainLoopModel,
      messagesToKeep,
    )) {
      postCompactFileAttachments.push(createAttachmentMessage(att))
    }

    context.onCompactProgress?.({
      type: 'hooks_start',
      hookType: 'session_start',
    })
    const hookMessages = await processSessionStartHooks('compact', {
      model: context.options.mainLoopModel,
    })

    // Post-compact token accounting comes from the summarization call itself.
    const postCompactTokenCount = tokenCountFromLastAPIResponse([
      summaryResponse,
    ])
    const compactionUsage = getTokenUsage(summaryResponse)

    logEvent('tengu_partial_compact', {
      preCompactTokenCount,
      postCompactTokenCount,
      messagesKept: messagesToKeep.length,
      messagesSummarized: messagesToSummarize.length,
      direction:
        direction as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      hasUserFeedback: !!userFeedback,
      trigger:
        'message_selector' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      compactionInputTokens: compactionUsage?.input_tokens,
      compactionOutputTokens: compactionUsage?.output_tokens,
      compactionCacheReadTokens: compactionUsage?.cache_read_input_tokens ?? 0,
      compactionCacheCreationTokens:
        compactionUsage?.cache_creation_input_tokens ?? 0,
    })

    // Progress messages aren't loggable, so forkSessionImpl would null out
    // a logicalParentUuid pointing at one. Both directions skip them.
    const lastPreCompactUuid =
      direction === 'up_to'
        ? allMessages.slice(0, pivotIndex).findLast(m => m.type !== 'progress')
            ?.uuid
        : messagesToKeep.at(-1)?.uuid
    const boundaryMarker = createCompactBoundaryMessage(
      'manual',
      preCompactTokenCount ?? 0,
      lastPreCompactUuid,
      userFeedback,
      messagesToSummarize.length,
    )
    // allMessages not just messagesToSummarize — set union is idempotent,
    // simpler than tracking which half each tool lived in.
    const preCompactDiscovered = extractDiscoveredToolNames(allMessages)
    if (preCompactDiscovered.size > 0) {
      boundaryMarker.compactMetadata.preCompactDiscoveredTools = [
        ...preCompactDiscovered,
      ].sort()
    }

    const transcriptPath = getTranscriptPath()
    // The summary is injected as a user message. When nothing is kept, it is
    // transcript-only; otherwise summarizeMetadata records what it replaced.
    const summaryMessages: UserMessage[] = [
      createUserMessage({
        content: getCompactUserSummaryMessage(summary, false, transcriptPath),
        isCompactSummary: true,
        ...(messagesToKeep.length > 0
          ? {
              summarizeMetadata: {
                messagesSummarized: messagesToSummarize.length,
                userContext: userFeedback,
                direction,
              },
            }
          : { isVisibleInTranscriptOnly: true as const }),
      }),
    ]

    if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
      notifyCompaction(
        context.options.querySource ?? 'compact',
        context.agentId,
      )
    }
    markPostCompaction()

    // Re-append session metadata (custom title, tag) so it stays within
    // the 16KB tail window that readLiteMetadata reads for --resume display.
    reAppendSessionMetadata()

    // Fire-and-forget transcript segment write for the summarized half.
    if (feature('KAIROS')) {
      void sessionTranscriptModule?.writeSessionTranscriptSegment(
        messagesToSummarize,
      )
    }

    context.onCompactProgress?.({
      type: 'hooks_start',
      hookType: 'post_compact',
    })
    const postCompactHookResult = await executePostCompactHooks(
      {
        trigger: 'manual',
        compactSummary: summary,
      },
      context.abortController.signal,
    )

    // 'from': prefix-preserving → boundary; 'up_to': suffix → last summary
    const anchorUuid =
      direction === 'up_to'
        ? (summaryMessages.at(-1)?.uuid ?? boundaryMarker.uuid)
        : boundaryMarker.uuid
    return {
      boundaryMarker: annotateBoundaryWithPreservedSegment(
        boundaryMarker,
        anchorUuid,
        messagesToKeep,
      ),
      summaryMessages,
      messagesToKeep,
      attachments: postCompactFileAttachments,
      hookResults: hookMessages,
      userDisplayMessage: postCompactHookResult.userDisplayMessage,
      preCompactTokenCount,
      postCompactTokenCount,
      compactionUsage,
    }
  } catch (error) {
    addErrorNotificationIfNeeded(error, context)
    throw error
  } finally {
    context.setStreamMode?.('requesting')
    context.setResponseLength?.(() => 0)
    context.onCompactProgress?.({ type: 'compact_end' })
    context.setSDKStatus?.(null)
  }
}
1107  
1108  function addErrorNotificationIfNeeded(
1109    error: unknown,
1110    context: Pick<ToolUseContext, 'addNotification'>,
1111  ) {
1112    if (
1113      !hasExactErrorMessage(error, ERROR_MESSAGE_USER_ABORT) &&
1114      !hasExactErrorMessage(error, ERROR_MESSAGE_NOT_ENOUGH_MESSAGES)
1115    ) {
1116      context.addNotification?.({
1117        key: 'error-compacting-conversation',
1118        text: 'Error compacting conversation',
1119        priority: 'immediate',
1120        color: 'error',
1121      })
1122    }
1123  }
1124  
1125  export function createCompactCanUseTool(): CanUseToolFn {
1126    return async () => ({
1127      behavior: 'deny' as const,
1128      message: 'Tool use is not allowed during compaction',
1129      decisionReason: {
1130        type: 'other' as const,
1131        reason: 'compaction agent should only produce text summary',
1132      },
1133    })
1134  }
1135  
/**
 * Requests a conversation summary from the model. Prefers a forked agent that
 * reuses the main thread's prompt cache; falls back to a plain streaming
 * request (with optional bounded retries) when forking fails or is disabled.
 * Streams progress into the compact UI via the context's setters and throws
 * when no assistant response is produced after all attempts.
 */
async function streamCompactSummary({
  messages,
  summaryRequest,
  appState,
  context,
  preCompactTokenCount,
  cacheSafeParams,
}: {
  messages: Message[]
  summaryRequest: UserMessage
  appState: Awaited<ReturnType<ToolUseContext['getAppState']>>
  context: ToolUseContext
  preCompactTokenCount: number
  cacheSafeParams: CacheSafeParams
}): Promise<AssistantMessage> {
  // When prompt cache sharing is enabled, use forked agent to reuse the
  // main conversation's cached prefix (system prompt, tools, context messages).
  // Falls back to regular streaming path on failure.
  // 3P default: true — see comment at the other tengu_compact_cache_prefix read above.
  const promptCacheSharingEnabled = getFeatureValue_CACHED_MAY_BE_STALE(
    'tengu_compact_cache_prefix',
    true,
  )
  // Send keep-alive signals during compaction to prevent remote session
  // WebSocket idle timeouts from dropping bridge connections. Compaction
  // API calls can take 5-10+ seconds, during which no other messages
  // flow through the transport — without keep-alives, the server may
  // close the WebSocket for inactivity.
  // Two signals: (1) PUT /worker heartbeat via sessionActivity, and
  // (2) re-emit 'compacting' status so the SDK event stream stays active
  // and the server doesn't consider the session stale.
  const activityInterval = isSessionActivityTrackingActive()
    ? setInterval(
        (statusSetter?: (status: 'compacting' | null) => void) => {
          sendSessionActivitySignal()
          statusSetter?.('compacting')
        },
        30_000,
        context.setSDKStatus,
      )
    : undefined

  try {
    if (promptCacheSharingEnabled) {
      try {
        // DO NOT set maxOutputTokens here. The fork piggybacks on the main thread's
        // prompt cache by sending identical cache-key params (system, tools, model,
        // messages prefix, thinking config). Setting maxOutputTokens would clamp
        // budget_tokens via Math.min(budget, maxOutputTokens-1) in claude.ts,
        // creating a thinking config mismatch that invalidates the cache.
        // The streaming fallback path (below) can safely set maxOutputTokensOverride
        // since it doesn't share cache with the main thread.
        const result = await runForkedAgent({
          promptMessages: [summaryRequest],
          cacheSafeParams,
          canUseTool: createCompactCanUseTool(),
          querySource: 'compact',
          forkLabel: 'compact',
          maxTurns: 1,
          skipCacheWrite: true,
          // Pass the compact context's abortController so user Esc aborts the
          // fork — same signal the streaming fallback uses at
          // `signal: context.abortController.signal` below.
          overrides: { abortController: context.abortController },
        })
        const assistantMsg = getLastAssistantMessage(result.messages)
        const assistantText = assistantMsg
          ? getAssistantMessageText(assistantMsg)
          : null
        // Guard isApiErrorMessage: query() catches API errors (including
        // APIUserAbortError on ESC) and yields them as synthetic assistant
        // messages. Without this check, an aborted compact "succeeds" with
        // "Request was aborted." as the summary — the text doesn't start with
        // "API Error" so the caller's startsWithApiErrorPrefix guard misses it.
        if (assistantMsg && assistantText && !assistantMsg.isApiErrorMessage) {
          // Skip success logging for PTL error text — it's returned so the
          // caller's retry loop catches it, but it's not a successful summary.
          if (!assistantText.startsWith(PROMPT_TOO_LONG_ERROR_MESSAGE)) {
            logEvent('tengu_compact_cache_sharing_success', {
              preCompactTokenCount,
              outputTokens: result.totalUsage.output_tokens,
              cacheReadInputTokens: result.totalUsage.cache_read_input_tokens,
              cacheCreationInputTokens:
                result.totalUsage.cache_creation_input_tokens,
              cacheHitRate:
                result.totalUsage.cache_read_input_tokens > 0
                  ? result.totalUsage.cache_read_input_tokens /
                    (result.totalUsage.cache_read_input_tokens +
                      result.totalUsage.cache_creation_input_tokens +
                      result.totalUsage.input_tokens)
                  : 0,
            })
          }
          return assistantMsg
        }
        logForDebugging(
          `Compact cache sharing: no text in response, falling back. Response: ${jsonStringify(assistantMsg)}`,
          { level: 'warn' },
        )
        logEvent('tengu_compact_cache_sharing_fallback', {
          reason:
            'no_text_response' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          preCompactTokenCount,
        })
      } catch (error) {
        // Fork failures are non-fatal: log and fall through to the
        // regular streaming path below.
        logError(error)
        logEvent('tengu_compact_cache_sharing_fallback', {
          reason:
            'error' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          preCompactTokenCount,
        })
      }
    }

    // Regular streaming path (fallback when cache sharing fails or is disabled)
    const retryEnabled = getFeatureValue_CACHED_MAY_BE_STALE(
      'tengu_compact_streaming_retry',
      false,
    )
    const maxAttempts = retryEnabled ? MAX_COMPACT_STREAMING_RETRIES : 1

    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      // Reset state for retry
      let hasStartedStreaming = false
      let response: AssistantMessage | undefined
      context.setResponseLength?.(() => 0)

      // Check if tool search is enabled using the main loop's tools list.
      // context.options.tools includes MCP tools merged via useMergedTools.
      const useToolSearch = await isToolSearchEnabled(
        context.options.mainLoopModel,
        context.options.tools,
        async () => appState.toolPermissionContext,
        context.options.agentDefinitions.activeAgents,
        'compact',
      )

      // When tool search is enabled, include ToolSearchTool and MCP tools. They get
      // defer_loading: true and don't count against context - the API filters them out
      // of system_prompt_tools before token counting (see api/token_count_api/counting.py:188
      // and api/public_api/messages/handler.py:324).
      // Filter MCP tools from context.options.tools (not appState.mcp.tools) so we
      // get the permission-filtered set from useMergedTools — same source used for
      // isToolSearchEnabled above and normalizeMessagesForAPI below.
      // Deduplicate by name to avoid API errors when MCP tools share names with built-in tools.
      const tools: Tool[] = useToolSearch
        ? uniqBy(
            [
              FileReadTool,
              ToolSearchTool,
              ...context.options.tools.filter(t => t.isMcp),
            ],
            'name',
          )
        : [FileReadTool]

      const streamingGen = queryModelWithStreaming({
        messages: normalizeMessagesForAPI(
          stripImagesFromMessages(
            stripReinjectedAttachments([
              ...getMessagesAfterCompactBoundary(messages),
              summaryRequest,
            ]),
          ),
          context.options.tools,
        ),
        systemPrompt: asSystemPrompt([
          'You are a helpful AI assistant tasked with summarizing conversations.',
        ]),
        thinkingConfig: { type: 'disabled' as const },
        tools,
        signal: context.abortController.signal,
        options: {
          async getToolPermissionContext() {
            const appState = context.getAppState()
            return appState.toolPermissionContext
          },
          model: context.options.mainLoopModel,
          toolChoice: undefined,
          isNonInteractiveSession: context.options.isNonInteractiveSession,
          hasAppendSystemPrompt: !!context.options.appendSystemPrompt,
          maxOutputTokensOverride: Math.min(
            COMPACT_MAX_OUTPUT_TOKENS,
            getMaxOutputTokensForModel(context.options.mainLoopModel),
          ),
          querySource: 'compact',
          agents: context.options.agentDefinitions.activeAgents,
          mcpTools: [],
          effortValue: appState.effortValue,
        },
      })
      // Drive the iterator manually (rather than for-await) so the retry
      // bookkeeping around it stays in one frame.
      const streamIter = streamingGen[Symbol.asyncIterator]()
      let next = await streamIter.next()

      while (!next.done) {
        const event = next.value

        // First text block: flip UI from 'requesting' to 'responding'.
        if (
          !hasStartedStreaming &&
          event.type === 'stream_event' &&
          event.event.type === 'content_block_start' &&
          event.event.content_block.type === 'text'
        ) {
          hasStartedStreaming = true
          context.setStreamMode?.('responding')
        }

        // Accumulate streamed character count for the progress display.
        if (
          event.type === 'stream_event' &&
          event.event.type === 'content_block_delta' &&
          event.event.delta.type === 'text_delta'
        ) {
          const charactersStreamed = event.event.delta.text.length
          context.setResponseLength?.(length => length + charactersStreamed)
        }

        if (event.type === 'assistant') {
          response = event
        }

        next = await streamIter.next()
      }

      if (response) {
        return response
      }

      // No assistant message materialized: retry (with backoff) or fail.
      if (attempt < maxAttempts) {
        logEvent('tengu_compact_streaming_retry', {
          attempt,
          preCompactTokenCount,
          hasStartedStreaming,
        })
        await sleep(getRetryDelay(attempt), context.abortController.signal, {
          abortError: () => new APIUserAbortError(),
        })
        continue
      }

      logForDebugging(
        `Compact streaming failed after ${attempt} attempts. hasStartedStreaming=${hasStartedStreaming}`,
        { level: 'error' },
      )
      logEvent('tengu_compact_failed', {
        reason:
          'no_streaming_response' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        preCompactTokenCount,
        hasStartedStreaming,
        retryEnabled,
        attempts: attempt,
        promptCacheSharingEnabled,
      })
      throw new Error(ERROR_MESSAGE_INCOMPLETE_RESPONSE)
    }

    // This should never be reached due to the throw above, but TypeScript needs it
    throw new Error(ERROR_MESSAGE_INCOMPLETE_RESPONSE)
  } finally {
    // Always stop the keep-alive timer (no-op when it was never started).
    clearInterval(activityInterval)
  }
}
1397  
1398  /**
1399   * Creates attachment messages for recently accessed files to restore them after compaction.
1400   * This prevents the model from having to re-read files that were recently accessed.
1401   * Re-reads files using FileReadTool to get fresh content with proper validation.
1402   * Files are selected based on recency, but constrained by both file count and token budget limits.
1403   *
1404   * Files already present as Read tool results in preservedMessages are skipped —
1405   * re-injecting identical content the model can already see in the preserved tail
1406   * is pure waste (up to 25K tok/compact). Mirrors the diff-against-preserved
1407   * pattern that getDeferredToolsDeltaAttachment uses at the same call sites.
1408   *
1409   * @param readFileState The current file state tracking recently read files
1410   * @param toolUseContext The tool use context for calling FileReadTool
1411   * @param maxFiles Maximum number of files to restore (default: 5)
1412   * @param preservedMessages Messages kept post-compact; Read results here are skipped
1413   * @returns Array of attachment messages for the most recently accessed files that fit within token budget
1414   */
1415  export async function createPostCompactFileAttachments(
1416    readFileState: Record<string, { content: string; timestamp: number }>,
1417    toolUseContext: ToolUseContext,
1418    maxFiles: number,
1419    preservedMessages: Message[] = [],
1420  ): Promise<AttachmentMessage[]> {
1421    const preservedReadPaths = collectReadToolFilePaths(preservedMessages)
1422    const recentFiles = Object.entries(readFileState)
1423      .map(([filename, state]) => ({ filename, ...state }))
1424      .filter(
1425        file =>
1426          !shouldExcludeFromPostCompactRestore(
1427            file.filename,
1428            toolUseContext.agentId,
1429          ) && !preservedReadPaths.has(expandPath(file.filename)),
1430      )
1431      .sort((a, b) => b.timestamp - a.timestamp)
1432      .slice(0, maxFiles)
1433  
1434    const results = await Promise.all(
1435      recentFiles.map(async file => {
1436        const attachment = await generateFileAttachment(
1437          file.filename,
1438          {
1439            ...toolUseContext,
1440            fileReadingLimits: {
1441              maxTokens: POST_COMPACT_MAX_TOKENS_PER_FILE,
1442            },
1443          },
1444          'tengu_post_compact_file_restore_success',
1445          'tengu_post_compact_file_restore_error',
1446          'compact',
1447        )
1448        return attachment ? createAttachmentMessage(attachment) : null
1449      }),
1450    )
1451  
1452    let usedTokens = 0
1453    return results.filter((result): result is AttachmentMessage => {
1454      if (result === null) {
1455        return false
1456      }
1457      const attachmentTokens = roughTokenCountEstimation(jsonStringify(result))
1458      if (usedTokens + attachmentTokens <= POST_COMPACT_TOKEN_BUDGET) {
1459        usedTokens += attachmentTokens
1460        return true
1461      }
1462      return false
1463    })
1464  }
1465  
1466  /**
1467   * Creates a plan file attachment if a plan file exists for the current session.
1468   * This ensures the plan is preserved after compaction.
1469   */
1470  export function createPlanAttachmentIfNeeded(
1471    agentId?: AgentId,
1472  ): AttachmentMessage | null {
1473    const planContent = getPlan(agentId)
1474  
1475    if (!planContent) {
1476      return null
1477    }
1478  
1479    const planFilePath = getPlanFilePath(agentId)
1480  
1481    return createAttachmentMessage({
1482      type: 'plan_file_reference',
1483      planFilePath,
1484      planContent,
1485    })
1486  }
1487  
1488  /**
1489   * Creates an attachment for invoked skills to preserve their content across compaction.
1490   * Only includes skills scoped to the given agent (or main session when agentId is null/undefined).
1491   * This ensures skill guidelines remain available after the conversation is summarized
1492   * without leaking skills from other agent contexts.
1493   */
1494  export function createSkillAttachmentIfNeeded(
1495    agentId?: string,
1496  ): AttachmentMessage | null {
1497    const invokedSkills = getInvokedSkillsForAgent(agentId)
1498  
1499    if (invokedSkills.size === 0) {
1500      return null
1501    }
1502  
1503    // Sorted most-recent-first so budget pressure drops the least-relevant skills.
1504    // Per-skill truncation keeps the head of each file (where setup/usage
1505    // instructions typically live) rather than dropping whole skills.
1506    let usedTokens = 0
1507    const skills = Array.from(invokedSkills.values())
1508      .sort((a, b) => b.invokedAt - a.invokedAt)
1509      .map(skill => ({
1510        name: skill.skillName,
1511        path: skill.skillPath,
1512        content: truncateToTokens(
1513          skill.content,
1514          POST_COMPACT_MAX_TOKENS_PER_SKILL,
1515        ),
1516      }))
1517      .filter(skill => {
1518        const tokens = roughTokenCountEstimation(skill.content)
1519        if (usedTokens + tokens > POST_COMPACT_SKILLS_TOKEN_BUDGET) {
1520          return false
1521        }
1522        usedTokens += tokens
1523        return true
1524      })
1525  
1526    if (skills.length === 0) {
1527      return null
1528    }
1529  
1530    return createAttachmentMessage({
1531      type: 'invoked_skills',
1532      skills,
1533    })
1534  }
1535  
1536  /**
1537   * Creates a plan_mode attachment if the user is currently in plan mode.
1538   * This ensures the model continues to operate in plan mode after compaction
1539   * (otherwise it would lose the plan mode instructions since those are
1540   * normally only injected on tool-use turns via getAttachmentMessages).
1541   */
1542  export async function createPlanModeAttachmentIfNeeded(
1543    context: ToolUseContext,
1544  ): Promise<AttachmentMessage | null> {
1545    const appState = context.getAppState()
1546    if (appState.toolPermissionContext.mode !== 'plan') {
1547      return null
1548    }
1549  
1550    const planFilePath = getPlanFilePath(context.agentId)
1551    const planExists = getPlan(context.agentId) !== null
1552  
1553    return createAttachmentMessage({
1554      type: 'plan_mode',
1555      reminderType: 'full',
1556      isSubAgent: !!context.agentId,
1557      planFilePath,
1558      planExists,
1559    })
1560  }
1561  
1562  /**
1563   * Creates attachments for async agents so the model knows about them after
1564   * compaction. Covers both agents still running in the background (so the model
1565   * doesn't spawn a duplicate) and agents that have finished but whose results
1566   * haven't been retrieved yet.
1567   */
1568  export async function createAsyncAgentAttachmentsIfNeeded(
1569    context: ToolUseContext,
1570  ): Promise<AttachmentMessage[]> {
1571    const appState = context.getAppState()
1572    const asyncAgents = Object.values(appState.tasks).filter(
1573      (task): task is LocalAgentTaskState => task.type === 'local_agent',
1574    )
1575  
1576    return asyncAgents.flatMap(agent => {
1577      if (
1578        agent.retrieved ||
1579        agent.status === 'pending' ||
1580        agent.agentId === context.agentId
1581      ) {
1582        return []
1583      }
1584      return [
1585        createAttachmentMessage({
1586          type: 'task_status',
1587          taskId: agent.agentId,
1588          taskType: 'local_agent',
1589          description: agent.description,
1590          status: agent.status,
1591          deltaSummary:
1592            agent.status === 'running'
1593              ? (agent.progress?.summary ?? null)
1594              : (agent.error ?? null),
1595          outputFilePath: getTaskOutputPath(agent.agentId),
1596        }),
1597      ]
1598    })
1599  }
1600  
1601  /**
1602   * Scan messages for Read tool_use blocks and collect their file_path inputs
1603   * (normalized via expandPath). Used to dedup post-compact file restoration
1604   * against what's already visible in the preserved tail.
1605   *
1606   * Skips Reads whose tool_result is a dedup stub — the stub points at an
1607   * earlier full Read that may have been compacted away, so we want
1608   * createPostCompactFileAttachments to re-inject the real content.
1609   */
1610  function collectReadToolFilePaths(messages: Message[]): Set<string> {
1611    const stubIds = new Set<string>()
1612    for (const message of messages) {
1613      if (message.type !== 'user' || !Array.isArray(message.message.content)) {
1614        continue
1615      }
1616      for (const block of message.message.content) {
1617        if (
1618          block.type === 'tool_result' &&
1619          typeof block.content === 'string' &&
1620          block.content.startsWith(FILE_UNCHANGED_STUB)
1621        ) {
1622          stubIds.add(block.tool_use_id)
1623        }
1624      }
1625    }
1626  
1627    const paths = new Set<string>()
1628    for (const message of messages) {
1629      if (
1630        message.type !== 'assistant' ||
1631        !Array.isArray(message.message.content)
1632      ) {
1633        continue
1634      }
1635      for (const block of message.message.content) {
1636        if (
1637          block.type !== 'tool_use' ||
1638          block.name !== FILE_READ_TOOL_NAME ||
1639          stubIds.has(block.id)
1640        ) {
1641          continue
1642        }
1643        const input = block.input
1644        if (
1645          input &&
1646          typeof input === 'object' &&
1647          'file_path' in input &&
1648          typeof input.file_path === 'string'
1649        ) {
1650          paths.add(expandPath(input.file_path))
1651        }
1652      }
1653    }
1654    return paths
1655  }
1656  
1657  const SKILL_TRUNCATION_MARKER =
1658    '\n\n[... skill content truncated for compaction; use Read on the skill path if you need the full text]'
1659  
1660  /**
1661   * Truncate content to roughly maxTokens, keeping the head. roughTokenCountEstimation
1662   * uses ~4 chars/token (its default bytesPerToken), so char budget = maxTokens * 4
1663   * minus the marker so the result stays within budget. Marker tells the model it
1664   * can Read the full file if needed.
1665   */
1666  function truncateToTokens(content: string, maxTokens: number): string {
1667    if (roughTokenCountEstimation(content) <= maxTokens) {
1668      return content
1669    }
1670    const charBudget = maxTokens * 4 - SKILL_TRUNCATION_MARKER.length
1671    return content.slice(0, charBudget) + SKILL_TRUNCATION_MARKER
1672  }
1673  
1674  function shouldExcludeFromPostCompactRestore(
1675    filename: string,
1676    agentId?: AgentId,
1677  ): boolean {
1678    const normalizedFilename = expandPath(filename)
1679    // Exclude plan files
1680    try {
1681      const planFilePath = expandPath(getPlanFilePath(agentId))
1682      if (normalizedFilename === planFilePath) {
1683        return true
1684      }
1685    } catch {
1686      // If we can't get plan file path, continue with other checks
1687    }
1688  
1689    // Exclude all types of claude.md files
1690    // TODO: Refactor to use isMemoryFilePath() from claudemd.ts for consistency
1691    // and to also match child directory memory files (.claude/rules/*.md, etc.)
1692    try {
1693      const normalizedMemoryPaths = new Set(
1694        MEMORY_TYPE_VALUES.map(type => expandPath(getMemoryPath(type))),
1695      )
1696  
1697      if (normalizedMemoryPaths.has(normalizedFilename)) {
1698        return true
1699      }
1700    } catch {
1701      // If we can't get memory paths, continue
1702    }
1703  
1704    return false
1705  }