// src/lib/server/context-manager.ts

import type { Message, Session } from '@/types'
import { getMemoryDb } from '@/lib/server/memory/memory-db'
import { extractFactsFromMessages, ensureRunContext, pruneRunContext } from '@/lib/server/run-context'
import { getSession, saveSession } from '@/lib/server/sessions/session-repository'

import { repairTranscriptConsistency } from './transcript-repair'

// --- LLM compaction constants ---

const BASE_CHUNK_RATIO = 0.4
const MIN_CHUNK_RATIO = 0.15
const COMPACTION_SAFETY_MARGIN = 1.2
const COMPACTION_OVERHEAD_TOKENS = 4096
const DEFAULT_COMPACTION_RESERVE_TOKENS_FLOOR = 20_000
const MIN_COMPACTION_RESERVE_TOKENS = 1_024
const MAX_TOOL_FAILURES = 8
const MAX_FAILURE_CHARS = 240

const MERGE_SUMMARIES_INSTRUCTIONS =
  'Merge these partial summaries into a single cohesive summary. Preserve decisions,' +
  ' TODOs, open questions, constraints, active tasks and their current status,' +
  ' batch operation progress (e.g., "5/17 items completed"),' +
  ' the last thing the user requested, and any commitments or follow-ups promised.'

const IDENTIFIER_PRESERVATION_INSTRUCTIONS =
  'Preserve all opaque identifiers exactly as written (no shortening or reconstruction), ' +
  'including UUIDs, hashes, IDs, tokens, API keys, hostnames, IPs, ports, URLs, and file names.'

/** Callback that sends a prompt to an LLM and returns response text */
export type LLMSummarizer = (prompt: string) => Promise<string>

// --- Context window sizes (tokens) per provider/model ---

const PROVIDER_CONTEXT_WINDOWS: Record<string, number> = {
  // Anthropic
  'claude-opus-4-6': 200_000,
  'claude-sonnet-4-6': 200_000,
  'claude-haiku-4-5-20251001': 200_000,
  'claude-sonnet-4-5-20250514': 200_000,
  // OpenAI
  'gpt-4o': 128_000,
  'gpt-4o-mini': 128_000,
  'gpt-4.1': 1_047_576,
  'gpt-4.1-mini': 1_047_576,
  'gpt-4.1-nano': 1_047_576,
  'o3': 200_000,
  'o3-mini': 128_000,
  'o4-mini': 200_000,
  // Codex CLI
  'gpt-5.3-codex': 1_047_576,
  'gpt-5.2-codex': 1_047_576,
  'gpt-5.1-codex': 1_047_576,
  'gpt-5-codex': 1_047_576,
  'gpt-5-codex-mini': 1_047_576,
  // Google Gemini
  'gemini-2.5-pro': 1_048_576,
  'gemini-2.5-flash': 1_048_576,
  'gemini-2.5-flash-lite': 1_048_576,
  // DeepSeek
  'deepseek-chat': 64_000,
  'deepseek-reasoner': 64_000,
  // Mistral
  'mistral-large-latest': 128_000,
  'mistral-small-latest': 128_000,
  'magistral-medium-2506': 128_000,
  'devstral-small-latest': 128_000,
  // xAI
  'grok-3': 131_072,
  'grok-3-fast': 131_072,
  'grok-3-mini': 131_072,
  'grok-3-mini-fast': 131_072,
}

const PROVIDER_DEFAULT_WINDOWS: Record<string, number> = {
  anthropic: 200_000,
  'claude-cli': 200_000,
  openai: 128_000,
  'codex-cli': 1_047_576,
  'opencode-cli': 200_000,
  'opencode-web': 200_000,
  'gemini-cli': 1_048_576,
  'copilot-cli': 200_000,
  'droid-cli': 200_000,
  'cursor-cli': 200_000,
  'qwen-code-cli': 1_048_576,
  google: 1_048_576,
  deepseek: 64_000,
  groq: 32_768,
  together: 32_768,
  mistral: 128_000,
  xai: 131_072,
  fireworks: 32_768,
  ollama: 32_768,
  goose: 200_000,
  openclaw: 128_000,
}

/** Get context window size for a model, falling back to provider default */
export function getContextWindowSize(provider: string, model: string): number {
  return PROVIDER_CONTEXT_WINDOWS[model]
    || PROVIDER_DEFAULT_WINDOWS[provider]
    || 8_192
}
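
// Usage sketch (illustrative; 'llama3:8b' and 'mystery-model' below are arbitrary ids that are
// not in the tables, chosen only to show the fallback chain: exact-model hit, then provider
// default, then the global 8,192-token floor):
//
//   getContextWindowSize('openai', 'gpt-4o')        // 128_000 (exact model match)
//   getContextWindowSize('ollama', 'llama3:8b')     // 32_768  (provider default)
//   getContextWindowSize('acme', 'mystery-model')   // 8_192   (global fallback)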

// --- Token estimation ---

/** Rough token estimate: ~4 chars per token for English text */
export function estimateTokens(text: string): number {
  if (!text) return 0
  return Math.ceil(text.length / 4)
}

/** Estimate total tokens for a message array */
export function estimateMessagesTokens(
  messages: Message[],
  options: { includeToolEvents?: boolean } = {},
): number {
  const includeToolEvents = options.includeToolEvents !== false
  let total = 0
  for (const m of messages) {
    // Role + overhead per message (~4 tokens)
    total += 4
    total += estimateTokens(m.text)
    if (includeToolEvents && m.toolEvents) {
      for (const te of m.toolEvents) {
        total += estimateTokens(te.name) + estimateTokens(te.input)
        if (te.output) total += estimateTokens(te.output)
      }
    }
  }
  return total
}
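
// Worked example (illustrative; the cast assumes only the fields this estimator actually reads):
//
//   const reply = { role: 'assistant', text: 'x'.repeat(400), time: Date.now() } as Message
//   estimateMessagesTokens([reply])
//   // = 4 (per-message overhead) + ceil(400 / 4) = 104 estimated tokens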

// --- Context window guard ---

/** Hard minimum: don't even attempt LLM calls below this */
const CONTEXT_WINDOW_HARD_MIN_TOKENS = 16_000
/** Warn the agent when remaining context is below this */
const CONTEXT_WINDOW_WARN_BELOW_TOKENS = 32_000

export interface ContextWindowGuardResult {
  contextWindowTokens: number
  shouldBlock: boolean
  shouldWarn: boolean
  message: string | null
}

/** Evaluate whether the context window is too small for useful execution */
export function evaluateContextWindowGuard(provider: string, model: string): ContextWindowGuardResult {
  const tokens = getContextWindowSize(provider, model)
  if (tokens < CONTEXT_WINDOW_HARD_MIN_TOKENS) {
    return {
      contextWindowTokens: tokens,
      shouldBlock: true,
      shouldWarn: false,
      message: `Context window too small (${tokens.toLocaleString()} tokens). Minimum required: ${CONTEXT_WINDOW_HARD_MIN_TOKENS.toLocaleString()} tokens.`,
    }
  }
  if (tokens < CONTEXT_WINDOW_WARN_BELOW_TOKENS) {
    return {
      contextWindowTokens: tokens,
      shouldBlock: false,
      shouldWarn: true,
      message: `Small context window (${tokens.toLocaleString()} tokens). Agent may struggle with complex tasks. Consider using a model with a larger context window.`,
    }
  }
  return { contextWindowTokens: tokens, shouldBlock: false, shouldWarn: false, message: null }
}
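
// Usage sketch (the provider/model ids are hypothetical, chosen only to exercise each branch):
//
//   evaluateContextWindowGuard('acme', 'tiny-model')
//   // → falls back to the 8,192-token floor, so shouldBlock === true
//
//   evaluateContextWindowGuard('ollama', 'some-local-model')
//   // → 32,768-token provider default: above both thresholds, so no block and no warning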

// --- Context status ---

export interface ContextStatus {
  estimatedTokens: number
  effectiveTokens: number
  contextWindow: number
  percentUsed: number
  messageCount: number
  extraTokens: number
  reserveTokens: number
  remainingTokens: number
  strategy: 'ok' | 'warning' | 'critical'
}

export interface ContextStatusOptions {
  extraTokens?: number
  reserveTokens?: number
  includeToolEvents?: boolean
}

export function resolveCompactionReserveTokens(provider: string, model: string): number {
  const contextWindow = getContextWindowSize(provider, model)
  if (contextWindow <= 0) return MIN_COMPACTION_RESERVE_TOKENS
  return Math.max(
    MIN_COMPACTION_RESERVE_TOKENS,
    Math.min(DEFAULT_COMPACTION_RESERVE_TOKENS_FLOOR, Math.floor(contextWindow * 0.2)),
  )
}
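
// Worked examples of the reserve calculation (20% of the window, clamped to [1,024, 20,000]):
//
//   resolveCompactionReserveTokens('anthropic', 'claude-sonnet-4-6')
//   // window 200,000 → floor(200,000 * 0.2) = 40,000 → clamped down to 20,000
//
//   resolveCompactionReserveTokens('deepseek', 'deepseek-chat')
//   // window 64,000 → floor(64,000 * 0.2) = 12,800 → within the clamp, so 12,800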

export function getContextStatus(
  messages: Message[],
  systemPromptTokens: number,
  provider: string,
  model: string,
  options: ContextStatusOptions = {},
): ContextStatus {
  const contextWindow = getContextWindowSize(provider, model)
  const messageTokens = estimateMessagesTokens(messages, {
    includeToolEvents: options.includeToolEvents,
  })
  const extraTokens = Math.max(0, Math.trunc(options.extraTokens || 0))
  const reserveTokens = Math.max(0, Math.trunc(options.reserveTokens || 0))
  const estimatedTokens = messageTokens + systemPromptTokens + extraTokens
  const effectiveTokens = estimatedTokens + reserveTokens
  const percentUsed = Math.round((effectiveTokens / contextWindow) * 100)
  const remainingTokens = Math.max(0, contextWindow - effectiveTokens)
  return {
    estimatedTokens,
    effectiveTokens,
    contextWindow,
    percentUsed,
    messageCount: messages.length,
    extraTokens,
    reserveTokens,
    remainingTokens,
    strategy: percentUsed >= 90 ? 'critical' : percentUsed >= 70 ? 'warning' : 'ok',
  }
}
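
// Usage sketch (illustrative values; the cast assumes only the fields the estimator reads):
//
//   const big = { role: 'user', text: 'x'.repeat(399_984), time: Date.now() } as Message  // ~100,000 tokens
//   getContextStatus([big], 2_000, 'anthropic', 'claude-sonnet-4-6', { reserveTokens: 20_000 })
//   // estimatedTokens = 102,000; effectiveTokens = 122,000; percentUsed = 61;
//   // remainingTokens = 78,000; strategy = 'ok' (warning starts at 70%, critical at 90%)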

// --- Context degradation warnings ---

/** Returns a warning string when context usage exceeds thresholds, or null if within safe bounds. */
export function getContextDegradationWarning(
  messages: Message[],
  systemPromptTokens: number,
  provider: string,
  model: string,
  options: ContextStatusOptions = {},
): string | null {
  const status = getContextStatus(messages, systemPromptTokens, provider, model, options)
  const pct = status.percentUsed
  const remaining = status.remainingTokens
  const estTurnsLeft = Math.max(0, Math.floor(remaining / 2000))

  if (pct >= 85) {
    return [
      `[CONTEXT_WARNING] Context window is ${pct}% full (${status.effectiveTokens.toLocaleString()} / ${status.contextWindow.toLocaleString()} effective tokens).`,
      `Estimated remaining capacity: ~${estTurnsLeft} turns.`,
      'CRITICAL: Save essential state to memory immediately. Summarize key findings, decisions, and next steps.',
      'Consider completing the current subtask and storing a checkpoint before context is exhausted.',
    ].join(' ')
  }
  if (pct >= 70) {
    return [
      `[CONTEXT_WARNING] Context window is ${pct}% full.`,
      `Estimated remaining capacity: ~${estTurnsLeft} turns.`,
      'Recommended: Store important progress notes to memory. Prioritize completing high-value subtasks.',
    ].join(' ')
  }
  if (pct >= 60) {
    return [
      `[CONTEXT_WARNING] Context window is ${pct}% full (~${estTurnsLeft} turns remaining).`,
      'Consider saving intermediate state to memory for continuity.',
    ].join(' ')
  }
  return null
}
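
// Usage sketch: callers can surface the returned string to the agent before the next turn.
// The surrounding variables and `appendSystemNote` are placeholders, not part of this module:
//
//   const warning = getContextDegradationWarning(messages, systemPromptTokens, provider, model)
//   if (warning) appendSystemNote(warning)
//   // e.g. at 72% usage on a 200k window, ~56,000 tokens remain, so the 70% branch
//   // reports "~28 turns" (floor(56,000 / 2,000); the divisor assumes ~2,000 tokens per turn)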

// --- Memory consolidation ---

/** Extract important facts from old messages before pruning */
export function consolidateToMemory(
  messages: Message[],
  agentId: string | null,
  sessionId: string,
): number {
  if (!agentId) return 0
  const db = getMemoryDb()
  let stored = 0

  for (const m of messages) {
    if (m.role !== 'assistant' || !m.text) continue
    // Look for decisions, commitments, key facts
    const text = m.text
    const hasDecision = /\b(decided|decision|agreed|committed|will do|plan is|approach is|chosen|selected)\b/i.test(text)
    const hasKeyFact = /\b(important|critical|note|remember|key point|constraint|requirement|deadline)\b/i.test(text)
    const hasResult = /\b(result|found|discovered|concluded|completed|built|created|deployed)\b/i.test(text)

    if (hasDecision || hasKeyFact || hasResult) {
      // Create a concise summary (first 500 chars)
      const summary = text.length > 500 ? text.slice(0, 500) + '...' : text
      const category = 'working/scratch'
      const title = `[auto-consolidated] ${text.slice(0, 60).replace(/\n/g, ' ')}`

      db.add({
        agentId,
        sessionId,
        category,
        title,
        content: summary,
        metadata: {
          origin: 'auto-consolidated',
          kind: hasDecision ? 'decision' : hasResult ? 'result' : 'note',
          tier: 'working',
        },
      })
      stored++
    }
  }
  return stored
}

// --- LLM compaction helpers ---

/** Extract recent tool failures from messages for metadata appendix */
export function extractToolFailures(messages: Message[]): string[] {
  const failures: string[] = []
  for (const m of messages) {
    if (!m.toolEvents) continue
    for (const te of m.toolEvents) {
      if (!te.error) continue
      const snippet = (te.output || '').slice(0, MAX_FAILURE_CHARS)
      failures.push(`[${te.name}] error: ${snippet}`)
    }
  }
  return failures.slice(-MAX_TOOL_FAILURES)
}
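
// Usage sketch (the tool-event shape is inferred from how this module reads it:
// name, input, output, and an error flag):
//
//   extractToolFailures([{
//     role: 'assistant', text: '', time: Date.now(),
//     toolEvents: [{ name: 'shell', input: '{"command":"npm test"}', output: 'Error: 3 tests failed', error: true }],
//   } as Message])
//   // → ['[shell] error: Error: 3 tests failed']  (output clipped to 240 chars; only the last 8 failures are kept)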

/** Extract file paths read and modified from tool events */
export function extractFileOperations(messages: Message[]): { read: string[]; modified: string[] } {
  const readSet = new Set<string>()
  const modifiedSet = new Set<string>()

  const READ_TOOLS = new Set(['read_file', 'list_files'])
  const WRITE_TOOLS = new Set(['write_file', 'edit_file', 'copy_file', 'move_file', 'delete_file'])

  for (const m of messages) {
    if (!m.toolEvents) continue
    for (const te of m.toolEvents) {
      let parsed: Record<string, unknown> | null = null
      try { parsed = JSON.parse(te.input) } catch { /* not JSON */ }
      if (!parsed) continue

      const paths: string[] = []
      for (const key of ['filePath', 'sourcePath', 'destinationPath']) {
        const v = parsed[key]
        if (typeof v === 'string' && v) paths.push(v)
      }

      const isRead = READ_TOOLS.has(te.name)
      const isWrite = WRITE_TOOLS.has(te.name)
      for (const p of paths) {
        if (isWrite) modifiedSet.add(p)
        else if (isRead) readSet.add(p)
      }
    }
  }
  return { read: [...readSet], modified: [...modifiedSet] }
}
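
// Usage sketch: the same path can land in both sets when it is read and then edited.
// The message and tool inputs below are hypothetical:
//
//   extractFileOperations([{
//     role: 'assistant', text: '', time: Date.now(),
//     toolEvents: [
//       { name: 'read_file', input: '{"filePath":"src/app.ts"}', output: 'contents' },
//       { name: 'edit_file', input: '{"filePath":"src/app.ts"}', output: 'ok' },
//     ],
//   } as Message])
//   // → { read: ['src/app.ts'], modified: ['src/app.ts'] }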

/** Split messages into chunks that fit within a token budget each */
export function splitMessagesByTokenBudget(messages: Message[], budgetPerChunk: number): Message[][] {
  if (messages.length === 0) return []
  const chunks: Message[][] = []
  let current: Message[] = []
  let currentTokens = 0

  for (const m of messages) {
    const msgTokens = estimateMessagesTokens([m])
    if (current.length > 0 && currentTokens + msgTokens > budgetPerChunk) {
      chunks.push(current)
      current = []
      currentTokens = 0
    }
    current.push(m)
    currentTokens += msgTokens
  }
  if (current.length > 0) chunks.push(current)
  return chunks
}
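
// Worked example: messages are grouped greedily and never split, so a single message
// larger than the budget still becomes its own chunk.
//
//   const msg = { role: 'user', text: 'x'.repeat(400), time: Date.now() } as Message  // ~104 tokens
//   splitMessagesByTokenBudget([msg, msg, msg], 250)
//   // → [[msg, msg], [msg]]  (104 + 104 fits under 250; adding the third message would not)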

/** Compute adaptive chunk ratio based on average message size. */
export function computeAdaptiveChunkRatio(messages: Message[], contextWindow: number): number {
  if (messages.length === 0) return BASE_CHUNK_RATIO
  const totalTokens = estimateMessagesTokens(messages)
  const avgTokens = totalTokens / messages.length
  const safeAvgTokens = avgTokens * COMPACTION_SAFETY_MARGIN
  const avgRatio = safeAvgTokens / contextWindow

  if (avgRatio > 0.1) {
    const reduction = Math.min(avgRatio * 2, BASE_CHUNK_RATIO - MIN_CHUNK_RATIO)
    return Math.max(MIN_CHUNK_RATIO, BASE_CHUNK_RATIO - reduction)
  }
  return BASE_CHUNK_RATIO
}
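
// Worked example on a hypothetical 100,000-token window:
//   avg message ≈ 6,000 tokens  → safe avg 7,200  → ratio 0.072 ≤ 0.1 → keep BASE_CHUNK_RATIO (0.4)
//   avg message ≈ 10,000 tokens → safe avg 12,000 → ratio 0.12 > 0.1
//     → reduction = min(0.24, 0.25) = 0.24 → chunk ratio = max(0.15, 0.4 - 0.24) = 0.16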

/** Retry an async function with exponential backoff */
async function retryAsync<T>(fn: () => Promise<T>, maxAttempts = 3, baseMs = 500): Promise<T> {
  let lastError: unknown
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    try {
      return await fn()
    } catch (err) {
      lastError = err
      if (attempt < maxAttempts - 1) {
        const delay = Math.min(baseMs * Math.pow(2, attempt) + Math.random() * 200, 5000)
        await new Promise((r) => setTimeout(r, delay))
      }
    }
  }
  throw lastError
}

/** Check if a single message is too large for the summarization context */
function isOversizedForSummary(msg: Message, maxChunkTokens: number): boolean {
  return estimateMessagesTokens([msg]) > maxChunkTokens * 0.5
}

/** Summarize in hierarchical stages if context is very large, with retry and oversized message handling */
export async function summarizeInStages(opts: {
  messages: Message[]
  contextWindow: number
  summarize: LLMSummarizer
  maxChunkTokens: number
}): Promise<string> {
  const { messages, summarize, maxChunkTokens } = opts
  const totalTokens = estimateMessagesTokens(messages)

  if (totalTokens <= maxChunkTokens || messages.length < 4) {
    return retryAsync(() => summarize(buildSummarizationPrompt(messages)))
  }

  // Separate oversized messages that would blow a single chunk
  const normalMessages: Message[] = []
  const oversizedMessages: Message[] = []
  for (const m of messages) {
    if (isOversizedForSummary(m, maxChunkTokens)) {
      oversizedMessages.push(m)
    } else {
      normalMessages.push(m)
    }
  }

  const chunks = splitMessagesByTokenBudget(normalMessages, maxChunkTokens)
  if (chunks.length <= 1 && oversizedMessages.length === 0) {
    return retryAsync(() => summarize(buildSummarizationPrompt(messages)))
  }

  const partialSummaries: string[] = []
  for (const chunk of chunks) {
    try {
      const partial = await retryAsync(() => summarize(buildSummarizationPrompt(chunk)))
      if (partial?.trim()) partialSummaries.push(partial.trim())
    } catch { /* skip failed chunk after retries */ }
  }

  // Note oversized messages that were excluded from summarization
  if (oversizedMessages.length > 0) {
    const notes = oversizedMessages.map((m) => {
      const preview = m.text.slice(0, 200).replace(/\n/g, ' ')
      const toolCount = m.toolEvents?.length || 0
      return `- [${m.role}] (oversized, ~${estimateMessagesTokens([m])} tokens${toolCount ? `, ${toolCount} tool calls` : ''}): ${preview}...`
    })
    partialSummaries.push(`## Oversized Messages (excluded from detailed summarization)\n${notes.join('\n')}`)
  }

  if (partialSummaries.length === 0) return 'Summary unavailable.'
  if (partialSummaries.length === 1) return partialSummaries[0]

  return retryAsync(() => summarize(buildMergePrompt(partialSummaries)))
}
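
// Usage sketch: `callModel` and `oldMessages` stand in for whatever LLM client and
// history slice the caller already has; neither is defined in this module.
//
//   const summarize: LLMSummarizer = (prompt) => callModel(prompt)
//   const summary = await summarizeInStages({
//     messages: oldMessages,
//     contextWindow: 200_000,
//     summarize,
//     maxChunkTokens: 60_000,
//   })
//   // Small inputs (≤ maxChunkTokens, or fewer than 4 messages) get a single summarization call;
//   // larger inputs are chunked, summarized per chunk with retries, then merged into one summary.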

/** Build an OpenClaw-aligned summarization prompt for a batch of messages */
function buildSummarizationPrompt(messages: Message[]): string {
  const transcript = messages.map((m) => {
    let line = `[${m.role}]: ${m.text}`
    if (m.toolEvents?.length) {
      for (const te of m.toolEvents) {
        const inp = (te.input || '').slice(0, 500)
        const out = (te.output || '').slice(0, 500)
        line += `\n  tool:${te.name}(${inp})${te.error ? ' [ERROR]' : ''} → ${out}`
      }
    }
    return line
  }).join('\n\n')

  return [
    'Summarize the following conversation transcript into structured notes.',
    '',
    'Rules:',
    '- Preserve all decisions, TODOs, open questions, and any constraints.',
    `- ${IDENTIFIER_PRESERVATION_INSTRUCTIONS}`,
    '- Note errors encountered and their resolutions.',
    '- Keep technical details needed to continue work (versions, configs, commands).',
    '- Aim for 20-40% of original length.',
    '- Use structured notes with bullet points, not narrative prose.',
    '- Group by topic/theme when possible.',
    '',
    '---TRANSCRIPT---',
    transcript,
    '---END TRANSCRIPT---',
  ].join('\n')
}

/** Build a merge prompt for combining multiple partial summaries */
function buildMergePrompt(partialSummaries: string[]): string {
  const numbered = partialSummaries.map((s, i) => `--- Part ${i + 1} ---\n${s}`).join('\n\n')

  return [
    'Merge the following partial conversation summaries into a single cohesive summary.',
    '',
    'Rules:',
    '- Remove redundancy across parts while preserving all important details.',
    `- ${MERGE_SUMMARIES_INSTRUCTIONS}`,
    `- ${IDENTIFIER_PRESERVATION_INSTRUCTIONS}`,
    '- Keep decisions, TODOs, open questions, constraints, and error resolutions.',
    '- Use structured notes with bullet points.',
    '- The result should be shorter than the combined input.',
    '',
    numbered,
  ].join('\n')
}

// --- Compaction strategies ---

export interface CompactionResult {
  messages: Message[]
  prunedCount: number
  memoriesStored: number
  summaryAdded: boolean
}

/** Sliding window: keep last N messages */
export function slidingWindowCompact(
  messages: Message[],
  keepLastN: number,
): Message[] {
  if (messages.length <= keepLastN) return messages
  return messages.slice(-keepLastN)
}

/** LLM-powered compaction: summarize old messages using an LLM, with progressive fallback */
export async function llmCompact(opts: {
  messages: Message[]
  provider: string
  model: string
  agentId: string | null
  sessionId: string
  summarize: LLMSummarizer
  keepLastN?: number
}): Promise<CompactionResult> {
  const { messages, provider, model, agentId, sessionId, summarize, keepLastN = 10 } = opts

  if (messages.length <= keepLastN) {
    return { messages, prunedCount: 0, memoriesStored: 0, summaryAdded: false }
  }

  const repaired = repairTranscriptConsistency(messages)
  const oldMessages = repaired.slice(0, -keepLastN)
  const recentMessages = repaired.slice(-keepLastN)
  // 0. Before dropping old messages, extract their key facts into RunContext (non-critical)
  try {
    const session = getSession(sessionId) as Session | undefined
    if (session && oldMessages.length > 0) {
      const extracted = extractFactsFromMessages(oldMessages)
      if (extracted.keyFacts.length > 0 || extracted.failedApproaches.length > 0) {
        const ctx = ensureRunContext(session.runContext)
        ctx.keyFacts = [...ctx.keyFacts, ...extracted.keyFacts]
        ctx.failedApproaches = [...ctx.failedApproaches, ...extracted.failedApproaches]
        ctx.version++
        ctx.updatedAt = Date.now()
        session.runContext = pruneRunContext(ctx)
        saveSession(sessionId, session)
      }
    }
  } catch { /* non-critical — compaction continues even if extraction fails */ }

  // 1. Consolidate important info to memory
  const memoriesStored = consolidateToMemory(oldMessages, agentId, sessionId)

  // 2. Extract metadata
  const toolFailures = extractToolFailures(oldMessages)
  const fileOps = extractFileOperations(oldMessages)

  // 3. Compute adaptive budget
  const contextWindow = getContextWindowSize(provider, model)
  const ratio = computeAdaptiveChunkRatio(oldMessages, contextWindow)
  const chunkBudget = Math.floor((contextWindow / COMPACTION_SAFETY_MARGIN) * ratio) - COMPACTION_OVERHEAD_TOKENS

  // 4. Hierarchical summarization
  let finalSummary: string | null = null
  try {
    finalSummary = await summarizeInStages({
      messages: oldMessages,
      contextWindow,
      summarize,
      maxChunkTokens: Math.max(chunkBudget, 2000),
    })
  } catch {
    finalSummary = null
  }

  // 5. Fall back to sliding window if LLM summarization failed entirely
  if (!finalSummary?.trim()) {
    return {
      messages: slidingWindowCompact(repaired, keepLastN),
      prunedCount: oldMessages.length,
      memoriesStored,
      summaryAdded: false,
    }
  }

  // 6. Append metadata sections
  const metaSections: string[] = [finalSummary.trim()]

  if (toolFailures.length > 0) {
    metaSections.push('\n## Tool Failures\n' + toolFailures.join('\n'))
  }
  if (fileOps.read.length > 0 || fileOps.modified.length > 0) {
    const parts: string[] = []
    if (fileOps.read.length) parts.push('Read: ' + fileOps.read.join(', '))
    if (fileOps.modified.length) parts.push('Modified: ' + fileOps.modified.join(', '))
    metaSections.push('\n## File Operations\n' + parts.join('\n'))
  }

  // 7. Build context summary message
  const summaryMessage: Message = {
    role: 'assistant',
    text: `[Context Summary]\n${metaSections.join('\n')}`,
    time: Date.now(),
    kind: 'system',
  }

  return {
    messages: [summaryMessage, ...recentMessages],
    prunedCount: oldMessages.length,
    memoriesStored,
    summaryAdded: true,
  }
}
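
// Usage sketch (`session`, `agentId`, `sessionId`, and `summarize` are placeholders for
// the caller's own values, as in the summarizeInStages sketch above):
//
//   const result = await llmCompact({
//     messages: session.messages,
//     provider: 'anthropic',
//     model: 'claude-sonnet-4-6',
//     agentId,
//     sessionId,
//     summarize,
//     keepLastN: 10,
//   })
//   // Success: result.messages = [one '[Context Summary]' message, ...the 10 most recent originals]
//   // and summaryAdded === true. If summarization fails outright, it degrades to a plain
//   // sliding window with summaryAdded === false.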

/** Summarize old messages, keep recent ones. Delegates to llmCompact for LLM-powered summarization. */
export async function summarizeAndCompact(opts: {
  messages: Message[]
  keepLastN: number
  agentId: string | null
  sessionId: string
  provider: string
  model: string
  generateSummary: LLMSummarizer
}): Promise<CompactionResult> {
  const { messages, keepLastN, agentId, sessionId, provider, model, generateSummary } = opts

  return llmCompact({
    messages,
    provider,
    model,
    agentId,
    sessionId,
    summarize: generateSummary,
    keepLastN,
  })
}

// --- Emergency context reduction (no LLM calls) ---

/**
 * Aggressively reduce message history when the provider rejects the prompt
 * for exceeding the context window. Uses simple slicing and truncation —
 * no LLM summarization (which would itself risk overflow).
 *
 * @param messages - The effective message history
 * @param attempt  - 1-based retry attempt (controls how aggressively to cut)
 * @returns A reduced copy of the messages array
 */
export function emergencyContextReduce(messages: Message[], attempt: number): Message[] {
  const keepCount = attempt <= 1 ? 15 : 6
  const maxAssistantChars = attempt <= 1 ? 4_000 : 2_000

  const sliced = messages.length > keepCount ? messages.slice(-keepCount) : [...messages]

  return sliced.map((m) => {
    const copy = { ...m }
    // Strip toolEvents to save space
    if (copy.toolEvents) {
      delete copy.toolEvents
    }
    // Truncate long assistant messages
    if (copy.role === 'assistant' && copy.text && copy.text.length > maxAssistantChars) {
      copy.text = copy.text.slice(0, maxAssistantChars) + '\n\n[… truncated for context overflow recovery]'
    }
    return copy
  })
}
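
// Usage sketch, meant for the retry loop after a provider rejects the request for size:
//
//   let reduced = emergencyContextReduce(messages, 1)  // keep 15 messages, cap assistant text at 4,000 chars
//   // ...retry the request; if it is rejected again:
//   reduced = emergencyContextReduce(messages, 2)      // keep 6 messages, cap at 2,000 chars
//   // Tool events are dropped on both passes.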

/** Auto-compact check: returns true when effective context usage (including the compaction reserve) reaches the trigger percentage */
export function shouldAutoCompact(
  messages: Message[],
  systemPromptTokens: number,
  provider: string,
  model: string,
  triggerPercent = 80,
  options: ContextStatusOptions = {},
): boolean {
  const status = getContextStatus(messages, systemPromptTokens, provider, model, {
    ...options,
    reserveTokens: options.reserveTokens ?? resolveCompactionReserveTokens(provider, model),
  })
  return status.percentUsed >= triggerPercent
}
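
// Usage sketch tying the pieces together (`generateSummary` and the surrounding variables are
// placeholders for whatever the caller already has in scope):
//
//   if (shouldAutoCompact(messages, systemPromptTokens, provider, model)) {
//     const { messages: compacted, prunedCount } = await summarizeAndCompact({
//       messages, keepLastN: 10, agentId, sessionId, provider, model, generateSummary,
//     })
//     // `compacted` replaces the session history; `prunedCount` messages were folded into the summary.
//   }
//   // Note: when options.reserveTokens is not supplied, the reserve from resolveCompactionReserveTokens()
//   // is counted against the window before comparing to triggerPercent.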