context-manager.ts
import type { Message, Session } from '@/types'
import { getMemoryDb } from '@/lib/server/memory/memory-db'
import { extractFactsFromMessages, ensureRunContext, pruneRunContext } from '@/lib/server/run-context'
import { getSession, saveSession } from '@/lib/server/sessions/session-repository'

import { repairTranscriptConsistency } from './transcript-repair'

// --- LLM compaction constants ---

const BASE_CHUNK_RATIO = 0.4
const MIN_CHUNK_RATIO = 0.15
const COMPACTION_SAFETY_MARGIN = 1.2
const COMPACTION_OVERHEAD_TOKENS = 4096
const DEFAULT_COMPACTION_RESERVE_TOKENS_FLOOR = 20_000
const MIN_COMPACTION_RESERVE_TOKENS = 1_024
const MAX_TOOL_FAILURES = 8
const MAX_FAILURE_CHARS = 240

const MERGE_SUMMARIES_INSTRUCTIONS =
  'Merge these partial summaries into a single cohesive summary. Preserve decisions,' +
  ' TODOs, open questions, constraints, active tasks and their current status,' +
  ' batch operation progress (e.g., "5/17 items completed"),' +
  ' the last thing the user requested, and any commitments or follow-ups promised.'

const IDENTIFIER_PRESERVATION_INSTRUCTIONS =
  'Preserve all opaque identifiers exactly as written (no shortening or reconstruction), ' +
  'including UUIDs, hashes, IDs, tokens, API keys, hostnames, IPs, ports, URLs, and file names.'

/** Callback that sends a prompt to an LLM and returns response text */
export type LLMSummarizer = (prompt: string) => Promise<string>

// --- Context window sizes (tokens) per provider/model ---

const PROVIDER_CONTEXT_WINDOWS: Record<string, number> = {
  // Anthropic
  'claude-opus-4-6': 200_000,
  'claude-sonnet-4-6': 200_000,
  'claude-haiku-4-5-20251001': 200_000,
  'claude-sonnet-4-5-20250514': 200_000,
  // OpenAI
  'gpt-4o': 128_000,
  'gpt-4o-mini': 128_000,
  'gpt-4.1': 1_047_576,
  'gpt-4.1-mini': 1_047_576,
  'gpt-4.1-nano': 1_047_576,
  'o3': 200_000,
  'o3-mini': 128_000,
  'o4-mini': 200_000,
  // Codex CLI
  'gpt-5.3-codex': 1_047_576,
  'gpt-5.2-codex': 1_047_576,
  'gpt-5.1-codex': 1_047_576,
  'gpt-5-codex': 1_047_576,
  'gpt-5-codex-mini': 1_047_576,
  // Google Gemini
  'gemini-2.5-pro': 1_048_576,
  'gemini-2.5-flash': 1_048_576,
  'gemini-2.5-flash-lite': 1_048_576,
  // DeepSeek
  'deepseek-chat': 64_000,
  'deepseek-reasoner': 64_000,
  // Mistral
  'mistral-large-latest': 128_000,
  'mistral-small-latest': 128_000,
  'magistral-medium-2506': 128_000,
  'devstral-small-latest': 128_000,
  // xAI
  'grok-3': 131_072,
  'grok-3-fast': 131_072,
  'grok-3-mini': 131_072,
  'grok-3-mini-fast': 131_072,
}

const PROVIDER_DEFAULT_WINDOWS: Record<string, number> = {
  anthropic: 200_000,
  'claude-cli': 200_000,
  openai: 128_000,
  'codex-cli': 1_047_576,
  'opencode-cli': 200_000,
  'opencode-web': 200_000,
  'gemini-cli': 1_048_576,
  'copilot-cli': 200_000,
  'droid-cli': 200_000,
  'cursor-cli': 200_000,
  'qwen-code-cli': 1_048_576,
  google: 1_048_576,
  deepseek: 64_000,
  groq: 32_768,
  together: 32_768,
  mistral: 128_000,
  xai: 131_072,
  fireworks: 32_768,
  ollama: 32_768,
  goose: 200_000,
  openclaw: 128_000,
}

/** Get context window size for a model, falling back to provider default */
export function getContextWindowSize(provider: string, model: string): number {
  return PROVIDER_CONTEXT_WINDOWS[model]
    || PROVIDER_DEFAULT_WINDOWS[provider]
    || 8_192
}
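// Illustrative lookup behavior (model table → provider default → 8,192-token floor).
// The provider/model strings below are examples only; any key absent from both tables
// falls through to the hard floor:
//
//   getContextWindowSize('openai', 'gpt-4o')             // 128_000 (model table)
//   getContextWindowSize('anthropic', 'unlisted-model')  // 200_000 (provider default)
//   getContextWindowSize('some-provider', 'some-model')  // 8_192 (hard fallback)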
// --- Token estimation ---

/** Rough token estimate: ~4 chars per token for English text */
export function estimateTokens(text: string): number {
  if (!text) return 0
  return Math.ceil(text.length / 4)
}

/** Estimate total tokens for a message array */
export function estimateMessagesTokens(
  messages: Message[],
  options: { includeToolEvents?: boolean } = {},
): number {
  const includeToolEvents = options.includeToolEvents !== false
  let total = 0
  for (const m of messages) {
    // Role + overhead per message (~4 tokens)
    total += 4
    total += estimateTokens(m.text)
    if (includeToolEvents && m.toolEvents) {
      for (const te of m.toolEvents) {
        total += estimateTokens(te.name) + estimateTokens(te.input)
        if (te.output) total += estimateTokens(te.output)
      }
    }
  }
  return total
}

// --- Context window guard ---

/** Hard minimum: don't even attempt LLM calls below this */
const CONTEXT_WINDOW_HARD_MIN_TOKENS = 16_000
/** Warn the agent when remaining context is below this */
const CONTEXT_WINDOW_WARN_BELOW_TOKENS = 32_000

export interface ContextWindowGuardResult {
  contextWindowTokens: number
  shouldBlock: boolean
  shouldWarn: boolean
  message: string | null
}

/** Evaluate whether the context window is too small for useful execution */
export function evaluateContextWindowGuard(provider: string, model: string): ContextWindowGuardResult {
  const tokens = getContextWindowSize(provider, model)
  if (tokens < CONTEXT_WINDOW_HARD_MIN_TOKENS) {
    return {
      contextWindowTokens: tokens,
      shouldBlock: true,
      shouldWarn: false,
      message: `Context window too small (${tokens.toLocaleString()} tokens). Minimum required: ${CONTEXT_WINDOW_HARD_MIN_TOKENS.toLocaleString()} tokens.`,
    }
  }
  if (tokens < CONTEXT_WINDOW_WARN_BELOW_TOKENS) {
    return {
      contextWindowTokens: tokens,
      shouldBlock: false,
      shouldWarn: true,
      message: `Small context window (${tokens.toLocaleString()} tokens). Agent may struggle with complex tasks. Consider using a model with a larger context window.`,
    }
  }
  return { contextWindowTokens: tokens, shouldBlock: false, shouldWarn: false, message: null }
}
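// Illustrative pre-run guard check. How the result is surfaced is up to the caller
// (the call sites live outside this module); the provider/model pair below is a
// hypothetical one that hits the 8_192-token fallback and is therefore blocked:
//
//   const guard = evaluateContextWindowGuard('local-llm', 'tiny-model')
//   if (guard.shouldBlock) throw new Error(guard.message ?? 'Context window too small')
//   if (guard.shouldWarn && guard.message) console.warn(guard.message)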
// --- Context status ---

export interface ContextStatus {
  estimatedTokens: number
  effectiveTokens: number
  contextWindow: number
  percentUsed: number
  messageCount: number
  extraTokens: number
  reserveTokens: number
  remainingTokens: number
  strategy: 'ok' | 'warning' | 'critical'
}

export interface ContextStatusOptions {
  extraTokens?: number
  reserveTokens?: number
  includeToolEvents?: boolean
}

export function resolveCompactionReserveTokens(provider: string, model: string): number {
  const contextWindow = getContextWindowSize(provider, model)
  if (contextWindow <= 0) return MIN_COMPACTION_RESERVE_TOKENS
  return Math.max(
    MIN_COMPACTION_RESERVE_TOKENS,
    Math.min(DEFAULT_COMPACTION_RESERVE_TOKENS_FLOOR, Math.floor(contextWindow * 0.2)),
  )
}

export function getContextStatus(
  messages: Message[],
  systemPromptTokens: number,
  provider: string,
  model: string,
  options: ContextStatusOptions = {},
): ContextStatus {
  const contextWindow = getContextWindowSize(provider, model)
  const messageTokens = estimateMessagesTokens(messages, {
    includeToolEvents: options.includeToolEvents,
  })
  const extraTokens = Math.max(0, Math.trunc(options.extraTokens || 0))
  const reserveTokens = Math.max(0, Math.trunc(options.reserveTokens || 0))
  const estimatedTokens = messageTokens + systemPromptTokens + extraTokens
  const effectiveTokens = estimatedTokens + reserveTokens
  const percentUsed = Math.round((effectiveTokens / contextWindow) * 100)
  const remainingTokens = Math.max(0, contextWindow - effectiveTokens)
  return {
    estimatedTokens,
    effectiveTokens,
    contextWindow,
    percentUsed,
    messageCount: messages.length,
    extraTokens,
    reserveTokens,
    remainingTokens,
    strategy: percentUsed >= 90 ? 'critical' : percentUsed >= 70 ? 'warning' : 'ok',
  }
}

// --- Context degradation warnings ---

/** Returns a warning string when context usage exceeds thresholds, or null if within safe bounds. */
export function getContextDegradationWarning(
  messages: Message[],
  systemPromptTokens: number,
  provider: string,
  model: string,
  options: ContextStatusOptions = {},
): string | null {
  const status = getContextStatus(messages, systemPromptTokens, provider, model, options)
  const pct = status.percentUsed
  const remaining = status.remainingTokens
  const estTurnsLeft = Math.max(0, Math.floor(remaining / 2000))

  if (pct >= 85) {
    return [
      `[CONTEXT_WARNING] Context window is ${pct}% full (${status.effectiveTokens.toLocaleString()} / ${status.contextWindow.toLocaleString()} effective tokens).`,
      `Estimated remaining capacity: ~${estTurnsLeft} turns.`,
      'CRITICAL: Save essential state to memory immediately. Summarize key findings, decisions, and next steps.',
      'Consider completing the current subtask and storing a checkpoint before context is exhausted.',
    ].join(' ')
  }
  if (pct >= 70) {
    return [
      `[CONTEXT_WARNING] Context window is ${pct}% full.`,
      `Estimated remaining capacity: ~${estTurnsLeft} turns.`,
      'Recommended: Store important progress notes to memory. Prioritize completing high-value subtasks.',
    ].join(' ')
  }
  if (pct >= 60) {
    return [
      `[CONTEXT_WARNING] Context window is ${pct}% full (~${estTurnsLeft} turns remaining).`,
      'Consider saving intermediate state to memory for continuity.',
    ].join(' ')
  }
  return null
}
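// Illustrative per-turn status check. `history` and `systemPromptTokens` are placeholders
// for the caller's real message history and system-prompt estimate:
//
//   const reserve = resolveCompactionReserveTokens('openai', 'gpt-4o')  // min(20_000, 20% of 128_000) = 20_000
//   const status = getContextStatus(history, systemPromptTokens, 'openai', 'gpt-4o', { reserveTokens: reserve })
//   if (status.strategy !== 'ok') {
//     const warning = getContextDegradationWarning(history, systemPromptTokens, 'openai', 'gpt-4o', { reserveTokens: reserve })
//     // e.g. surface `warning` to the agent as a system-style note
//   }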
// --- Memory consolidation ---

/** Extract important facts from old messages before pruning */
export function consolidateToMemory(
  messages: Message[],
  agentId: string | null,
  sessionId: string,
): number {
  if (!agentId) return 0
  const db = getMemoryDb()
  let stored = 0

  for (const m of messages) {
    if (m.role !== 'assistant' || !m.text) continue
    // Look for decisions, commitments, key facts
    const text = m.text
    const hasDecision = /\b(decided|decision|agreed|committed|will do|plan is|approach is|chosen|selected)\b/i.test(text)
    const hasKeyFact = /\b(important|critical|note|remember|key point|constraint|requirement|deadline)\b/i.test(text)
    const hasResult = /\b(result|found|discovered|concluded|completed|built|created|deployed)\b/i.test(text)

    if (hasDecision || hasKeyFact || hasResult) {
      // Create a concise summary (first 500 chars)
      const summary = text.length > 500 ? text.slice(0, 500) + '...' : text
      const category = 'working/scratch'
      const title = `[auto-consolidated] ${text.slice(0, 60).replace(/\n/g, ' ')}`

      db.add({
        agentId,
        sessionId,
        category,
        title,
        content: summary,
        metadata: {
          origin: 'auto-consolidated',
          kind: hasDecision ? 'decision' : hasResult ? 'result' : 'note',
          tier: 'working',
        },
      })
      stored++
    }
  }
  return stored
}

// --- LLM compaction helpers ---

/** Extract recent tool failures from messages for metadata appendix */
export function extractToolFailures(messages: Message[]): string[] {
  const failures: string[] = []
  for (const m of messages) {
    if (!m.toolEvents) continue
    for (const te of m.toolEvents) {
      if (!te.error) continue
      const snippet = (te.output || '').slice(0, MAX_FAILURE_CHARS)
      failures.push(`[${te.name}] error: ${snippet}`)
    }
  }
  return failures.slice(-MAX_TOOL_FAILURES)
}

/** Extract file paths read and modified from tool events */
export function extractFileOperations(messages: Message[]): { read: string[]; modified: string[] } {
  const readSet = new Set<string>()
  const modifiedSet = new Set<string>()

  const READ_TOOLS = new Set(['read_file', 'list_files'])
  const WRITE_TOOLS = new Set(['write_file', 'edit_file', 'copy_file', 'move_file', 'delete_file'])

  for (const m of messages) {
    if (!m.toolEvents) continue
    for (const te of m.toolEvents) {
      let parsed: Record<string, unknown> | null = null
      try { parsed = JSON.parse(te.input) } catch { /* not JSON */ }
      if (!parsed) continue

      const paths: string[] = []
      for (const key of ['filePath', 'sourcePath', 'destinationPath']) {
        const v = parsed[key]
        if (typeof v === 'string' && v) paths.push(v)
      }

      const isRead = READ_TOOLS.has(te.name)
      const isWrite = WRITE_TOOLS.has(te.name)
      for (const p of paths) {
        if (isWrite) modifiedSet.add(p)
        else if (isRead) readSet.add(p)
      }
    }
  }
  return { read: [...readSet], modified: [...modifiedSet] }
}
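// Illustrative input/output for the extractors above (a sketch; the literal omits any
// fields the Message type may require beyond the ones these helpers actually read):
//
//   const msgs: Message[] = [{
//     role: 'assistant',
//     text: 'Updated the config.',
//     time: Date.now(),
//     toolEvents: [
//       { name: 'read_file', input: '{"filePath":"src/config.ts"}', output: '…' },
//       { name: 'write_file', input: '{"filePath":"src/config.ts"}', output: 'ok' },
//     ],
//   }]
//   extractFileOperations(msgs) // { read: ['src/config.ts'], modified: ['src/config.ts'] }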
/** Split messages into chunks that fit within a token budget each */
export function splitMessagesByTokenBudget(messages: Message[], budgetPerChunk: number): Message[][] {
  if (messages.length === 0) return []
  const chunks: Message[][] = []
  let current: Message[] = []
  let currentTokens = 0

  for (const m of messages) {
    const msgTokens = estimateMessagesTokens([m])
    if (current.length > 0 && currentTokens + msgTokens > budgetPerChunk) {
      chunks.push(current)
      current = []
      currentTokens = 0
    }
    current.push(m)
    currentTokens += msgTokens
  }
  if (current.length > 0) chunks.push(current)
  return chunks
}

/** Compute adaptive chunk ratio based on average message size. */
export function computeAdaptiveChunkRatio(messages: Message[], contextWindow: number): number {
  if (messages.length === 0) return BASE_CHUNK_RATIO
  const totalTokens = estimateMessagesTokens(messages)
  const avgTokens = totalTokens / messages.length
  const safeAvgTokens = avgTokens * COMPACTION_SAFETY_MARGIN
  const avgRatio = safeAvgTokens / contextWindow

  if (avgRatio > 0.1) {
    const reduction = Math.min(avgRatio * 2, BASE_CHUNK_RATIO - MIN_CHUNK_RATIO)
    return Math.max(MIN_CHUNK_RATIO, BASE_CHUNK_RATIO - reduction)
  }
  return BASE_CHUNK_RATIO
}

/** Retry an async function with exponential backoff */
async function retryAsync<T>(fn: () => Promise<T>, maxAttempts = 3, baseMs = 500): Promise<T> {
  let lastError: unknown
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    try {
      return await fn()
    } catch (err) {
      lastError = err
      if (attempt < maxAttempts - 1) {
        const delay = Math.min(baseMs * Math.pow(2, attempt) + Math.random() * 200, 5000)
        await new Promise((r) => setTimeout(r, delay))
      }
    }
  }
  throw lastError
}

/** Check if a single message is too large for the summarization context */
function isOversizedForSummary(msg: Message, maxChunkTokens: number): boolean {
  return estimateMessagesTokens([msg]) > maxChunkTokens * 0.5
}
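// Illustrative chunk-budget math, mirroring how llmCompact (below) combines these helpers.
// The numbers assume a 128_000-token window and the default constants at the top of this file;
// `oldMessages` stands in for whatever slice the caller wants summarized:
//
//   const ratio = computeAdaptiveChunkRatio(oldMessages, 128_000)  // 0.4 unless messages are unusually large
//   const budget = Math.floor((128_000 / COMPACTION_SAFETY_MARGIN) * ratio) - COMPACTION_OVERHEAD_TOKENS
//   // at ratio 0.4: floor(128_000 / 1.2 * 0.4) - 4_096 = 42_666 - 4_096 = 38_570 tokens per chunk
//   const chunks = splitMessagesByTokenBudget(oldMessages, Math.max(budget, 2000))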
/** Summarize in hierarchical stages if context is very large, with retry and oversized message handling */
export async function summarizeInStages(opts: {
  messages: Message[]
  contextWindow: number
  summarize: LLMSummarizer
  maxChunkTokens: number
}): Promise<string> {
  const { messages, summarize, maxChunkTokens } = opts
  const totalTokens = estimateMessagesTokens(messages)

  if (totalTokens <= maxChunkTokens || messages.length < 4) {
    return retryAsync(() => summarize(buildSummarizationPrompt(messages)))
  }

  // Separate oversized messages that would blow a single chunk
  const normalMessages: Message[] = []
  const oversizedMessages: Message[] = []
  for (const m of messages) {
    if (isOversizedForSummary(m, maxChunkTokens)) {
      oversizedMessages.push(m)
    } else {
      normalMessages.push(m)
    }
  }

  const chunks = splitMessagesByTokenBudget(normalMessages, maxChunkTokens)
  if (chunks.length <= 1 && oversizedMessages.length === 0) {
    return retryAsync(() => summarize(buildSummarizationPrompt(messages)))
  }

  const partialSummaries: string[] = []
  for (const chunk of chunks) {
    try {
      const partial = await retryAsync(() => summarize(buildSummarizationPrompt(chunk)))
      if (partial?.trim()) partialSummaries.push(partial.trim())
    } catch { /* skip failed chunk after retries */ }
  }

  // Note oversized messages that were excluded from summarization
  if (oversizedMessages.length > 0) {
    const notes = oversizedMessages.map((m) => {
      const preview = m.text.slice(0, 200).replace(/\n/g, ' ')
      const toolCount = m.toolEvents?.length || 0
      return `- [${m.role}] (oversized, ~${estimateMessagesTokens([m])} tokens${toolCount ? `, ${toolCount} tool calls` : ''}): ${preview}...`
    })
    partialSummaries.push(`## Oversized Messages (excluded from detailed summarization)\n${notes.join('\n')}`)
  }

  if (partialSummaries.length === 0) return 'Summary unavailable.'
  if (partialSummaries.length === 1) return partialSummaries[0]

  return retryAsync(() => summarize(buildMergePrompt(partialSummaries)))
}

/** Build an OpenClaw-aligned summarization prompt for a batch of messages */
function buildSummarizationPrompt(messages: Message[]): string {
  const transcript = messages.map((m) => {
    let line = `[${m.role}]: ${m.text}`
    if (m.toolEvents?.length) {
      for (const te of m.toolEvents) {
        const inp = (te.input || '').slice(0, 500)
        const out = (te.output || '').slice(0, 500)
        line += `\n tool:${te.name}(${inp})${te.error ? ' [ERROR]' : ''} → ${out}`
      }
    }
    return line
  }).join('\n\n')

  return [
    'Summarize the following conversation transcript into structured notes.',
    '',
    'Rules:',
    '- Preserve all decisions, TODOs, open questions, and any constraints.',
    `- ${IDENTIFIER_PRESERVATION_INSTRUCTIONS}`,
    '- Note errors encountered and their resolutions.',
    '- Keep technical details needed to continue work (versions, configs, commands).',
    '- Aim for 20-40% of original length.',
    '- Use structured notes with bullet points, not narrative prose.',
    '- Group by topic/theme when possible.',
    '',
    '---TRANSCRIPT---',
    transcript,
    '---END TRANSCRIPT---',
  ].join('\n')
}

/** Build a merge prompt for combining multiple partial summaries */
function buildMergePrompt(partialSummaries: string[]): string {
  const numbered = partialSummaries.map((s, i) => `--- Part ${i + 1} ---\n${s}`).join('\n\n')

  return [
    'Merge the following partial conversation summaries into a single cohesive summary.',
    '',
    'Rules:',
    '- Remove redundancy across parts while preserving all important details.',
    `- ${MERGE_SUMMARIES_INSTRUCTIONS}`,
    `- ${IDENTIFIER_PRESERVATION_INSTRUCTIONS}`,
    '- Keep decisions, TODOs, open questions, constraints, and error resolutions.',
    '- Use structured notes with bullet points.',
    '- The result should be shorter than the combined input.',
    '',
    numbered,
  ].join('\n')
}
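// Illustrative wiring of summarizeInStages with a hand-rolled summarizer. The `callModel`
// helper is hypothetical; any function matching LLMSummarizer ((prompt) => Promise<string>)
// that reaches your provider works:
//
//   const summarize: LLMSummarizer = (prompt) => callModel({ provider: 'openai', model: 'gpt-4o', prompt })
//   const summary = await summarizeInStages({
//     messages: oldMessages,
//     contextWindow: getContextWindowSize('openai', 'gpt-4o'),
//     summarize,
//     maxChunkTokens: 38_570, // see the chunk-budget sketch above
//   })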
// --- Compaction strategies ---

export interface CompactionResult {
  messages: Message[]
  prunedCount: number
  memoriesStored: number
  summaryAdded: boolean
}

/** Sliding window: keep last N messages */
export function slidingWindowCompact(
  messages: Message[],
  keepLastN: number,
): Message[] {
  if (messages.length <= keepLastN) return messages
  return messages.slice(-keepLastN)
}

/** LLM-powered compaction: summarize old messages using an LLM, with progressive fallback */
export async function llmCompact(opts: {
  messages: Message[]
  provider: string
  model: string
  agentId: string | null
  sessionId: string
  summarize: LLMSummarizer
  keepLastN?: number
}): Promise<CompactionResult> {
  const { messages, provider, model, agentId, sessionId, summarize, keepLastN = 10 } = opts

  if (messages.length <= keepLastN) {
    return { messages, prunedCount: 0, memoriesStored: 0, summaryAdded: false }
  }

  const repaired = repairTranscriptConsistency(messages)
  const oldMessages = repaired.slice(0, -keepLastN)
  const recentMessages = repaired.slice(-keepLastN)

  // 0. Fold facts from the messages about to be dropped into the session RunContext (non-critical)
  try {
    const session = getSession(sessionId) as Session | undefined
    if (session && oldMessages.length > 0) {
      const extracted = extractFactsFromMessages(oldMessages)
      if (extracted.keyFacts.length > 0 || extracted.failedApproaches.length > 0) {
        const ctx = ensureRunContext(session.runContext)
        ctx.keyFacts = [...ctx.keyFacts, ...extracted.keyFacts]
        ctx.failedApproaches = [...ctx.failedApproaches, ...extracted.failedApproaches]
        ctx.version++
        ctx.updatedAt = Date.now()
        session.runContext = pruneRunContext(ctx)
        saveSession(sessionId, session)
      }
    }
  } catch { /* non-critical — compaction continues even if extraction fails */ }

  // 1. Consolidate important info to memory
  const memoriesStored = consolidateToMemory(oldMessages, agentId, sessionId)

  // 2. Extract metadata
  const toolFailures = extractToolFailures(oldMessages)
  const fileOps = extractFileOperations(oldMessages)

  // 3. Compute adaptive budget
  const contextWindow = getContextWindowSize(provider, model)
  const ratio = computeAdaptiveChunkRatio(oldMessages, contextWindow)
  const chunkBudget = Math.floor((contextWindow / COMPACTION_SAFETY_MARGIN) * ratio) - COMPACTION_OVERHEAD_TOKENS

  // 4. Hierarchical summarization
  let finalSummary: string | null = null
  try {
    finalSummary = await summarizeInStages({
      messages: oldMessages,
      contextWindow,
      summarize,
      maxChunkTokens: Math.max(chunkBudget, 2000),
    })
  } catch {
    finalSummary = null
  }

  // 5. Fall back to sliding window if LLM summarization failed entirely
  if (!finalSummary?.trim()) {
    return {
      messages: slidingWindowCompact(repaired, keepLastN),
      prunedCount: oldMessages.length,
      memoriesStored,
      summaryAdded: false,
    }
  }

  // 6. Append metadata sections
  const metaSections: string[] = [finalSummary.trim()]

  if (toolFailures.length > 0) {
    metaSections.push('\n## Tool Failures\n' + toolFailures.join('\n'))
  }
  if (fileOps.read.length > 0 || fileOps.modified.length > 0) {
    const parts: string[] = []
    if (fileOps.read.length) parts.push('Read: ' + fileOps.read.join(', '))
    if (fileOps.modified.length) parts.push('Modified: ' + fileOps.modified.join(', '))
    metaSections.push('\n## File Operations\n' + parts.join('\n'))
  }

  // 7. Build context summary message
  const summaryMessage: Message = {
    role: 'assistant',
    text: `[Context Summary]\n${metaSections.join('\n')}`,
    time: Date.now(),
    kind: 'system',
  }

  return {
    messages: [summaryMessage, ...recentMessages],
    prunedCount: oldMessages.length,
    memoriesStored,
    summaryAdded: true,
  }
}
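// Illustrative end-to-end compaction call. The `summarize` callback and the `session.*`
// fields are placeholders for what the active run actually provides; the Session shape
// assumed here is not defined in this module:
//
//   const result = await llmCompact({
//     messages: session.messages,
//     provider: 'anthropic',
//     model: 'claude-sonnet-4-6',
//     agentId: session.agentId ?? null,
//     sessionId: session.id,
//     summarize,   // an LLMSummarizer, e.g. the one sketched above
//     keepLastN: 10,
//   })
//   // when result.summaryAdded is true, result.messages = [summary message, ...last 10 messages]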
/** Summarize old messages, keep recent ones. Delegates to llmCompact for LLM-powered summarization. */
export async function summarizeAndCompact(opts: {
  messages: Message[]
  keepLastN: number
  agentId: string | null
  sessionId: string
  provider: string
  model: string
  generateSummary: LLMSummarizer
}): Promise<CompactionResult> {
  const { messages, keepLastN, agentId, sessionId, provider, model, generateSummary } = opts

  return llmCompact({
    messages,
    provider,
    model,
    agentId,
    sessionId,
    summarize: generateSummary,
    keepLastN,
  })
}

// --- Emergency context reduction (no LLM calls) ---

/**
 * Aggressively reduce message history when the provider rejects the prompt
 * for exceeding the context window. Uses simple slicing and truncation —
 * no LLM summarization (which would itself risk overflow).
 *
 * @param messages - The effective message history
 * @param attempt - 1-based retry attempt (controls how aggressively to cut)
 * @returns A reduced copy of the messages array
 */
export function emergencyContextReduce(messages: Message[], attempt: number): Message[] {
  const keepCount = attempt <= 1 ? 15 : 6
  const maxAssistantChars = attempt <= 1 ? 4_000 : 2_000

  const sliced = messages.length > keepCount ? messages.slice(-keepCount) : [...messages]

  return sliced.map((m) => {
    const copy = { ...m }
    // Strip toolEvents to save space
    if (copy.toolEvents) {
      delete copy.toolEvents
    }
    // Truncate long assistant messages
    if (copy.role === 'assistant' && copy.text && copy.text.length > maxAssistantChars) {
      copy.text = copy.text.slice(0, maxAssistantChars) + '\n\n[… truncated for context overflow recovery]'
    }
    return copy
  })
}

/** Auto-compact: triggers when estimated tokens exceed threshold */
export function shouldAutoCompact(
  messages: Message[],
  systemPromptTokens: number,
  provider: string,
  model: string,
  triggerPercent = 80,
  options: ContextStatusOptions = {},
): boolean {
  const status = getContextStatus(messages, systemPromptTokens, provider, model, {
    ...options,
    reserveTokens: options.reserveTokens ?? resolveCompactionReserveTokens(provider, model),
  })
  return status.percentUsed >= triggerPercent
}
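// Illustrative turn-loop integration. Names such as `history`, `systemPromptTokens`,
// `generateSummary`, and `attempt` are placeholders for whatever the calling agent loop uses:
//
//   if (shouldAutoCompact(history, systemPromptTokens, provider, model)) {
//     const compacted = await summarizeAndCompact({
//       messages: history, keepLastN: 10, agentId, sessionId, provider, model, generateSummary,
//     })
//     history = compacted.messages
//   }
//
//   // If the provider still rejects the prompt for size, degrade without further LLM calls:
//   //   attempt 1 → keep last 15 messages, truncate assistant text to 4_000 chars
//   //   attempt 2+ → keep last 6 messages, truncate to 2_000 chars
//   history = emergencyContextReduce(history, attempt)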