index.ts
/**
 * Model-Aware Compaction
 *
 * Adds per-model context-usage thresholds to Pi's built-in auto-compaction.
 * Thresholds are percent-used (0–100), configured in config.json.
 *
 * Design constraint: extensions cannot access InteractiveMode's compaction queue
 * (the "Queued message for after compaction" mechanism). Calling ctx.compact()
 * directly skips the compaction-summary UI and won't auto-send queued messages.
 *
 * Approach: at agent_end, if a model-specific threshold is exceeded, inflate
 * lastAssistant.usage.totalTokens past the context window. Pi's _checkCompaction()
 * then fires its normal pipeline — loader → compact → summary → queued-message
 * flush — preserving the full native UX. The inflated value is ephemeral;
 * compaction rebuilds messages from the session file.
 *
 * Session event handlers (session_start, session_tree, session_before_compact,
 * session_compact) reset internal state — cooldown timers, cached message
 * references — to stay consistent across navigations.
 *
 * Requires compaction.enabled: true in settings.json. See README.md for
 * threshold tuning and reserveTokens guidance.
 */

import {
  buildSessionContext,
  estimateTokens,
  type ExtensionAPI,
  type ExtensionContext,
} from "@mariozechner/pi-coding-agent";
import { existsSync, readFileSync } from "node:fs";
import { homedir } from "node:os";
import { dirname, join } from "node:path";
import { fileURLToPath } from "node:url";

/** Shape of config.json after loading. */
interface CompactionConfig {
  /** Fallback threshold (percent used, 0–100) for models without a specific entry. */
  global: number;
  /**
   * Model id (or `*` wildcard pattern) → threshold percent.
   * Values come straight from JSON and are validated lazily by normalizePercent().
   */
  models: Record<string, unknown>;
}

const DEFAULT_THRESHOLD_PERCENT = 85;
const DEFAULT_CONTEXT_WINDOW = 128000;

// Prevent thrashing: skip the check for this long after a compaction.
const COMPACTION_COOLDOWN_MS = 15000;

// Minimum spacing between two consecutive nudges.
const NUDGE_DEBOUNCE_MS = 5000;

// Upper bound on parent-directory hops when searching for .pi/settings.json.
const MAX_SETTINGS_LOOKUP_DEPTH = 20;

/** True for non-null, non-array objects (i.e. what a JSON object parses to). */
function isPlainRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Coerces a raw config value into an integer percentage in [0, 100].
 *
 * @param value - Untrusted value read from config.json.
 * @param fallback - Returned as-is when `value` is not a number or is NaN.
 * @returns The floored value clamped to 0–100, or `fallback`.
 */
function normalizePercent(value: unknown, fallback: number): number {
  if (typeof value !== "number" || Number.isNaN(value)) {
    return fallback;
  }

  return Math.max(0, Math.min(100, Math.floor(value)));
}

/**
 * Reads config.json from the directory containing this module.
 *
 * Best-effort by design: a missing, unreadable or malformed file yields the
 * defaults instead of throwing.
 */
function loadConfig(): CompactionConfig {
  const defaults: CompactionConfig = { global: DEFAULT_THRESHOLD_PERCENT, models: {} };

  try {
    const extensionDirectory = dirname(fileURLToPath(import.meta.url));
    const configPath = join(extensionDirectory, "config.json");
    const parsedConfig: unknown = JSON.parse(readFileSync(configPath, "utf-8"));

    if (!isPlainRecord(parsedConfig)) {
      return defaults;
    }

    const models = parsedConfig["models"];

    return {
      global: normalizePercent(parsedConfig["global"], DEFAULT_THRESHOLD_PERCENT),
      // Arrays are rejected: their index keys ("0", "1", …) are not model ids.
      models: isPlainRecord(models) ? models : {},
    };
  } catch {
    return defaults;
  }
}

/**
 * Resolves the threshold percentage for a model.
 *
 * Lookup order: an exact own-property match on `modelId`, then the first
 * wildcard pattern (in config insertion order) that matches, then
 * `config.global`. In a pattern only `*` is special (matches any run of
 * characters); every other character, including `?`, is matched literally.
 *
 * @param config - Loaded configuration.
 * @param modelId - Identifier of the active model.
 * @returns Threshold as an integer percentage (0–100).
 */
function getThresholdPercent(config: CompactionConfig, modelId: string): number {
  // Own-property check so ids such as "constructor" don't hit Object.prototype.
  if (Object.prototype.hasOwnProperty.call(config.models, modelId)) {
    const exact = config.models[modelId];
    if (exact !== undefined) {
      return normalizePercent(exact, config.global);
    }
  }

  for (const [pattern, threshold] of Object.entries(config.models)) {
    if (!pattern.includes("*")) {
      continue;
    }

    // Escape every regex metacharacter except "*", which becomes ".*" below.
    const escapedPattern = pattern.replace(/[.+?^${}()|[\]\\]/g, "\\$&");
    const regex = new RegExp("^" + escapedPattern.replace(/\*/g, ".*") + "$");

    if (regex.test(modelId)) {
      return normalizePercent(threshold, config.global);
    }
  }

  return config.global;
}

/** Rough token estimate for the system prompt (~4 characters per token). */
function estimateSystemPromptTokens(ctx: ExtensionContext): number {
  const promptText = ctx.getSystemPrompt();
  return Math.ceil(promptText.length / 4);
}

/**
 * Estimates the tokens in use on the current leaf: the messages of the built
 * session context plus the system prompt estimate.
 */
function estimateLeafTokens(ctx: ExtensionContext): number {
  const sessionContext = buildSessionContext(ctx.sessionManager.getEntries(), ctx.sessionManager.getLeafId());
  const messagesTokens = sessionContext.messages.reduce((sum, msg) => sum + estimateTokens(msg), 0);

  // System prompt (AGENTS.md, tool descriptions, etc.) isn't in SessionContext.messages
  return messagesTokens + estimateSystemPromptTokens(ctx);
}

/**
 * Finds the timestamp of the most recent compaction entry on the current branch.
 *
 * @returns Epoch milliseconds, or `undefined` when the branch has no compaction
 *   entry or the newest one carries an unparseable timestamp.
 */
function getLastBranchCompactionMs(ctx: ExtensionContext): number | undefined {
  const branchEntries = ctx.sessionManager.getBranch();

  for (let i = branchEntries.length - 1; i >= 0; i -= 1) {
    const entry = branchEntries[i];
    if (entry.type !== "compaction") {
      continue;
    }

    // SessionEntry.timestamp is an ISO string
    const ms = Date.parse(entry.timestamp);
    return Number.isNaN(ms) ? undefined : ms;
  }

  return undefined;
}

/**
 * Reads and parses a JSON file.
 *
 * @returns The parsed value, or `undefined` when the file is missing,
 *   unreadable or not valid JSON.
 */
function readJsonFile(filePath: string): unknown | undefined {
  try {
    if (!existsSync(filePath)) {
      return undefined;
    }

    return JSON.parse(readFileSync(filePath, "utf-8")) as unknown;
  } catch {
    return undefined;
  }
}

/**
 * Extracts `compaction.enabled` from a parsed settings object.
 *
 * @returns The boolean when present and well-typed, otherwise `undefined`.
 */
function getCompactionEnabledFromSettings(settings: unknown): boolean | undefined {
  if (!isPlainRecord(settings)) {
    return undefined;
  }

  const compaction = settings["compaction"];
  if (!isPlainRecord(compaction)) {
    return undefined;
  }

  const enabled = compaction["enabled"];
  return typeof enabled === "boolean" ? enabled : undefined;
}

/**
 * Best-effort search for the nearest `.pi/settings.json`, walking from
 * `startDir` towards the filesystem root (bounded by MAX_SETTINGS_LOOKUP_DEPTH).
 */
function findProjectSettingsPath(startDir: string): string | undefined {
  let current = startDir;

  for (let i = 0; i < MAX_SETTINGS_LOOKUP_DEPTH; i += 1) {
    const candidate = join(current, ".pi", "settings.json");
    if (existsSync(candidate)) {
      return candidate;
    }

    const parent = dirname(current);
    if (parent === current) {
      // dirname() no longer changes the path: we reached the root.
      break;
    }
    current = parent;
  }

  return undefined;
}

/**
 * Determines whether auto-compaction is enabled by reading the settings files
 * directly. Project settings win over global settings; unset means enabled.
 */
function isAutoCompactionEnabled(ctx: ExtensionContext): boolean {
  // Mirrors SettingsManager.getCompactionEnabled default behavior: true if unset
  const globalSettingsPath = join(homedir(), ".pi", "agent", "settings.json");
  const globalEnabled = getCompactionEnabledFromSettings(readJsonFile(globalSettingsPath));

  const projectSettingsPath = findProjectSettingsPath(ctx.cwd);
  const projectEnabled = projectSettingsPath
    ? getCompactionEnabledFromSettings(readJsonFile(projectSettingsPath))
    : undefined;

  return projectEnabled ?? globalEnabled ?? true;
}

/**
 * True when `msg` is an assistant message that did not stop with
 * "error"/"aborted" and carries a `usage` object we can mutate.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- message shape is owned by the host
function isUsableAssistantMessage(msg: any): boolean {
  if (!msg || msg.role !== "assistant") {
    return false;
  }

  if (msg.stopReason === "error" || msg.stopReason === "aborted") {
    return false;
  }

  return Boolean(msg.usage);
}

/** Fallback when turn_end didn't capture a reference (e.g., extension loaded mid-session) */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- message shape is owned by the host
function findLastNonErrorAssistantMessage(messages: unknown[]): any | undefined {
  for (let i = messages.length - 1; i >= 0; i -= 1) {
    const msg = messages[i];
    if (isUsableAssistantMessage(msg)) {
      return msg;
    }
  }

  return undefined;
}

/**
 * Extension entry point: registers the session, turn and agent handlers that
 * implement model-aware compaction nudging.
 *
 * @param pi - Extension API used to subscribe to events.
 */
export default function (pi: ExtensionAPI) {
  const config = loadConfig();

  let lastCompactionMs = 0;
  let lastNudgeMs = 0;

  // Best-effort reference to the last assistant message object used by Pi's internal compaction check
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- message shape is owned by the host
  let lastAssistantMessageRef: any | undefined;

  /** Drops the cached message and re-arms the nudge debounce. */
  const resetNudgeState = (): void => {
    lastAssistantMessageRef = undefined;
    lastNudgeMs = 0;
  };

  /** Moves lastCompactionMs forward to the newest compaction entry on the branch, if any. */
  const syncCompactionTimeFromBranch = (ctx: ExtensionContext): void => {
    const branchCompactionMs = getLastBranchCompactionMs(ctx);
    if (branchCompactionMs !== undefined) {
      lastCompactionMs = Math.max(lastCompactionMs, branchCompactionMs);
    }
  };

  // -- Session lifecycle -------------------------------------------------------
  // Reset cooldowns and cached message refs on session start/navigation, and
  // track compaction timestamps for debounce.

  pi.on("session_start", async (_event, ctx) => {
    resetNudgeState();
    lastCompactionMs = 0;
    syncCompactionTimeFromBranch(ctx);
  });

  pi.on("session_tree", async (_event, ctx) => {
    resetNudgeState();
    syncCompactionTimeFromBranch(ctx);
  });

  pi.on("session_before_compact", async (_event, _ctx) => {
    resetNudgeState();
  });

  pi.on("session_compact", async (_event, _ctx) => {
    lastCompactionMs = Date.now();
    resetNudgeState();
  });

  // Capture the last assistant message reference so we can mutate it reliably in agent_end
  pi.on("turn_end", async (event, _ctx) => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- event payload is loosely typed here
    const msg = (event as any)?.message;
    if (isUsableAssistantMessage(msg)) {
      lastAssistantMessageRef = msg;
    }
  });

  // Trigger after an agent run completes (matches Pi built-in auto-compaction timing)
  pi.on("agent_end", async (event, ctx) => {
    syncCompactionTimeFromBranch(ctx);

    const now = Date.now();
    if (now - lastCompactionMs < COMPACTION_COOLDOWN_MS) {
      return;
    }

    const model = ctx.model;
    if (!model) {
      return;
    }

    // `||` (not `??`) on purpose: a reported window of 0 also falls back to the default.
    const contextWindow = model.contextWindow || DEFAULT_CONTEXT_WINDOW;
    const thresholdPercent = getThresholdPercent(config, model.id);
    const thresholdTokens = Math.floor((thresholdPercent / 100) * contextWindow);

    const usage = ctx.getContextUsage();
    const usedTokens = usage?.tokens ?? estimateLeafTokens(ctx);

    if (usedTokens < thresholdTokens) {
      return;
    }

    if (!isAutoCompactionEnabled(ctx)) {
      if (ctx.hasUI) {
        ctx.ui.notify(
          "Auto-compact is disabled. " +
            "Enable it in /settings so model-aware-compaction can trigger Pi's built-in auto-compaction",
          "warning",
        );
      }
      return;
    }

    // Nudge Pi's built-in auto-compaction check (which runs right after this handler)
    const lastAssistant =
      // eslint-disable-next-line @typescript-eslint/no-explicit-any -- event payload is loosely typed here
      lastAssistantMessageRef ?? findLastNonErrorAssistantMessage((event as any)?.messages ?? []);
    if (!lastAssistant) {
      return;
    }

    const nudgeNow = Date.now();
    if (nudgeNow - lastNudgeMs < NUDGE_DEBOUNCE_MS) {
      return;
    }
    lastNudgeMs = nudgeNow;

    if (ctx.hasUI) {
      ctx.ui.notify(
        `Auto-compacting via model-aware threshold: ${model.id} (>= ${thresholdPercent}% used)`,
        "info",
      );
    }

    // Force auto-compaction by bumping totalTokens above Pi's internal shouldCompact threshold
    const forcedTokens = contextWindow + 1;
    lastAssistant.usage.totalTokens = Math.max(lastAssistant.usage.totalTokens ?? 0, forcedTokens);

    // Note: we deliberately don't set a footer/statusline indicator here.
    // If Pi's auto-compaction is enabled, its own UI will show the compaction loader + result.
  });
}