index.ts
  1  /**
  2   * Model-Aware Compaction
  3   *
  4   * Adds per-model context-usage thresholds to Pi's built-in auto-compaction.
  5   * Thresholds are percent-used (0–100), configured in config.json.
  6   *
  7   * Design constraint: extensions cannot access InteractiveMode's compaction queue
  8   * (the "Queued message for after compaction" mechanism). Calling ctx.compact()
  9   * directly skips the compaction-summary UI and won't auto-send queued messages.
 10   *
 11   * Approach: at agent_end, if a model-specific threshold is exceeded, inflate
 12   * lastAssistant.usage.totalTokens past the context window. Pi's _checkCompaction()
 13   * then fires its normal pipeline — loader → compact → summary → queued-message
 14   * flush — preserving the full native UX. The inflated value is ephemeral;
 15   * compaction rebuilds messages from the session file.
 16   *
 17   * Session event handlers (session_start, session_tree, session_before_compact,
 18   * session_compact) reset internal state — cooldown
 19   * timers, cached message references — to stay consistent across navigations.
 20   *
 21   * Requires compaction.enabled: true in settings.json. See README.md for
 22   * threshold tuning and reserveTokens guidance.
 23   */
 24  
 25  import {
 26      buildSessionContext,
 27      estimateTokens,
 28      type ExtensionAPI,
 29      type ExtensionContext,
 30  } from "@mariozechner/pi-coding-agent";
 31  import { existsSync, readFileSync } from "node:fs";
 32  import { homedir } from "node:os";
 33  import { dirname, join } from "node:path";
 34  import { fileURLToPath } from "node:url";
 35  
 36  interface CompactionConfig {
 37      global: number;
 38      models: Record<string, number>;
 39  }
 40  
 41  const DEFAULT_THRESHOLD_PERCENT = 85;
 42  const DEFAULT_CONTEXT_WINDOW = 128000;
 43  
 44  // Prevent thrashing
 45  const COMPACTION_COOLDOWN_MS = 15000;
 46  
 47  function normalizePercent(value: unknown, fallback: number): number {
 48      if (typeof value !== "number" || Number.isNaN(value)) {
 49          return fallback;
 50      }
 51  
 52      return Math.max(0, Math.min(100, Math.floor(value)));
 53  }
 54  
 55  function loadConfig(): CompactionConfig {
 56      try {
 57          const extensionDirectory = dirname(fileURLToPath(import.meta.url));
 58          const configPath = join(extensionDirectory, "config.json");
 59          const configData = readFileSync(configPath, "utf-8");
 60          const parsedConfig = JSON.parse(configData);
 61  
 62          return {
 63              global: normalizePercent(parsedConfig.global, DEFAULT_THRESHOLD_PERCENT),
 64              models:
 65                  typeof parsedConfig.models === "object" && parsedConfig.models !== null
 66                      ? (parsedConfig.models as Record<string, number>)
 67                      : {},
 68          };
 69      } catch {
 70          return { global: DEFAULT_THRESHOLD_PERCENT, models: {} };
 71      }
 72  }
 73  
 74  function getThresholdPercent(config: CompactionConfig, modelId: string): number {
 75      if (config.models[modelId] !== undefined) {
 76          return normalizePercent(config.models[modelId], config.global);
 77      }
 78  
 79      for (const [pattern, threshold] of Object.entries(config.models)) {
 80          if (!pattern.includes("*")) {
 81              continue;
 82          }
 83  
 84          const escapedPattern = pattern.replace(/[.+^${}()|[\]\\]/g, "\\$&");
 85          const regex = new RegExp("^" + escapedPattern.replace(/\*/g, ".*") + "$");
 86  
 87          if (regex.test(modelId)) {
 88              return normalizePercent(threshold, config.global);
 89          }
 90      }
 91  
 92      return config.global;
 93  }
 94  
 95  function estimateSystemPromptTokens(ctx: ExtensionContext): number {
 96      const promptText = ctx.getSystemPrompt();
 97      return Math.ceil(promptText.length / 4);  // ~4 chars/token rough heuristic
 98  }
 99  
/**
 * Estimates the tokens in the context built for the current leaf:
 * every message in that context plus the system prompt.
 *
 * @param ctx - Extension context supplying the session manager and system prompt.
 * @returns Sum of the per-message estimates and the system-prompt estimate.
 */
function estimateLeafTokens(ctx: ExtensionContext): number {
    const { sessionManager } = ctx;
    const { messages } = buildSessionContext(sessionManager.getEntries(), sessionManager.getLeafId());

    let messageTokens = 0;
    for (const message of messages) {
        messageTokens += estimateTokens(message);
    }

    // The system prompt (AGENTS.md, tool descriptions, etc.) is not part of
    // SessionContext.messages, so it is added separately.
    return messageTokens + estimateSystemPromptTokens(ctx);
}
107  
/**
 * Finds when the current branch was last compacted.
 *
 * Only the most recent `"compaction"` entry is considered; if its timestamp
 * cannot be parsed, earlier entries are not consulted.
 *
 * @param ctx - Extension context supplying the session manager.
 * @returns Epoch milliseconds of the latest compaction entry, or `undefined`
 *          when there is none or its timestamp is unparseable.
 */
function getLastBranchCompactionMs(ctx: ExtensionContext): number | undefined {
    const newestFirst = [...ctx.sessionManager.getBranch()].reverse();
    const latestCompaction = newestFirst.find((entry) => entry.type === "compaction");

    if (!latestCompaction) {
        return undefined;
    }

    // SessionEntry.timestamp is an ISO string
    const parsedMs = Date.parse(latestCompaction.timestamp);
    return Number.isNaN(parsedMs) ? undefined : parsedMs;
}
124  
/**
 * Reads and parses a JSON file without ever throwing.
 *
 * @param filePath - Path of the file to read.
 * @returns The parsed value, or `undefined` when the file does not exist,
 *          cannot be read, or does not contain valid JSON.
 */
function readJsonFile(filePath: string): unknown | undefined {
    let parsed: unknown = undefined;

    try {
        if (existsSync(filePath)) {
            parsed = JSON.parse(readFileSync(filePath, "utf-8")) as unknown;
        }
    } catch {
        parsed = undefined;
    }

    return parsed;
}
137  
/**
 * Extracts `compaction.enabled` from a parsed settings value.
 *
 * @param settings - Parsed settings of unknown shape.
 * @returns The flag when it is present and a boolean; otherwise `undefined`.
 */
function getCompactionEnabledFromSettings(settings: unknown): boolean | undefined {
    if (typeof settings !== "object" || settings === null) {
        return undefined;
    }

    const section: unknown = (settings as { compaction?: unknown }).compaction;
    if (section === null || section === undefined) {
        return undefined;
    }

    const flag: unknown = (section as { enabled?: unknown }).enabled;
    return typeof flag === "boolean" ? flag : undefined;
}
146  
/**
 * Best-effort search for the nearest `.pi/settings.json`, starting at
 * `startDir` and walking towards the filesystem root.
 *
 * The walk inspects at most 20 directories (the start directory included)
 * and stops early once the root is reached.
 *
 * @param startDir - Directory to begin the search from.
 * @returns Path of the first settings file found, or `undefined`.
 */
function findProjectSettingsPath(startDir: string): string | undefined {
    const MAX_DIRECTORIES = 20;

    let directory = startDir;
    let remaining = MAX_DIRECTORIES;

    while (remaining > 0) {
        remaining -= 1;

        const settingsFile = join(directory, ".pi", "settings.json");
        if (existsSync(settingsFile)) {
            return settingsFile;
        }

        const parentDirectory = dirname(directory);
        if (parentDirectory === directory) {
            // dirname() no longer changes the path: the root has been reached.
            return undefined;
        }
        directory = parentDirectory;
    }

    return undefined;
}
166  
/**
 * Determines whether auto-compaction is switched on by reading the settings
 * files directly.
 *
 * Precedence: the project file (nearest `.pi/settings.json` above `ctx.cwd`),
 * then the global file (`~/.pi/agent/settings.json`), then `true` when neither
 * specifies a boolean. The `true` default is intended to mirror
 * SettingsManager.getCompactionEnabled.
 *
 * @param ctx - Extension context supplying the working directory.
 * @returns The effective `compaction.enabled` value.
 */
function isAutoCompactionEnabled(ctx: ExtensionContext): boolean {
    const readFlag = (settingsPath: string | undefined): boolean | undefined =>
        settingsPath ? getCompactionEnabledFromSettings(readJsonFile(settingsPath)) : undefined;

    const globalFlag = readFlag(join(homedir(), ".pi", "agent", "settings.json"));
    const projectFlag = readFlag(findProjectSettingsPath(ctx.cwd));

    if (projectFlag !== undefined) {
        return projectFlag;
    }
    if (globalFlag !== undefined) {
        return globalFlag;
    }
    return true;
}
179  
/**
 * Fallback for when turn_end didn't capture a reference (e.g., the extension
 * was loaded mid-session).
 *
 * Scans from the end for the most recent message that has role `"assistant"`,
 * a stop reason other than `"error"` or `"aborted"`, and a truthy `usage`.
 *
 * @param messages - Messages to search, oldest first.
 * @returns The matching message object itself (not a copy), or `undefined`.
 */
function findLastNonErrorAssistantMessage(messages: unknown[]): any | undefined {
    let index = messages.length;

    while (index > 0) {
        index -= 1;

        const candidate = messages[index] as any;
        const isAssistant = candidate?.role === "assistant";
        const endedCleanly =
            isAssistant && candidate.stopReason !== "error" && candidate.stopReason !== "aborted";

        if (endedCleanly && candidate.usage) {
            return candidate;
        }
    }

    return undefined;
}
201  
/**
 * Extension entry point: registers the session, turn and agent event handlers
 * that implement model-aware compaction.
 *
 * State kept for the lifetime of the extension:
 *   - `lastCompactionMs`: time of the most recent known compaction (cooldown).
 *   - `lastNudgeMs`: time of the most recent nudge (debounce).
 *   - `lastAssistantMessageRef`: the last usable assistant message seen at turn_end.
 *
 * @param pi - Extension API used to subscribe to events.
 */
export default function (pi: ExtensionAPI) {
    /** Minimum gap between two consecutive nudges. */
    const NUDGE_DEBOUNCE_MS = 5000;

    const thresholds = loadConfig();

    let lastCompactionMs = 0;
    let lastNudgeMs = 0;

    // Best-effort reference to the last assistant message object used by Pi's internal compaction check
    let lastAssistantMessageRef: any | undefined;

    /** Drops the cached assistant message and clears the nudge debounce. */
    const forgetTurnState = (): void => {
        lastAssistantMessageRef = undefined;
        lastNudgeMs = 0;
    };

    /** Moves the cooldown anchor forward to the branch's latest compaction, if it is newer. */
    const absorbBranchCompaction = (ctx: ExtensionContext): void => {
        const branchCompactionMs = getLastBranchCompactionMs(ctx);
        if (branchCompactionMs !== undefined) {
            lastCompactionMs = Math.max(lastCompactionMs, branchCompactionMs);
        }
    };

    // -- Session lifecycle -------------------------------------------------------
    // Each handler resets the cached state that is no longer valid and keeps the
    // compaction timestamp current for the cooldown.

    // New session: start from scratch, then seed the cooldown from the branch history.
    pi.on("session_start", async (_event, ctx) => {
        forgetTurnState();
        lastCompactionMs = 0;
        absorbBranchCompaction(ctx);
    });

    // Tree navigation: the cached message no longer applies; the cooldown anchor is kept.
    pi.on("session_tree", async (_event, ctx) => {
        forgetTurnState();
        absorbBranchCompaction(ctx);
    });

    pi.on("session_before_compact", async (_event, _ctx) => {
        forgetTurnState();
    });

    // Compaction finished: restart the cooldown from now.
    pi.on("session_compact", async (_event, _ctx) => {
        lastCompactionMs = Date.now();
        forgetTurnState();
    });

    // Capture the last assistant message reference so it can be mutated reliably in agent_end
    pi.on("turn_end", async (event, _ctx) => {
        const message = (event as any)?.message;

        const isAssistant = Boolean(message) && message.role === "assistant";
        if (!isAssistant) {
            return;
        }

        const failed = message.stopReason === "error" || message.stopReason === "aborted";
        if (failed || !message.usage) {
            return;
        }

        lastAssistantMessageRef = message;
    });

    // Trigger after an agent run completes (matches Pi built-in auto-compaction timing)
    pi.on("agent_end", async (event, ctx) => {
        absorbBranchCompaction(ctx);

        // Still inside the post-compaction cooldown: do nothing.
        const sinceCompactionMs = Date.now() - lastCompactionMs;
        if (sinceCompactionMs < COMPACTION_COOLDOWN_MS) {
            return;
        }

        const activeModel = ctx.model;
        if (!activeModel) {
            return;
        }

        const windowTokens = activeModel.contextWindow || DEFAULT_CONTEXT_WINDOW;
        const percent = getThresholdPercent(thresholds, activeModel.id);
        const limitTokens = Math.floor((percent / 100) * windowTokens);

        // Prefer the reported usage; fall back to a local estimate when it is unavailable.
        const currentTokens = ctx.getContextUsage()?.tokens ?? estimateLeafTokens(ctx);
        if (currentTokens < limitTokens) {
            return;
        }

        if (!isAutoCompactionEnabled(ctx)) {
            if (ctx.hasUI) {
                ctx.ui.notify(
                    "Auto-compact is disabled. " +
                        "Enable it in /settings so model-aware-compaction can trigger Pi's built-in auto-compaction",
                    "warning",
                );
            }
            return;
        }

        // Nudge Pi's built-in auto-compaction check (which runs right after this handler)
        const target =
            lastAssistantMessageRef ?? findLastNonErrorAssistantMessage((event as any)?.messages ?? []);
        if (!target) {
            return;
        }

        const nudgeNow = Date.now();
        if (nudgeNow - lastNudgeMs < NUDGE_DEBOUNCE_MS) {
            return;
        }
        lastNudgeMs = nudgeNow;

        if (ctx.hasUI) {
            ctx.ui.notify(
                `Auto-compacting via model-aware threshold: ${activeModel.id} (>= ${percent}% used)`,
                "info",
            );
        }

        // Force auto-compaction by bumping totalTokens above Pi's internal shouldCompact threshold
        target.usage.totalTokens = Math.max(target.usage.totalTokens ?? 0, windowTokens + 1);

        // Note: no footer/statusline indicator is set here on purpose.
        // If Pi's auto-compaction is enabled, its own UI will show the compaction loader + result.
    });
}