/ services / extractMemories / extractMemories.ts
extractMemories.ts
  1  /**
  2   * Extracts durable memories from the current session transcript
  3   * and writes them to the auto-memory directory (~/.claude/projects/<path>/memory/).
  4   *
  5   * It runs once at the end of each complete query loop (when the model produces
  6   * a final response with no tool calls) via handleStopHooks in stopHooks.ts.
  7   *
  8   * Uses the forked agent pattern (runForkedAgent) — a perfect fork of the main
  9   * conversation that shares the parent's prompt cache.
 10   *
 11   * State is closure-scoped inside initExtractMemories() rather than module-level,
 12   * following the same pattern as confidenceRating.ts. Tests call
 13   * initExtractMemories() in beforeEach to get a fresh closure.
 14   */
 15  
 16  import { feature } from 'bun:bundle'
 17  import { basename } from 'path'
 18  import { getIsRemoteMode } from '../../bootstrap/state.js'
 19  import type { CanUseToolFn } from '../../hooks/useCanUseTool.js'
 20  import { ENTRYPOINT_NAME } from '../../memdir/memdir.js'
 21  import {
 22    formatMemoryManifest,
 23    scanMemoryFiles,
 24  } from '../../memdir/memoryScan.js'
 25  import {
 26    getAutoMemPath,
 27    isAutoMemoryEnabled,
 28    isAutoMemPath,
 29  } from '../../memdir/paths.js'
 30  import type { Tool } from '../../Tool.js'
 31  import { BASH_TOOL_NAME } from '../../tools/BashTool/toolName.js'
 32  import { FILE_EDIT_TOOL_NAME } from '../../tools/FileEditTool/constants.js'
 33  import { FILE_READ_TOOL_NAME } from '../../tools/FileReadTool/prompt.js'
 34  import { FILE_WRITE_TOOL_NAME } from '../../tools/FileWriteTool/prompt.js'
 35  import { GLOB_TOOL_NAME } from '../../tools/GlobTool/prompt.js'
 36  import { GREP_TOOL_NAME } from '../../tools/GrepTool/prompt.js'
 37  import { REPL_TOOL_NAME } from '../../tools/REPLTool/constants.js'
 38  import type {
 39    AssistantMessage,
 40    Message,
 41    SystemLocalCommandMessage,
 42    SystemMessage,
 43  } from '../../types/message.js'
 44  import { createAbortController } from '../../utils/abortController.js'
 45  import { count, uniq } from '../../utils/array.js'
 46  import { logForDebugging } from '../../utils/debug.js'
 47  import {
 48    createCacheSafeParams,
 49    runForkedAgent,
 50  } from '../../utils/forkedAgent.js'
 51  import type { REPLHookContext } from '../../utils/hooks/postSamplingHooks.js'
 52  import {
 53    createMemorySavedMessage,
 54    createUserMessage,
 55  } from '../../utils/messages.js'
 56  import { getFeatureValue_CACHED_MAY_BE_STALE } from '../analytics/growthbook.js'
 57  import { logEvent } from '../analytics/index.js'
 58  import { sanitizeToolNameForAnalytics } from '../analytics/metadata.js'
 59  import {
 60    buildExtractAutoOnlyPrompt,
 61    buildExtractCombinedPrompt,
 62  } from './prompts.js'
 63  
 64  /* eslint-disable @typescript-eslint/no-require-imports */
 65  const teamMemPaths = feature('TEAMMEM')
 66    ? (require('../../memdir/teamMemPaths.js') as typeof import('../../memdir/teamMemPaths.js'))
 67    : null
 68  /* eslint-enable @typescript-eslint/no-require-imports */
 69  
 70  // ============================================================================
 71  // Helpers
 72  // ============================================================================
 73  
 74  /**
 75   * Returns true if a message is visible to the model (sent in API calls).
 76   * Excludes progress, system, and attachment messages.
 77   */
 78  function isModelVisibleMessage(message: Message): boolean {
 79    return message.type === 'user' || message.type === 'assistant'
 80  }
 81  
 82  function countModelVisibleMessagesSince(
 83    messages: Message[],
 84    sinceUuid: string | undefined,
 85  ): number {
 86    if (sinceUuid === null || sinceUuid === undefined) {
 87      return count(messages, isModelVisibleMessage)
 88    }
 89  
 90    let foundStart = false
 91    let n = 0
 92    for (const message of messages) {
 93      if (!foundStart) {
 94        if (message.uuid === sinceUuid) {
 95          foundStart = true
 96        }
 97        continue
 98      }
 99      if (isModelVisibleMessage(message)) {
100        n++
101      }
102    }
103    // If sinceUuid was not found (e.g., removed by context compaction),
104    // fall back to counting all model-visible messages rather than returning 0
105    // which would permanently disable extraction for the rest of the session.
106    if (!foundStart) {
107      return count(messages, isModelVisibleMessage)
108    }
109    return n
110  }
111  
/**
 * Returns true if any assistant message after the cursor UUID contains a
 * Write/Edit tool_use block targeting an auto-memory path.
 *
 * The main agent's prompt has full save instructions — when it writes
 * memories, the forked extraction is redundant. runExtraction skips the
 * agent and advances the cursor past this range, making the main agent
 * and the background agent mutually exclusive per turn.
 *
 * Cursor handling matches countModelVisibleMessagesSince: with no cursor,
 * or with a cursor that is no longer present in `messages` (e.g., removed
 * by context compaction), the whole transcript is scanned. Without that
 * fallback a lost cursor would make this check return false unconditionally
 * and let the forked agent run on a turn where the main agent already wrote
 * memories.
 *
 * @param messages - Transcript to inspect, oldest first.
 * @param sinceUuid - UUID of the last message already processed, if any.
 * @returns Whether a memory-file write was found in the inspected range.
 */
function hasMemoryWritesSince(
  messages: Message[],
  sinceUuid: string | undefined,
): boolean {
  // findIndex yields -1 for a missing cursor, so `cursorIndex + 1` is 0 in
  // both the "no cursor" and the "cursor not found" cases.
  const cursorIndex =
    sinceUuid === undefined
      ? -1
      : messages.findIndex(message => message.uuid === sinceUuid)

  for (const message of messages.slice(cursorIndex + 1)) {
    if (message.type !== 'assistant') {
      continue
    }
    const content = (message as AssistantMessage).message.content
    if (!Array.isArray(content)) {
      continue
    }
    for (const block of content) {
      const filePath = getWrittenFilePath(block)
      if (filePath !== undefined && isAutoMemPath(filePath)) {
        return true
      }
    }
  }
  return false
}
149  
150  // ============================================================================
151  // Tool Permissions
152  // ============================================================================
153  
/**
 * Records a denied tool call (debug log plus analytics event) and builds the
 * matching 'deny' permission result carrying `reason` as both the message
 * and the decision reason.
 */
function denyAutoMemTool(tool: Tool, reason: string) {
  const toolName = tool.name
  logForDebugging(`[autoMem] denied ${toolName}: ${reason}`)
  logEvent('tengu_auto_mem_tool_denied', {
    tool_name: sanitizeToolNameForAnalytics(toolName),
  })

  const decisionReason = { type: 'other' as const, reason }
  return {
    behavior: 'deny' as const,
    message: reason,
    decisionReason,
  }
}
165  
/**
 * Builds the canUseTool callback for memory-writing forked agents. Shared by
 * extractMemories and autoDream.
 *
 * Policy:
 *  - REPL, Read, Grep and Glob are always allowed.
 *  - Bash is allowed only when its input parses and the tool reports the
 *    command as read-only.
 *  - Edit/Write are allowed only when `file_path` is an auto-memory path.
 *  - Everything else is denied (and logged via denyAutoMemTool).
 *
 * @param memoryDir - Auto-memory directory; used in the denial message.
 */
export function createAutoMemCanUseTool(memoryDir: string): CanUseToolFn {
  const allow = (input: Record<string, unknown>) => ({
    behavior: 'allow' as const,
    updatedInput: input,
  })

  return async (tool: Tool, input: Record<string, unknown>) => {
    switch (tool.name) {
      // REPL is allowed because, when REPL mode is enabled (ant-default),
      // primitive tools are hidden from the tool list and the forked agent
      // calls REPL instead. REPL's VM context re-invokes this canUseTool for
      // each inner primitive (toolWrappers.ts createToolWrapper), so the
      // Read/Bash/Edit/Write rules here still gate the real file and shell
      // operations. Handing the fork a different tool list would break
      // prompt cache sharing (tools are part of the cache key — see
      // CacheSafeParams in forkedAgent.ts).
      case REPL_TOOL_NAME:
      // Read/Grep/Glob are inherently read-only, so they are unrestricted.
      case FILE_READ_TOOL_NAME:
      case GREP_TOOL_NAME:
      case GLOB_TOOL_NAME:
        return allow(input)

      // Bash: `tool` IS BashTool here, so its own schema and isReadOnly
      // check are used — no static import needed.
      case BASH_TOOL_NAME: {
        const parsed = tool.inputSchema.safeParse(input)
        if (parsed.success && tool.isReadOnly(parsed.data)) {
          return allow(input)
        }
        return denyAutoMemTool(
          tool,
          'Only read-only shell commands are permitted in this context (ls, find, grep, cat, stat, wc, head, tail, and similar)',
        )
      }

      // Edit/Write: only inside the auto-memory directory. Anything else
      // falls out of the switch into the generic denial below.
      case FILE_EDIT_TOOL_NAME:
      case FILE_WRITE_TOOL_NAME: {
        const targetPath = input.file_path
        if (typeof targetPath === 'string' && isAutoMemPath(targetPath)) {
          return allow(input)
        }
        break
      }
    }

    return denyAutoMemTool(
      tool,
      `only ${FILE_READ_TOOL_NAME}, ${GREP_TOOL_NAME}, ${GLOB_TOOL_NAME}, read-only ${BASH_TOOL_NAME}, and ${FILE_EDIT_TOOL_NAME}/${FILE_WRITE_TOOL_NAME} within ${memoryDir} are allowed`,
    )
  }
}
223  
224  // ============================================================================
225  // Extract file paths from agent output
226  // ============================================================================
227  
/**
 * Pulls the target path out of an Edit/Write tool_use block.
 *
 * @returns The block's `input.file_path` when the block is a tool_use for the
 * Edit or Write tool and that field is a string; otherwise undefined.
 */
function getWrittenFilePath(block: {
  type: string
  name?: string
  input?: unknown
}): string | undefined {
  const isWriteToolUse =
    block.type === 'tool_use' &&
    (block.name === FILE_EDIT_TOOL_NAME || block.name === FILE_WRITE_TOOL_NAME)
  if (!isWriteToolUse) {
    return undefined
  }

  const { input } = block
  if (input === null || typeof input !== 'object' || !('file_path' in input)) {
    return undefined
  }

  const candidate = (input as { file_path: unknown }).file_path
  if (typeof candidate !== 'string') {
    return undefined
  }
  return candidate
}
250  
/**
 * Collects the distinct file paths targeted by Edit/Write tool_use blocks in
 * the assistant messages of `agentMessages`, in first-seen order as produced
 * by `uniq`.
 */
function extractWrittenPaths(agentMessages: Message[]): string[] {
  const collected = agentMessages.flatMap((message): string[] => {
    if (message.type !== 'assistant') {
      return []
    }
    const content = (message as AssistantMessage).message.content
    if (!Array.isArray(content)) {
      return []
    }
    const found: string[] = []
    for (const block of content) {
      const target = getWrittenFilePath(block)
      if (target !== undefined) {
        found.push(target)
      }
    }
    return found
  })
  return uniq(collected)
}
270  
271  // ============================================================================
272  // Initialization & Closure-scoped State
273  // ============================================================================
274  
/** Callback for surfacing a system message (any kind except local-command). */
type AppendSystemMessageFn = (
  msg: Exclude<SystemMessage, SystemLocalCommandMessage>,
) => void

/** Shape of the extraction entry point installed by initExtractMemories(). */
type ExtractorFn = (
  context: REPLHookContext,
  appendSystemMessage?: AppendSystemMessageFn,
) => Promise<void>

/** Currently installed extractor; stays null until initExtractMemories() runs. */
let extractor: ExtractorFn | null = null

/** Currently installed drain function; a no-op until initExtractMemories() runs. */
let drainer = async (_timeoutMs?: number): Promise<void> => {}
289  
/**
 * Initialize the memory extraction system.
 *
 * Creates a fresh closure that captures all mutable state (cursor position,
 * overlap guard, throttle counter, pending context) and installs the
 * module-level `extractor` and `drainer` functions bound to that state.
 * Call once at startup alongside initConfidenceRating/initPromptCoaching,
 * or per-test in beforeEach; calling it again replaces both functions with
 * ones bound to new, empty state.
 */
export function initExtractMemories(): void {
  // --- Closure-scoped mutable state ---

  /** Every promise handed out by the extractor that hasn't settled yet.
   *  Coalesced calls that stash-and-return add fast-resolving promises
   *  (harmless); the call that starts real work adds a promise covering the
   *  full trailing-run chain via runExtraction's recursive finally. */
  const inFlightExtractions = new Set<Promise<void>>()

  /** UUID of the last message processed — cursor so each run only
   *  considers messages added since the previous extraction. */
  let lastMemoryMessageUuid: string | undefined

  /** One-shot flag: once we log that the gate is disabled, don't repeat. */
  let hasLoggedGateFailure = false

  /** True while runExtraction is executing — prevents overlapping runs. */
  let inProgress = false

  /** Counts eligible turns since the last extraction run. Resets to 0 after each run. */
  let turnsSinceLastExtraction = 0

  /** When a call arrives during an in-progress run, we stash the context here
   *  and run one trailing extraction after the current one finishes. */
  let pendingContext:
    | {
        context: REPLHookContext
        appendSystemMessage?: AppendSystemMessageFn
      }
    | undefined

  // --- Inner extraction logic ---

  /**
   * Performs one extraction pass over the messages added since the cursor.
   *
   * Order of operations:
   *  1. If the conversation itself already wrote to memory files since the
   *     cursor, skip the forked agent and just advance the cursor.
   *  2. Unless this is a trailing run, apply the every-N-turns throttle.
   *  3. Run the forked agent, advance the cursor on success, log usage, and
   *     report saved memories through `appendSystemMessage` when provided.
   *  4. In `finally`, clear the overlap guard and, if a context was stashed
   *     while this run was in progress, recurse once with it.
   *
   * Errors from the forked run are logged and swallowed: extraction is
   * best-effort, and the cursor is left in place so the same messages are
   * reconsidered next time.
   */
  async function runExtraction({
    context,
    appendSystemMessage,
    isTrailingRun,
  }: {
    context: REPLHookContext
    appendSystemMessage?: AppendSystemMessageFn
    isTrailingRun?: boolean
  }): Promise<void> {
    const { messages } = context
    const memoryDir = getAutoMemPath()
    const newMessageCount = countModelVisibleMessagesSince(
      messages,
      lastMemoryMessageUuid,
    )

    // Mutual exclusion: when the main agent wrote memories, skip the
    // forked agent and advance the cursor past this range so the next
    // extraction only considers messages after the main agent's write.
    if (hasMemoryWritesSince(messages, lastMemoryMessageUuid)) {
      logForDebugging(
        '[extractMemories] skipping — conversation already wrote to memory files',
      )
      const lastMessage = messages.at(-1)
      if (lastMessage?.uuid) {
        lastMemoryMessageUuid = lastMessage.uuid
      }
      logEvent('tengu_extract_memories_skipped_direct_write', {
        message_count: newMessageCount,
      })
      return
    }

    const teamMemoryEnabled = feature('TEAMMEM')
      ? teamMemPaths!.isTeamMemoryEnabled()
      : false

    const skipIndex = getFeatureValue_CACHED_MAY_BE_STALE(
      'tengu_moth_copse',
      false,
    )

    const canUseTool = createAutoMemCanUseTool(memoryDir)
    const cacheSafeParams = createCacheSafeParams(context)

    // Only run extraction every N eligible turns (tengu_bramble_lintel, default 1).
    // Trailing extractions (from stashed contexts) skip this check since they
    // process already-committed work that should not be throttled.
    if (!isTrailingRun) {
      turnsSinceLastExtraction++
      if (
        turnsSinceLastExtraction <
        (getFeatureValue_CACHED_MAY_BE_STALE('tengu_bramble_lintel', null) ?? 1)
      ) {
        return
      }
    }
    turnsSinceLastExtraction = 0

    // From here until the finally block, concurrent calls are stashed in
    // pendingContext by executeExtractMemoriesImpl instead of starting a run.
    inProgress = true
    const startTime = Date.now()
    try {
      logForDebugging(
        `[extractMemories] starting — ${newMessageCount} new messages, memoryDir=${memoryDir}`,
      )

      // Pre-inject the memory directory manifest so the agent doesn't spend
      // a turn on `ls`. Reuses findRelevantMemories' frontmatter scan.
      // Placed after the throttle gate so skipped turns don't pay the scan cost.
      const existingMemories = formatMemoryManifest(
        await scanMemoryFiles(memoryDir, createAbortController().signal),
      )

      const userPrompt =
        feature('TEAMMEM') && teamMemoryEnabled
          ? buildExtractCombinedPrompt(
              newMessageCount,
              existingMemories,
              skipIndex,
            )
          : buildExtractAutoOnlyPrompt(
              newMessageCount,
              existingMemories,
              skipIndex,
            )

      const result = await runForkedAgent({
        promptMessages: [createUserMessage({ content: userPrompt })],
        cacheSafeParams,
        canUseTool,
        querySource: 'extract_memories',
        forkLabel: 'extract_memories',
        // The extractMemories subagent does not need to record to transcript.
        // Doing so can create race conditions with the main thread.
        skipTranscript: true,
        // Well-behaved extractions complete in 2-4 turns (read → write).
        // A hard cap prevents verification rabbit-holes from burning turns.
        maxTurns: 5,
      })

      // Advance the cursor only after a successful run. If the agent errors
      // out (caught below), the cursor stays put so those messages are
      // reconsidered on the next extraction.
      const lastMessage = messages.at(-1)
      if (lastMessage?.uuid) {
        lastMemoryMessageUuid = lastMessage.uuid
      }

      const writtenPaths = extractWrittenPaths(result.messages)
      const turnCount = count(result.messages, m => m.type === 'assistant')

      // Cache hit rate = cached reads as a share of all input-side tokens.
      const totalInput =
        result.totalUsage.input_tokens +
        result.totalUsage.cache_creation_input_tokens +
        result.totalUsage.cache_read_input_tokens
      const hitPct =
        totalInput > 0
          ? (
              (result.totalUsage.cache_read_input_tokens / totalInput) *
              100
            ).toFixed(1)
          : '0.0'
      logForDebugging(
        `[extractMemories] finished — ${writtenPaths.length} files written, cache: read=${result.totalUsage.cache_read_input_tokens} create=${result.totalUsage.cache_creation_input_tokens} input=${result.totalUsage.input_tokens} (${hitPct}% hit)`,
      )

      if (writtenPaths.length > 0) {
        logForDebugging(
          `[extractMemories] memories saved: ${writtenPaths.join(', ')}`,
        )
      } else {
        logForDebugging('[extractMemories] no memories saved this run')
      }

      // Index file updates are mechanical — the agent touches MEMORY.md to add
      // a topic link, but the user-visible "memory" is the topic file itself.
      const memoryPaths = writtenPaths.filter(
        p => basename(p) !== ENTRYPOINT_NAME,
      )
      const teamCount = feature('TEAMMEM')
        ? count(memoryPaths, teamMemPaths!.isTeamMemPath)
        : 0

      // Log extraction event with usage from the forked agent
      logEvent('tengu_extract_memories_extraction', {
        input_tokens: result.totalUsage.input_tokens,
        output_tokens: result.totalUsage.output_tokens,
        cache_read_input_tokens: result.totalUsage.cache_read_input_tokens,
        cache_creation_input_tokens:
          result.totalUsage.cache_creation_input_tokens,
        message_count: newMessageCount,
        turn_count: turnCount,
        files_written: writtenPaths.length,
        memories_saved: memoryPaths.length,
        team_memories_saved: teamCount,
        duration_ms: Date.now() - startTime,
      })

      logForDebugging(
        `[extractMemories] writtenPaths=${writtenPaths.length} memoryPaths=${memoryPaths.length} appendSystemMessage defined=${appendSystemMessage != null}`,
      )
      // Only non-index files count as user-visible memories worth announcing.
      if (memoryPaths.length > 0) {
        const msg = createMemorySavedMessage(memoryPaths)
        if (feature('TEAMMEM')) {
          msg.teamCount = teamCount
        }
        appendSystemMessage?.(msg)
      }
    } catch (error) {
      // Extraction is best-effort — log but don't notify on error
      logForDebugging(`[extractMemories] error: ${error}`)
      logEvent('tengu_extract_memories_error', {
        duration_ms: Date.now() - startTime,
      })
    } finally {
      inProgress = false

      // If a call arrived while we were running, run a trailing extraction
      // with the latest stashed context. The trailing run will compute its
      // newMessageCount relative to the cursor we just advanced — so it only
      // picks up messages added between the two calls, not the full history.
      const trailing = pendingContext
      pendingContext = undefined
      if (trailing) {
        logForDebugging(
          '[extractMemories] running trailing extraction for stashed context',
        )
        await runExtraction({
          context: trailing.context,
          appendSystemMessage: trailing.appendSystemMessage,
          isTrailingRun: true,
        })
      }
    }
  }

  // --- Public entry point (captured by extractor) ---

  /**
   * Gatekeeper in front of runExtraction. Returns without doing anything for
   * subagents, when the feature gate or auto-memory is off, or in remote
   * mode. If a run is already in progress, stashes this call's context for a
   * single trailing run instead of starting a second one.
   */
  async function executeExtractMemoriesImpl(
    context: REPLHookContext,
    appendSystemMessage?: AppendSystemMessageFn,
  ): Promise<void> {
    // Only run for the main agent, not subagents
    if (context.toolUseContext.agentId) {
      return
    }

    if (!getFeatureValue_CACHED_MAY_BE_STALE('tengu_passport_quail', false)) {
      // Emit the "gate disabled" event at most once per closure, and only
      // when USER_TYPE is 'ant'.
      if (process.env.USER_TYPE === 'ant' && !hasLoggedGateFailure) {
        hasLoggedGateFailure = true
        logEvent('tengu_extract_memories_gate_disabled', {})
      }
      return
    }

    // Check auto-memory is enabled
    if (!isAutoMemoryEnabled()) {
      return
    }

    // Skip in remote mode
    if (getIsRemoteMode()) {
      return
    }

    // If an extraction is already in progress, stash this context for a
    // trailing run (overwrites any previously stashed context — only the
    // latest matters since it has the most messages).
    if (inProgress) {
      logForDebugging(
        '[extractMemories] extraction in progress — stashing for trailing run',
      )
      logEvent('tengu_extract_memories_coalesced', {})
      pendingContext = { context, appendSystemMessage }
      return
    }

    await runExtraction({ context, appendSystemMessage })
  }

  // Track every call's promise so the drainer can wait for it; the promise
  // is removed again once it settles, whether it resolved or rejected.
  extractor = async (context, appendSystemMessage) => {
    const p = executeExtractMemoriesImpl(context, appendSystemMessage)
    inFlightExtractions.add(p)
    try {
      await p
    } finally {
      inFlightExtractions.delete(p)
    }
  }

  // Waits until all in-flight extractions settle or `timeoutMs` elapses,
  // whichever happens first. Rejections are ignored here — the drainer only
  // cares that the work has stopped.
  drainer = async (timeoutMs = 60_000) => {
    if (inFlightExtractions.size === 0) return
    let timer: ReturnType<typeof setTimeout> | undefined
    try {
      /* eslint-disable no-restricted-syntax -- sleep() has no .unref(); timer must not block exit */
      await Promise.race([
        Promise.all(inFlightExtractions).catch(() => {}),
        new Promise<void>(resolve => {
          timer = setTimeout(resolve, timeoutMs)
          timer.unref()
        }),
      ])
      /* eslint-enable no-restricted-syntax */
    } finally {
      // Release the timer when the extractions finish first, instead of
      // leaving it pending (and holding its closure) for up to timeoutMs.
      clearTimeout(timer)
    }
  }
}
588  
589  // ============================================================================
590  // Public API
591  // ============================================================================
592  
/**
 * Runs memory extraction for a finished query loop by delegating to the
 * extractor installed by initExtractMemories(). Called fire-and-forget from
 * handleStopHooks, alongside prompt suggestion/coaching.
 * Does nothing when initExtractMemories() has not been called yet.
 */
export async function executeExtractMemories(
  context: REPLHookContext,
  appendSystemMessage?: AppendSystemMessageFn,
): Promise<void> {
  const pending = extractor?.(context, appendSystemMessage)
  await pending
}
604  
/**
 * Waits, with a soft timeout, for all in-flight extractions (including
 * trailing stashed runs) by delegating to the installed drain function.
 * Called by print.ts after the response is flushed but before
 * gracefulShutdownSync, so the forked agent completes before the 5s shutdown
 * failsafe kills it. A no-op until initExtractMemories() has been called.
 */
export async function drainPendingExtraction(
  timeoutMs?: number,
): Promise<void> {
  const drained = drainer(timeoutMs)
  await drained
}
615  }