/ memdir / memdir.ts
memdir.ts
  1  import { feature } from 'bun:bundle'
  2  import { join } from 'path'
  3  import { getFsImplementation } from '../utils/fsOperations.js'
  4  import { getAutoMemPath, isAutoMemoryEnabled } from './paths.js'
  5  
  6  /* eslint-disable @typescript-eslint/no-require-imports */
// Team-memory path helpers, loaded via require() only when the TEAMMEM
// feature flag is on; null otherwise. loadMemoryPrompt checks
// feature('TEAMMEM') before dereferencing this with a non-null assertion.
const teamMemPaths = feature('TEAMMEM')
  ? (require('./teamMemPaths.js') as typeof import('./teamMemPaths.js'))
  : null
 10  
 11  import { getKairosActive, getOriginalCwd } from '../bootstrap/state.js'
 12  import { getFeatureValue_CACHED_MAY_BE_STALE } from '../services/analytics/growthbook.js'
 13  /* eslint-enable @typescript-eslint/no-require-imports */
 14  import {
 15    type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 16    logEvent,
 17  } from '../services/analytics/index.js'
 18  import { GREP_TOOL_NAME } from '../tools/GrepTool/prompt.js'
 19  import { isReplModeEnabled } from '../tools/REPLTool/constants.js'
 20  import { logForDebugging } from '../utils/debug.js'
 21  import { hasEmbeddedSearchTools } from '../utils/embeddedTools.js'
 22  import { isEnvTruthy } from '../utils/envUtils.js'
 23  import { formatFileSize } from '../utils/format.js'
 24  import { getProjectDir } from '../utils/sessionStorage.js'
 25  import { getInitialSettings } from '../utils/settings/settings.js'
 26  import {
 27    MEMORY_FRONTMATTER_EXAMPLE,
 28    TRUSTING_RECALL_SECTION,
 29    TYPES_SECTION_INDIVIDUAL,
 30    WHAT_NOT_TO_SAVE_SECTION,
 31    WHEN_TO_ACCESS_SECTION,
 32  } from './memoryTypes.js'
 33  
 34  export const ENTRYPOINT_NAME = 'MEMORY.md'
 35  export const MAX_ENTRYPOINT_LINES = 200
 36  // ~125 chars/line at 200 lines. At p97 today; catches long-line indexes that
 37  // slip past the line cap (p100 observed: 197KB under 200 lines).
 38  export const MAX_ENTRYPOINT_BYTES = 25_000
 39  const AUTO_MEM_DISPLAY_NAME = 'auto memory'
 40  
/** Result returned by truncateEntrypointContent. */
export type EntrypointTruncation = {
  /** Trimmed content, cut to the caps with a warning appended when a cap fired. */
  content: string
  /** Number of lines in the trimmed input, measured before any truncation. */
  lineCount: number
  /**
   * Length of the trimmed input before truncation. Measured as string length
   * (UTF-16 code units), not encoded bytes.
   */
  byteCount: number
  /** True when the trimmed input had more than MAX_ENTRYPOINT_LINES lines. */
  wasLineTruncated: boolean
  /** True when the trimmed input's length exceeded MAX_ENTRYPOINT_BYTES. */
  wasByteTruncated: boolean
}
 48  
 49  /**
 50   * Truncate MEMORY.md content to the line AND byte caps, appending a warning
 51   * that names which cap fired. Line-truncates first (natural boundary), then
 52   * byte-truncates at the last newline before the cap so we don't cut mid-line.
 53   *
 54   * Shared by buildMemoryPrompt and claudemd getMemoryFiles (previously
 55   * duplicated the line-only logic).
 56   */
 57  export function truncateEntrypointContent(raw: string): EntrypointTruncation {
 58    const trimmed = raw.trim()
 59    const contentLines = trimmed.split('\n')
 60    const lineCount = contentLines.length
 61    const byteCount = trimmed.length
 62  
 63    const wasLineTruncated = lineCount > MAX_ENTRYPOINT_LINES
 64    // Check original byte count — long lines are the failure mode the byte cap
 65    // targets, so post-line-truncation size would understate the warning.
 66    const wasByteTruncated = byteCount > MAX_ENTRYPOINT_BYTES
 67  
 68    if (!wasLineTruncated && !wasByteTruncated) {
 69      return {
 70        content: trimmed,
 71        lineCount,
 72        byteCount,
 73        wasLineTruncated,
 74        wasByteTruncated,
 75      }
 76    }
 77  
 78    let truncated = wasLineTruncated
 79      ? contentLines.slice(0, MAX_ENTRYPOINT_LINES).join('\n')
 80      : trimmed
 81  
 82    if (truncated.length > MAX_ENTRYPOINT_BYTES) {
 83      const cutAt = truncated.lastIndexOf('\n', MAX_ENTRYPOINT_BYTES)
 84      truncated = truncated.slice(0, cutAt > 0 ? cutAt : MAX_ENTRYPOINT_BYTES)
 85    }
 86  
 87    const reason =
 88      wasByteTruncated && !wasLineTruncated
 89        ? `${formatFileSize(byteCount)} (limit: ${formatFileSize(MAX_ENTRYPOINT_BYTES)}) — index entries are too long`
 90        : wasLineTruncated && !wasByteTruncated
 91          ? `${lineCount} lines (limit: ${MAX_ENTRYPOINT_LINES})`
 92          : `${lineCount} lines and ${formatFileSize(byteCount)}`
 93  
 94    return {
 95      content:
 96        truncated +
 97        `\n\n> WARNING: ${ENTRYPOINT_NAME} is ${reason}. Only part of it was loaded. Keep index entries to one line under ~200 chars; move detail into topic files.`,
 98      lineCount,
 99      byteCount,
100      wasLineTruncated,
101      wasByteTruncated,
102    }
103  }
104  
/* eslint-disable @typescript-eslint/no-require-imports */
// Team-memory prompt builders, loaded via require() only when the TEAMMEM
// feature flag is on; null otherwise. loadMemoryPrompt checks
// feature('TEAMMEM') before dereferencing this with a non-null assertion.
const teamMemPrompts = feature('TEAMMEM')
  ? (require('./teamMemPrompts.js') as typeof import('./teamMemPrompts.js'))
  : null
/* eslint-enable @typescript-eslint/no-require-imports */
110  
/**
 * Guidance sentence appended to a prompt line that names a single memory
 * directory. Added because the model was spending turns on `ls`/`mkdir -p`
 * before writing; the harness creates the directory up front through
 * ensureMemoryDirExists().
 */
export const DIR_EXISTS_GUIDANCE =
  'This directory already exists — write to it directly with the Write tool (do not run mkdir or check for its existence).'

/**
 * Plural form of DIR_EXISTS_GUIDANCE, for prompt lines that name two memory
 * directories at once.
 */
export const DIRS_EXIST_GUIDANCE =
  'Both directories already exist — write to them directly with the Write tool (do not run mkdir or check for their existence).'
120  
/**
 * Best-effort creation of a memory directory so the model can write into it
 * without probing for existence first. Never throws for mkdir failures: they
 * are reported through logForDebugging at debug level and prompt building
 * carries on.
 *
 * NOTE(review): relies on the fs implementation's mkdir creating the whole
 * parent chain and tolerating an already-existing directory — confirm
 * against FsOperations.
 *
 * @param memoryDir - Directory to create.
 */
export async function ensureMemoryDirExists(memoryDir: string): Promise<void> {
  const fsImpl = getFsImplementation()
  try {
    await fsImpl.mkdir(memoryDir)
  } catch (err) {
    // Anything landing here is expected to be a genuine failure such as
    // EACCES/EPERM/EROFS. Surface it for --debug and move on; a later write
    // by the model will report the underlying permission problem.
    let detail: string
    if (err instanceof Error && 'code' in err && typeof err.code === 'string') {
      detail = err.code
    } else {
      detail = String(err)
    }
    logForDebugging(`ensureMemoryDirExists failed for ${memoryDir}: ${detail}`, {
      level: 'debug',
    })
  }
}
148  
/**
 * Emit a `tengu_memdir_loaded` analytics event for a memory directory,
 * including how many files and subdirectories it directly contains.
 * Fire-and-forget: the directory listing is not awaited, so callers are
 * never blocked. If the listing fails, the event is still logged, just
 * without the counts.
 *
 * @param memoryDir - Directory whose direct entries are counted.
 * @param baseMetadata - Fields included in the event in either outcome.
 */
function logMemoryDirCounts(
  memoryDir: string,
  baseMetadata: Record<
    string,
    | number
    | boolean
    | AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
  >,
): void {
  const listing = getFsImplementation().readdir(memoryDir)
  // Two-argument then(): the rejection handler covers only a failed listing,
  // not an error thrown while logging the success event.
  void listing.then(
    entries => {
      const tally = { files: 0, subdirs: 0 }
      for (const entry of entries) {
        if (entry.isFile()) {
          tally.files += 1
          continue
        }
        if (entry.isDirectory()) {
          tally.subdirs += 1
        }
      }
      logEvent('tengu_memdir_loaded', {
        ...baseMetadata,
        total_file_count: tally.files,
        total_subdir_count: tally.subdirs,
      })
    },
    () => {
      // Listing failed — report the load without counts.
      logEvent('tengu_memdir_loaded', baseMetadata)
    },
  )
}
186  
/**
 * Assemble the behavioral instructions for typed memory as an array of
 * prompt lines. The MEMORY.md content itself is not included.
 *
 * Memories are limited to a closed four-type taxonomy (user / feedback /
 * project / reference); anything derivable from the current project state
 * (code patterns, architecture, git history) is explicitly excluded.
 *
 * This is the individual-only variant: no `## Memory scope` section, no
 * <scope> tags in type blocks, and no team/private qualifiers in examples.
 *
 * Called by buildMemoryPrompt (agent memory, which appends the content) and
 * by loadMemoryPrompt (system prompt; content is supplied elsewhere).
 *
 * @param displayName - Title used for the top-level heading.
 * @param memoryDir - Memory directory path shown to the model.
 * @param extraGuidelines - Optional extra lines placed after the persistence
 *   section.
 * @param skipIndex - When true, the "how to save" section omits the
 *   MEMORY.md index step and describes a single-step save.
 * @returns The prompt lines, to be joined with newlines by the caller.
 */
export function buildMemoryLines(
  displayName: string,
  memoryDir: string,
  extraGuidelines?: string[],
  skipIndex = false,
): string[] {
  // Heading and maintenance bullets are common to both save procedures.
  const saveHeading = ['## How to save memories', '']
  const upkeepBullets = [
    '- Keep the name, description, and type fields in memory files up-to-date with the content',
    '- Organize memory semantically by topic, not chronologically',
    '- Update or remove memories that turn out to be wrong or outdated',
    '- Do not write duplicate memories. First check if there is an existing memory you can update before writing a new one.',
  ]

  let howToSave: string[]
  if (skipIndex) {
    howToSave = [
      ...saveHeading,
      'Write each memory to its own file (e.g., `user_role.md`, `feedback_testing.md`) using this frontmatter format:',
      '',
      ...MEMORY_FRONTMATTER_EXAMPLE,
      '',
      ...upkeepBullets,
    ]
  } else {
    howToSave = [
      ...saveHeading,
      'Saving a memory is a two-step process:',
      '',
      '**Step 1** — write the memory to its own file (e.g., `user_role.md`, `feedback_testing.md`) using this frontmatter format:',
      '',
      ...MEMORY_FRONTMATTER_EXAMPLE,
      '',
      `**Step 2** — add a pointer to that file in \`${ENTRYPOINT_NAME}\`. \`${ENTRYPOINT_NAME}\` is an index, not a memory — each entry should be one line, under ~150 characters: \`- [Title](file.md) — one-line hook\`. It has no frontmatter. Never write memory content directly into \`${ENTRYPOINT_NAME}\`.`,
      '',
      `- \`${ENTRYPOINT_NAME}\` is always loaded into your conversation context — lines after ${MAX_ENTRYPOINT_LINES} will be truncated, so keep the index concise`,
      ...upkeepBullets,
    ]
  }

  const intro = [
    `# ${displayName}`,
    '',
    `You have a persistent, file-based memory system at \`${memoryDir}\`. ${DIR_EXISTS_GUIDANCE}`,
    '',
    "You should build up this memory system over time so that future conversations can have a complete picture of who the user is, how they'd like to collaborate with you, what behaviors to avoid or repeat, and the context behind the work the user gives you.",
    '',
    'If the user explicitly asks you to remember something, save it immediately as whichever type fits best. If they ask you to forget something, find and remove the relevant entry.',
    '',
  ]

  const persistence = [
    '## Memory and other forms of persistence',
    'Memory is one of several persistence mechanisms available to you as you assist the user in a given conversation. The distinction is often that memory can be recalled in future conversations and should not be used for persisting information that is only useful within the scope of the current conversation.',
    '- When to use or update a plan instead of memory: If you are about to start a non-trivial implementation task and would like to reach alignment with the user on your approach you should use a Plan rather than saving this information to memory. Similarly, if you already have a plan within the conversation and you have changed your approach persist that change by updating the plan rather than saving a memory.',
    '- When to use or update tasks instead of memory: When you need to break your work in current conversation into discrete steps or keep track of your progress use tasks instead of saving to memory. Tasks are great for persisting information about the work that needs to be done in the current conversation, but memory should be reserved for information that will be useful in future conversations.',
    '',
  ]

  return [
    ...intro,
    ...TYPES_SECTION_INDIVIDUAL,
    ...WHAT_NOT_TO_SAVE_SECTION,
    '',
    ...howToSave,
    '',
    ...WHEN_TO_ACCESS_SECTION,
    '',
    ...TRUSTING_RECALL_SECTION,
    '',
    ...persistence,
    ...(extraGuidelines ?? []),
    '',
    ...buildSearchingPastContextSection(memoryDir),
  ]
}
267  
/**
 * Build the typed-memory prompt with the MEMORY.md content included.
 * Used by agent memory (which has no getClaudeMds() equivalent).
 *
 * The entrypoint is read synchronously from `memoryDir`. When it is missing,
 * unreadable, or blank, the prompt states that the index is currently empty.
 * Otherwise the content is passed through truncateEntrypointContent and a
 * `tengu_memdir_loaded` event is logged with its size and truncation flags.
 *
 * @param params.displayName - Heading for the prompt; also decides whether
 *   analytics are tagged 'auto' or 'agent'.
 * @param params.memoryDir - Memory directory, with or without a trailing
 *   path separator.
 * @param params.extraGuidelines - Optional extra guideline lines.
 * @returns The full prompt as a single newline-joined string.
 */
export function buildMemoryPrompt(params: {
  displayName: string
  memoryDir: string
  extraGuidelines?: string[]
}): string {
  const { displayName, memoryDir, extraGuidelines } = params
  const fs = getFsImplementation()
  // join() instead of string concatenation: concatenation only produced a
  // valid path when memoryDir already ended with a separator, and otherwise
  // silently pointed at a non-existent "<dir>MEMORY.md".
  const entrypoint = join(memoryDir, ENTRYPOINT_NAME)

  // Directory creation is the caller's responsibility (loadMemoryPrompt /
  // loadAgentMemoryPrompt). Builders only read, they don't mkdir.

  // Read existing memory entrypoint (sync: prompt building is synchronous)
  let entrypointContent = ''
  try {
    // eslint-disable-next-line custom-rules/no-sync-fs
    entrypointContent = fs.readFileSync(entrypoint, { encoding: 'utf-8' })
  } catch {
    // Deliberate best-effort: any read failure is treated as "no memory
    // file yet" and falls through to the empty-index message below.
  }

  const lines = buildMemoryLines(displayName, memoryDir, extraGuidelines)

  if (entrypointContent.trim()) {
    const t = truncateEntrypointContent(entrypointContent)
    const memoryType = displayName === AUTO_MEM_DISPLAY_NAME ? 'auto' : 'agent'
    logMemoryDirCounts(memoryDir, {
      content_length: t.byteCount,
      line_count: t.lineCount,
      was_truncated: t.wasLineTruncated,
      was_byte_truncated: t.wasByteTruncated,
      memory_type:
        memoryType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    lines.push(`## ${ENTRYPOINT_NAME}`, '', t.content)
  } else {
    lines.push(
      `## ${ENTRYPOINT_NAME}`,
      '',
      `Your ${ENTRYPOINT_NAME} is currently empty. When you save new memories, they will appear here.`,
    )
  }

  return lines.join('\n')
}
317  
/**
 * Prompt for assistant-mode daily logging. loadMemoryPrompt selects it when
 * feature('KAIROS') is on and Kairos is active.
 *
 * Assistant sessions are effectively perpetual, so instead of maintaining
 * MEMORY.md as a live index the agent appends memories to a date-named log
 * file. A separate nightly /dream skill distills those logs into topic files
 * and MEMORY.md. MEMORY.md is still loaded into context (via claudemd.ts) as
 * the distilled index — this prompt only changes where NEW memories go.
 *
 * @param skipIndex - When true, the section describing MEMORY.md is left out.
 * @returns The prompt as a single newline-joined string.
 */
function buildAssistantDailyLogPrompt(skipIndex = false): string {
  const memoryDir = getAutoMemPath()
  // The log path is shown as a pattern, never as today's concrete path: this
  // prompt is cached by systemPromptSection('memory', ...) and is NOT
  // invalidated when the date changes. The model learns the current date
  // from the date_change attachment (appended at the tail on midnight
  // rollover); the user-context message is intentionally left stale so the
  // prompt cache prefix survives midnight.
  const logPathPattern = join(memoryDir, 'logs', 'YYYY', 'MM', 'YYYY-MM-DD.md')

  // Optional section describing the distilled index.
  const indexSection: string[] = []
  if (!skipIndex) {
    indexSection.push(
      `## ${ENTRYPOINT_NAME}`,
      `\`${ENTRYPOINT_NAME}\` is the distilled index (maintained nightly from your logs) and is loaded into your context automatically. Read it for orientation, but do not edit it directly — record new information in today's log instead.`,
      '',
    )
  }

  const whatToLog = [
    '## What to log',
    '- User corrections and preferences ("use bun, not npm"; "stop summarizing diffs")',
    '- Facts about the user, their role, or their goals',
    '- Project context that is not derivable from the code (deadlines, incidents, decisions and their rationale)',
    '- Pointers to external systems (dashboards, Linear projects, Slack channels)',
    '- Anything the user explicitly asks you to remember',
    '',
  ]

  return [
    '# auto memory',
    '',
    `You have a persistent, file-based memory system found at: \`${memoryDir}\``,
    '',
    "This session is long-lived. As you work, record anything worth remembering by **appending** to today's daily log file:",
    '',
    `\`${logPathPattern}\``,
    '',
    "Substitute today's date (from `currentDate` in your context) for `YYYY-MM-DD`. When the date rolls over mid-session, start appending to the new day's file.",
    '',
    'Write each entry as a short timestamped bullet. Create the file (and parent directories) on first write if it does not exist. Do not rewrite or reorganize the log — it is append-only. A separate nightly process distills these logs into `MEMORY.md` and topic files.',
    '',
    ...whatToLog,
    ...WHAT_NOT_TO_SAVE_SECTION,
    '',
    ...indexSection,
    ...buildSearchingPastContextSection(memoryDir),
  ].join('\n')
}
371  
/**
 * Build the "Searching past context" prompt section, which shows the model
 * how to search memory topic files and session transcripts.
 *
 * Returns an empty array unless the `tengu_coral_fern` feature value is on.
 * The search examples are phrased as shell `grep` commands when embedded
 * search tools or REPL mode are active, and as Grep-tool invocations
 * otherwise. Directory paths are double-quoted in the shell form so that
 * paths containing whitespace are not word-split by the shell.
 *
 * @param autoMemDir - Memory directory to search for `*.md` topic files.
 * @returns Prompt lines for the section, or `[]` when the gate is off.
 */
export function buildSearchingPastContextSection(autoMemDir: string): string[] {
  if (!getFeatureValue_CACHED_MAY_BE_STALE('tengu_coral_fern', false)) {
    return []
  }
  const projectDir = getProjectDir(getOriginalCwd())
  // Ant-native builds alias grep to embedded ugrep and remove the dedicated
  // Grep tool, so give the model a real shell invocation there.
  // In REPL mode, both Grep and Bash are hidden from direct use — the model
  // calls them from inside REPL scripts, so the grep shell form is what it
  // will write in the script anyway.
  const embedded = hasEmbeddedSearchTools() || isReplModeEnabled()
  // Shell form quotes the directory, matching the quoted path="..." used in
  // the Grep-tool form; an unquoted path with spaces would break the command.
  const memSearch = embedded
    ? `grep -rn "<search term>" "${autoMemDir}" --include="*.md"`
    : `${GREP_TOOL_NAME} with pattern="<search term>" path="${autoMemDir}" glob="*.md"`
  const transcriptSearch = embedded
    ? `grep -rn "<search term>" "${projectDir}/" --include="*.jsonl"`
    : `${GREP_TOOL_NAME} with pattern="<search term>" path="${projectDir}/" glob="*.jsonl"`
  return [
    '## Searching past context',
    '',
    'When looking for past context:',
    '1. Search topic files in your memory directory:',
    '```',
    memSearch,
    '```',
    '2. Session transcript logs (last resort — large files, slow):',
    '```',
    transcriptSearch,
    '```',
    'Use narrow search terms (error messages, file paths, function names) rather than broad keywords.',
    '',
  ]
}
408  
/**
 * Produce the unified memory prompt for the system prompt, choosing a
 * variant from the enabled memory systems. Branches are tried in order:
 *   1. KAIROS daily-log mode (auto memory on and Kairos active)
 *   2. auto + team: combined prompt covering both directories
 *   3. auto only: memory lines for the single auto directory
 *   4. otherwise: log why memory is disabled and return null
 * Team memory requires auto memory (enforced by isTeamMemoryEnabled), so
 * there is no team-only branch.
 *
 * @returns The prompt text, or null when auto memory is disabled.
 */
export async function loadMemoryPrompt(): Promise<string | null> {
  const autoEnabled = isAutoMemoryEnabled()
  const skipIndex = getFeatureValue_CACHED_MAY_BE_STALE(
    'tengu_moth_copse',
    false,
  )
  const autoTag =
    'auto' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS

  // KAIROS daily-log mode wins over TEAMMEM: an append-only log does not
  // compose with team sync, which expects a shared MEMORY.md that both sides
  // read and write. Because this is gated on `autoEnabled`, the disabled
  // case still reaches the tengu_memdir_disabled telemetry further down,
  // exactly like the non-KAIROS path.
  if (feature('KAIROS') && autoEnabled && getKairosActive()) {
    logMemoryDirCounts(getAutoMemPath(), { memory_type: autoTag })
    return buildAssistantDailyLogPrompt(skipIndex)
  }

  // Cowork supplies memory-policy text through an env var; pass it to every
  // builder. Blank or whitespace-only values count as absent.
  const rawGuidelines = process.env.CLAUDE_COWORK_MEMORY_EXTRA_GUIDELINES
  const extraGuidelines = rawGuidelines?.trim() ? [rawGuidelines] : undefined

  if (feature('TEAMMEM') && teamMemPaths!.isTeamMemoryEnabled()) {
    const autoDir = getAutoMemPath()
    const teamDir = teamMemPaths!.getTeamMemPath()
    // The harness guarantees both directories exist so the model can write
    // without checking (the prompt says "already exist"). Creating teamDir
    // alone is enough: getTeamMemPath() is defined as
    // join(getAutoMemPath(), 'team'), so a recursive mkdir of the team dir
    // creates the auto dir too. If the team dir ever moves out from under
    // the auto dir, add a second ensureMemoryDirExists call for autoDir.
    await ensureMemoryDirExists(teamDir)
    logMemoryDirCounts(autoDir, { memory_type: autoTag })
    logMemoryDirCounts(teamDir, {
      memory_type:
        'team' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    return teamMemPrompts!.buildCombinedMemoryPrompt(extraGuidelines, skipIndex)
  }

  if (!autoEnabled) {
    const disabledByEnvVar = isEnvTruthy(
      process.env.CLAUDE_CODE_DISABLE_AUTO_MEMORY,
    )
    logEvent('tengu_memdir_disabled', {
      disabled_by_env_var: disabledByEnvVar,
      disabled_by_setting:
        !disabledByEnvVar && getInitialSettings().autoMemoryEnabled === false,
    })
    // Check the GB flag directly instead of isTeamMemoryEnabled(): that
    // function checks isAutoMemoryEnabled() first, which is definitionally
    // false here. The question is "was this user in the team-memory cohort
    // at all."
    if (getFeatureValue_CACHED_MAY_BE_STALE('tengu_herring_clock', false)) {
      logEvent('tengu_team_memdir_disabled', {})
    }
    return null
  }

  // Auto memory only. The harness guarantees the directory exists so the
  // model can write without checking (the prompt says "already exists").
  const autoDir = getAutoMemPath()
  await ensureMemoryDirExists(autoDir)
  logMemoryDirCounts(autoDir, { memory_type: autoTag })
  const promptLines = buildMemoryLines(
    'auto memory',
    autoDir,
    extraGuidelines,
    skipIndex,
  )
  return promptLines.join('\n')
}