/ src / memdir / memoryScan.ts
memoryScan.ts
 1  /**
 2   * Memory-directory scanning primitives. Split out of findRelevantMemories.ts
 3   * so extractMemories can import the scan without pulling in sideQuery and
 4   * the API-client chain (which closed a cycle through memdir.ts — #25372).
 5   */
 6  
 7  import { readdir } from 'fs/promises'
 8  import { basename, join } from 'path'
 9  import { parseFrontmatter } from '../utils/frontmatterParser.js'
10  import { readFileInRange } from '../utils/readFileInRange.js'
11  import { type MemoryType, parseMemoryType } from './memoryTypes.js'
12  
/**
 * Summary of a single memory file as produced by scanMemoryFiles: where the
 * file lives, when it last changed, and the frontmatter fields read from it.
 */
export type MemoryHeader = {
  /** Path relative to the scanned memory directory, as returned by readdir. */
  filename: string
  /** `filename` joined onto the memory directory that was scanned. */
  filePath: string
  /** Modification time in milliseconds, as reported by readFileInRange. */
  mtimeMs: number
  /** Frontmatter `description`, or null when it is missing or empty. */
  description: string | null
  /**
   * Result of parseMemoryType applied to the frontmatter `type` field
   * (presumably undefined when the value is absent or unrecognised —
   * confirm against parseMemoryType).
   */
  type: MemoryType | undefined
}
20  
/**
 * Upper bound on the number of headers scanMemoryFiles returns; applied
 * after the newest-first sort, so only the most recent files survive.
 */
const MAX_MEMORY_FILES = 200
/**
 * Limit passed to readFileInRange when reading the head of each file for
 * frontmatter parsing (presumably a line count — confirm against
 * readFileInRange).
 */
const FRONTMATTER_MAX_LINES = 30
23  
/**
 * Collect header metadata for the memory notes stored under `memoryDir`.
 *
 * The directory is listed recursively; every `.md` entry whose basename is
 * not `MEMORY.md` has its leading portion (bounded by FRONTMATTER_MAX_LINES)
 * read and its frontmatter parsed. The resulting headers come back ordered
 * newest-first by mtime and truncated to MAX_MEMORY_FILES. Both
 * findRelevantMemories (query-time recall) and extractMemories (which
 * pre-injects the listing so the extraction agent doesn't spend a turn on
 * `ls`) consume this listing.
 *
 * Files are read before sorting instead of stat → sort → read:
 * readFileInRange stats internally and hands back mtimeMs, so for the common
 * case (N ≤ 200) this halves syscalls compared with a separate stat round.
 * For larger N a few extra small files get read, but the surviving 200 are
 * still not stat'ed twice.
 *
 * Best-effort by design: a file whose read or parse fails is left out of the
 * result, and any failure of the scan as a whole (e.g. the directory cannot
 * be listed) yields an empty list rather than an exception.
 *
 * @param memoryDir Directory to scan.
 * @param signal Abort signal forwarded to each file read.
 * @returns Up to MAX_MEMORY_FILES headers, most recently modified first.
 */
export async function scanMemoryFiles(
  memoryDir: string,
  signal: AbortSignal,
): Promise<MemoryHeader[]> {
  // Builds the header for one candidate file; rejects if the read or the
  // frontmatter parse fails so the caller can drop just that file.
  const loadHeader = async (relativePath: string): Promise<MemoryHeader> => {
    const filePath = join(memoryDir, relativePath)
    const head = await readFileInRange(
      filePath,
      0,
      FRONTMATTER_MAX_LINES,
      undefined,
      signal,
    )
    const parsed = parseFrontmatter(head.content, filePath)
    return {
      filename: relativePath,
      filePath,
      mtimeMs: head.mtimeMs,
      description: parsed.frontmatter.description || null,
      type: parseMemoryType(parsed.frontmatter.type),
    }
  }

  try {
    // Keep markdown entries only, skipping anything named MEMORY.md.
    const candidates: string[] = []
    for (const entry of await readdir(memoryDir, { recursive: true })) {
      if (basename(entry) === 'MEMORY.md') continue
      if (entry.endsWith('.md')) candidates.push(entry)
    }

    // allSettled so a single unreadable file cannot sink the whole scan.
    const outcomes = await Promise.allSettled(
      candidates.map(relativePath => loadHeader(relativePath)),
    )

    const headers: MemoryHeader[] = []
    for (const outcome of outcomes) {
      if (outcome.status === 'fulfilled') headers.push(outcome.value)
    }

    // Newest first, then cap.
    headers.sort((left, right) => right.mtimeMs - left.mtimeMs)
    return headers.slice(0, MAX_MEMORY_FILES)
  } catch {
    return []
  }
}
78  
/**
 * Render memory headers as a plain-text manifest with exactly one bullet
 * line per file:
 *
 *   - [type] filename (ISO-8601 mtime): description
 *
 * The `[type] ` tag is left out when the header has no type, and the
 * `: description` suffix is left out when there is no description. Used by
 * both the recall selector prompt and the extraction-agent prompt.
 *
 * Line breaks inside a description (frontmatter values can span several
 * lines) are folded into single spaces so an entry can never spill onto
 * additional lines or be mistaken for further manifest entries. A
 * description consisting only of whitespace is treated as absent.
 *
 * @param memories Headers to list; the output preserves their order.
 * @returns The entries joined with `\n`, or an empty string for an empty list.
 */
export function formatMemoryManifest(memories: MemoryHeader[]): string {
  return memories
    .map(m => {
      const tag = m.type ? `[${m.type}] ` : ''
      const ts = new Date(m.mtimeMs).toISOString()
      // Fold a multi-line description onto one line: split at line breaks,
      // trim each piece, drop empty pieces, and rejoin with single spaces.
      const description = (m.description ?? '')
        .split(/[\r\n]+/)
        .map(part => part.trim())
        .filter(part => part.length > 0)
        .join(' ')
      const entry = `- ${tag}${m.filename} (${ts})`
      return description ? `${entry}: ${description}` : entry
    })
    .join('\n')
}