/ utils / memoryFileDetection.ts
memoryFileDetection.ts
  1  import { feature } from 'bun:bundle'
  2  import { normalize, posix, win32 } from 'path'
  3  import {
  4    getAutoMemPath,
  5    getMemoryBaseDir,
  6    isAutoMemoryEnabled,
  7    isAutoMemPath,
  8  } from '../memdir/paths.js'
  9  import { isAgentMemoryPath } from '../tools/AgentTool/agentMemory.js'
 10  import { getClaudeConfigHomeDir } from './envUtils.js'
 11  import {
 12    posixPathToWindowsPath,
 13    windowsPathToPosixPath,
 14  } from './windowsPaths.js'
 15  
/* eslint-disable @typescript-eslint/no-require-imports */
// Team-memory path helpers. The module is loaded with require() only when the
// TEAMMEM feature flag is on; otherwise this is null. Every use in this file
// is guarded by a feature('TEAMMEM') check before the value is dereferenced.
const teamMemPaths = feature('TEAMMEM')
  ? (require('../memdir/teamMemPaths.js') as typeof import('../memdir/teamMemPaths.js'))
  : null
/* eslint-enable @typescript-eslint/no-require-imports */

// True when the current process runs on Windows. Gates case-folding in
// toComparable and the MinGW (/c/...) path handling for shell commands.
const IS_WINDOWS = process.platform === 'win32'
 23  
 24  // Normalize path separators to posix (/). Does NOT translate drive encoding.
 25  function toPosix(p: string): string {
 26    return p.split(win32.sep).join(posix.sep)
 27  }
 28  
 29  // Convert a path to a stable string-comparable form: forward-slash separated,
 30  // and on Windows, lowercased (Windows filesystems are case-insensitive).
 31  function toComparable(p: string): string {
 32    const posixForm = toPosix(p)
 33    return IS_WINDOWS ? posixForm.toLowerCase() : posixForm
 34  }
 35  
 36  /**
 37   * Detects if a file path is a session-related file under ~/.claude.
 38   * Returns the type of session file or null if not a session file.
 39   */
 40  export function detectSessionFileType(
 41    filePath: string,
 42  ): 'session_memory' | 'session_transcript' | null {
 43    const configDir = getClaudeConfigHomeDir()
 44    // Compare in forward-slash form; on Windows also case-fold. The caller
 45    // (isShellCommandTargetingMemory) converts MinGW /c/... → native before
 46    // reaching here, so we only need separator + case normalization.
 47    const normalized = toComparable(filePath)
 48    const configDirCmp = toComparable(configDir)
 49    if (!normalized.startsWith(configDirCmp)) {
 50      return null
 51    }
 52    if (normalized.includes('/session-memory/') && normalized.endsWith('.md')) {
 53      return 'session_memory'
 54    }
 55    if (normalized.includes('/projects/') && normalized.endsWith('.jsonl')) {
 56      return 'session_transcript'
 57    }
 58    return null
 59  }
 60  
 61  /**
 62   * Checks if a glob/pattern string indicates session file access intent.
 63   * Used for Grep/Glob tools where we check patterns, not actual file paths.
 64   */
 65  export function detectSessionPatternType(
 66    pattern: string,
 67  ): 'session_memory' | 'session_transcript' | null {
 68    const normalized = pattern.split(win32.sep).join(posix.sep)
 69    if (
 70      normalized.includes('session-memory') &&
 71      (normalized.includes('.md') || normalized.endsWith('*'))
 72    ) {
 73      return 'session_memory'
 74    }
 75    if (
 76      normalized.includes('.jsonl') ||
 77      (normalized.includes('projects') && normalized.includes('*.jsonl'))
 78    ) {
 79      return 'session_transcript'
 80    }
 81    return null
 82  }
 83  
 84  /**
 85   * Check if a file path is within the memdir directory.
 86   */
 87  export function isAutoMemFile(filePath: string): boolean {
 88    if (isAutoMemoryEnabled()) {
 89      return isAutoMemPath(filePath)
 90    }
 91    return false
 92  }
 93  
 94  export type MemoryScope = 'personal' | 'team'
 95  
 96  /**
 97   * Determine which memory store (if any) a path belongs to.
 98   *
 99   * Team dir is a subdirectory of memdir (getTeamMemPath = join(getAutoMemPath, 'team')),
100   * so a team path matches both isTeamMemFile and isAutoMemFile. Check team first.
101   *
102   * Use this for scope-keyed telemetry where a single event name distinguishes
103   * by scope field — the existing tengu_memdir_* / tengu_team_mem_* event-name
104   * hierarchy handles the overlap differently (team writes intentionally fire both).
105   */
106  export function memoryScopeForPath(filePath: string): MemoryScope | null {
107    if (feature('TEAMMEM') && teamMemPaths!.isTeamMemFile(filePath)) {
108      return 'team'
109    }
110    if (isAutoMemFile(filePath)) {
111      return 'personal'
112    }
113    return null
114  }
115  
/**
 * Report whether `filePath` is an agent memory path.
 * Always false while auto-memory is disabled; otherwise defers to
 * isAgentMemoryPath.
 */
function isAgentMemFile(filePath: string): boolean {
  if (!isAutoMemoryEnabled()) {
    return false
  }
  return isAgentMemoryPath(filePath)
}
125  
/**
 * Report whether a file is memory that Claude manages itself, as opposed to
 * instruction files the user maintains.
 *
 * Counts as auto-managed: auto-memory (memdir), team memory (when the TEAMMEM
 * feature is on), session memory/transcripts, and agent memory.
 * Does not count: CLAUDE.md, CLAUDE.local.md, .claude/rules/*.md.
 *
 * Used by collapse/badge logic, where user-managed files should keep showing
 * full diffs.
 */
export function isAutoManagedMemoryFile(filePath: string): boolean {
  if (isAutoMemFile(filePath)) return true
  if (feature('TEAMMEM') && teamMemPaths!.isTeamMemFile(filePath)) return true
  return detectSessionFileType(filePath) !== null || isAgentMemFile(filePath)
}
148  
/**
 * Report whether a directory path points at a memory-related directory.
 *
 * Grep/Glob pass a directory `path` rather than a specific file, so this is
 * the directory-level counterpart of the file checks. Both the config dir and
 * the memory base dir are consulted so custom memory locations are handled.
 */
export function isMemoryDirectory(dirPath: string): boolean {
  // SECURITY: collapse `..` segments first so traversal tricks cannot dodge
  // the string checks below. On Windows normalize() yields backslashes, which
  // toComparable turns back into forward slashes. MinGW /c/... paths have
  // already been converted to native form by isShellCommandTargetingMemory,
  // so normalize() never receives them.
  const resolved = normalize(dirPath)
  const candidate = toComparable(resolved)
  const hasSegment = (segment: string): boolean => candidate.includes(segment)

  // Agent memory can live under cwd (project scope), the config dir, or the
  // memory base dir, so this check is not tied to any particular root.
  if (isAutoMemoryEnabled()) {
    if (hasSegment('/agent-memory/') || hasSegment('/agent-memory-local/')) {
      return true
    }
  }

  // Team memory lives under <autoMemPath>/team/.
  if (
    feature('TEAMMEM') &&
    teamMemPaths!.isTeamMemoryEnabled() &&
    teamMemPaths!.isTeamMemPath(resolved)
  ) {
    return true
  }

  // Auto-memory root, which may be overridden
  // (CLAUDE_COWORK_MEMORY_PATH_OVERRIDE): match the directory itself with
  // trailing separators removed, or anything prefixed by the path as returned.
  if (isAutoMemoryEnabled()) {
    const autoMemRoot = getAutoMemPath()
    const rootWithoutTrailingSep = toComparable(
      autoMemRoot.replace(/[/\\]+$/, ''),
    )
    if (
      candidate === rootWithoutTrailingSep ||
      candidate.startsWith(toComparable(autoMemRoot))
    ) {
      return true
    }
  }

  const insideConfig = candidate.startsWith(
    toComparable(getClaudeConfigHomeDir()),
  )
  const insideMemoryBase = candidate.startsWith(
    toComparable(getMemoryBaseDir()),
  )
  if (!(insideConfig || insideMemoryBase)) {
    return false
  }

  if (hasSegment('/session-memory/')) {
    return true
  }
  // Transcript directories only count when they sit under the config dir.
  if (insideConfig && hasSegment('/projects/')) {
    return true
  }
  return isAutoMemoryEnabled() ? hasSegment('/memory/') : false
}
208  
/**
 * Decide whether a shell command line (Bash or PowerShell) refers to memory
 * files. Absolute path tokens are pulled out of the command and each one is
 * run through the memory detection predicates. The collapse logic uses this
 * for Bash/PowerShell grep/search commands.
 */
export function isShellCommandTargetingMemory(command: string): boolean {
  // Roots worth looking for: config dir, memory base, and (only when
  // auto-memory is on) the auto-memory dir without trailing separators.
  const roots: string[] = [getClaudeConfigHomeDir(), getMemoryBaseDir()]
  if (isAutoMemoryEnabled()) {
    roots.push(getAutoMemPath().replace(/[/\\]+$/, ''))
  }
  const searchRoots = roots.filter(Boolean)

  // Cheap pre-filter: bail out unless the command mentions one of the roots.
  // Matching is done in forward-slash form because PowerShell on Windows may
  // use either separator while the roots use the platform-native one. On
  // Windows the MinGW spelling (/c/...) is tried too, since BashTool runs
  // under Git Bash and emits it. Elsewhere windowsPathToPosixPath is never
  // called, so a Linux path like /m/foo cannot be mistaken for a MinGW drive.
  const commandCmp = toComparable(command)
  const mentionsRoot = searchRoots.some(
    root =>
      commandCmp.includes(toComparable(root)) ||
      (IS_WINDOWS &&
        commandCmp.includes(windowsPathToPosixPath(root).toLowerCase())),
  )
  if (!mentionsRoot) {
    return false
  }

  // Absolute path-like tokens: Unix paths (/foo/bar), Windows drive paths
  // (C:\foo, C:/foo) and MinGW paths (/c/foo, which are /-prefixed and so
  // already covered). Tokens starting with a bare backslash (\foo) are left
  // out on purpose: they show up in regex/grep patterns and would be
  // misclassified as memory paths once backslashes are flipped to slashes.
  const tokens = command.match(/(?:[A-Za-z]:[/\\]|\/)[^\s'"]+/g)
  if (tokens === null) {
    return false
  }

  return tokens.some(token => {
    // Drop shell metacharacters glued to the end of the path.
    const trimmed = token.replace(/[,;|&>]+$/, '')
    // Single conversion point for Windows: MinGW /c/... becomes native
    // C:\... so the downstream predicates only need toComparable(). On other
    // platforms the token is already native and passes through untouched.
    const nativePath = IS_WINDOWS ? posixPathToWindowsPath(trimmed) : trimmed
    return isAutoManagedMemoryFile(nativePath) || isMemoryDirectory(nativePath)
  })
}
272  
/**
 * Report whether a glob/pattern is aimed at auto-managed memory files only.
 *
 * User-managed files (CLAUDE.md, CLAUDE.local.md, .claude/rules/) are not
 * matched. The collapse badge logic relies on this so those files are not
 * counted as "memory" operations.
 */
export function isAutoManagedMemoryPattern(pattern: string): boolean {
  if (detectSessionPatternType(pattern) !== null) {
    return true
  }
  if (!isAutoMemoryEnabled()) {
    return false
  }
  // Agent memory directories, with either separator style accepted.
  const slashed = pattern.replace(/\\/g, '/')
  return (
    slashed.includes('agent-memory/') ||
    slashed.includes('agent-memory-local/')
  )
}