// utils/cleanup.ts
  1  import * as fs from 'fs/promises'
  2  import { homedir } from 'os'
  3  import { join } from 'path'
  4  import { logEvent } from '../services/analytics/index.js'
  5  import { CACHE_PATHS } from './cachePaths.js'
  6  import { logForDebugging } from './debug.js'
  7  import { getClaudeConfigHomeDir } from './envUtils.js'
  8  import { type FsOperations, getFsImplementation } from './fsOperations.js'
  9  import { cleanupOldImageCaches } from './imageStore.js'
 10  import * as lockfile from './lockfile.js'
 11  import { logError } from './log.js'
 12  import { cleanupOldVersions } from './nativeInstaller/index.js'
 13  import { cleanupOldPastes } from './pasteStore.js'
 14  import { getProjectsDir } from './sessionStorage.js'
 15  import { getSettingsWithAllErrors } from './settings/allErrors.js'
 16  import {
 17    getSettings_DEPRECATED,
 18    rawSettingsContainsKey,
 19  } from './settings/settings.js'
 20  import { TOOL_RESULTS_SUBDIR } from './toolResultStorage.js'
 21  import { cleanupStaleAgentWorktrees } from './worktree.js'
 22  
 23  const DEFAULT_CLEANUP_PERIOD_DAYS = 30
 24  
 25  function getCutoffDate(): Date {
 26    const settings = getSettings_DEPRECATED() || {}
 27    const cleanupPeriodDays =
 28      settings.cleanupPeriodDays ?? DEFAULT_CLEANUP_PERIOD_DAYS
 29    const cleanupPeriodMs = cleanupPeriodDays * 24 * 60 * 60 * 1000
 30    return new Date(Date.now() - cleanupPeriodMs)
 31  }
 32  
 33  export type CleanupResult = {
 34    messages: number
 35    errors: number
 36  }
 37  
 38  export function addCleanupResults(
 39    a: CleanupResult,
 40    b: CleanupResult,
 41  ): CleanupResult {
 42    return {
 43      messages: a.messages + b.messages,
 44      errors: a.errors + b.errors,
 45    }
 46  }
 47  
 48  export function convertFileNameToDate(filename: string): Date {
 49    const isoStr = filename
 50      .split('.')[0]!
 51      .replace(/T(\d{2})-(\d{2})-(\d{2})-(\d{3})Z/, 'T$1:$2:$3.$4Z')
 52    return new Date(isoStr)
 53  }
 54  
 55  async function cleanupOldFilesInDirectory(
 56    dirPath: string,
 57    cutoffDate: Date,
 58    isMessagePath: boolean,
 59  ): Promise<CleanupResult> {
 60    const result: CleanupResult = { messages: 0, errors: 0 }
 61  
 62    try {
 63      const files = await getFsImplementation().readdir(dirPath)
 64  
 65      for (const file of files) {
 66        try {
 67          // Convert filename format where all ':.' were replaced with '-'
 68          const timestamp = convertFileNameToDate(file.name)
 69          if (timestamp < cutoffDate) {
 70            await getFsImplementation().unlink(join(dirPath, file.name))
 71            // Increment the appropriate counter
 72            if (isMessagePath) {
 73              result.messages++
 74            } else {
 75              result.errors++
 76            }
 77          }
 78        } catch (error) {
 79          // Log but continue processing other files
 80          logError(error as Error)
 81        }
 82      }
 83    } catch (error: unknown) {
 84      // Ignore if directory doesn't exist
 85      if (error instanceof Error && 'code' in error && error.code !== 'ENOENT') {
 86        logError(error)
 87      }
 88    }
 89  
 90    return result
 91  }
 92  
 93  export async function cleanupOldMessageFiles(): Promise<CleanupResult> {
 94    const fsImpl = getFsImplementation()
 95    const cutoffDate = getCutoffDate()
 96    const errorPath = CACHE_PATHS.errors()
 97    const baseCachePath = CACHE_PATHS.baseLogs()
 98  
 99    // Clean up message and error logs
100    let result = await cleanupOldFilesInDirectory(errorPath, cutoffDate, false)
101  
102    // Clean up MCP logs
103    try {
104      let dirents
105      try {
106        dirents = await fsImpl.readdir(baseCachePath)
107      } catch {
108        return result
109      }
110  
111      const mcpLogDirs = dirents
112        .filter(
113          dirent => dirent.isDirectory() && dirent.name.startsWith('mcp-logs-'),
114        )
115        .map(dirent => join(baseCachePath, dirent.name))
116  
117      for (const mcpLogDir of mcpLogDirs) {
118        // Clean up files in MCP log directory
119        result = addCleanupResults(
120          result,
121          await cleanupOldFilesInDirectory(mcpLogDir, cutoffDate, true),
122        )
123        await tryRmdir(mcpLogDir, fsImpl)
124      }
125    } catch (error: unknown) {
126      if (error instanceof Error && 'code' in error && error.code !== 'ENOENT') {
127        logError(error)
128      }
129    }
130  
131    return result
132  }
133  
134  async function unlinkIfOld(
135    filePath: string,
136    cutoffDate: Date,
137    fsImpl: FsOperations,
138  ): Promise<boolean> {
139    const stats = await fsImpl.stat(filePath)
140    if (stats.mtime < cutoffDate) {
141      await fsImpl.unlink(filePath)
142      return true
143    }
144    return false
145  }
146  
147  async function tryRmdir(dirPath: string, fsImpl: FsOperations): Promise<void> {
148    try {
149      await fsImpl.rmdir(dirPath)
150    } catch {
151      // not empty / doesn't exist
152    }
153  }
154  
155  export async function cleanupOldSessionFiles(): Promise<CleanupResult> {
156    const cutoffDate = getCutoffDate()
157    const result: CleanupResult = { messages: 0, errors: 0 }
158    const projectsDir = getProjectsDir()
159    const fsImpl = getFsImplementation()
160  
161    let projectDirents
162    try {
163      projectDirents = await fsImpl.readdir(projectsDir)
164    } catch {
165      return result
166    }
167  
168    for (const projectDirent of projectDirents) {
169      if (!projectDirent.isDirectory()) continue
170      const projectDir = join(projectsDir, projectDirent.name)
171  
172      // Single readdir per project directory — partition into files and session dirs
173      let entries
174      try {
175        entries = await fsImpl.readdir(projectDir)
176      } catch {
177        result.errors++
178        continue
179      }
180  
181      for (const entry of entries) {
182        if (entry.isFile()) {
183          if (!entry.name.endsWith('.jsonl') && !entry.name.endsWith('.cast')) {
184            continue
185          }
186          try {
187            if (
188              await unlinkIfOld(join(projectDir, entry.name), cutoffDate, fsImpl)
189            ) {
190              result.messages++
191            }
192          } catch {
193            result.errors++
194          }
195        } else if (entry.isDirectory()) {
196          // Session directory — clean up tool-results/<toolDir>/* beneath it
197          const sessionDir = join(projectDir, entry.name)
198          const toolResultsDir = join(sessionDir, TOOL_RESULTS_SUBDIR)
199          let toolDirs
200          try {
201            toolDirs = await fsImpl.readdir(toolResultsDir)
202          } catch {
203            // No tool-results dir — still try to remove an empty session dir
204            await tryRmdir(sessionDir, fsImpl)
205            continue
206          }
207          for (const toolEntry of toolDirs) {
208            if (toolEntry.isFile()) {
209              try {
210                if (
211                  await unlinkIfOld(
212                    join(toolResultsDir, toolEntry.name),
213                    cutoffDate,
214                    fsImpl,
215                  )
216                ) {
217                  result.messages++
218                }
219              } catch {
220                result.errors++
221              }
222            } else if (toolEntry.isDirectory()) {
223              const toolDirPath = join(toolResultsDir, toolEntry.name)
224              let toolFiles
225              try {
226                toolFiles = await fsImpl.readdir(toolDirPath)
227              } catch {
228                continue
229              }
230              for (const tf of toolFiles) {
231                if (!tf.isFile()) continue
232                try {
233                  if (
234                    await unlinkIfOld(
235                      join(toolDirPath, tf.name),
236                      cutoffDate,
237                      fsImpl,
238                    )
239                  ) {
240                    result.messages++
241                  }
242                } catch {
243                  result.errors++
244                }
245              }
246              await tryRmdir(toolDirPath, fsImpl)
247            }
248          }
249          await tryRmdir(toolResultsDir, fsImpl)
250          await tryRmdir(sessionDir, fsImpl)
251        }
252      }
253  
254      await tryRmdir(projectDir, fsImpl)
255    }
256  
257    return result
258  }
259  
260  /**
261   * Generic helper for cleaning up old files in a single directory
262   * @param dirPath Path to the directory to clean
263   * @param extension File extension to filter (e.g., '.md', '.jsonl')
264   * @param removeEmptyDir Whether to remove the directory if empty after cleanup
265   */
266  async function cleanupSingleDirectory(
267    dirPath: string,
268    extension: string,
269    removeEmptyDir: boolean = true,
270  ): Promise<CleanupResult> {
271    const cutoffDate = getCutoffDate()
272    const result: CleanupResult = { messages: 0, errors: 0 }
273    const fsImpl = getFsImplementation()
274  
275    let dirents
276    try {
277      dirents = await fsImpl.readdir(dirPath)
278    } catch {
279      return result
280    }
281  
282    for (const dirent of dirents) {
283      if (!dirent.isFile() || !dirent.name.endsWith(extension)) continue
284      try {
285        if (await unlinkIfOld(join(dirPath, dirent.name), cutoffDate, fsImpl)) {
286          result.messages++
287        }
288      } catch {
289        result.errors++
290      }
291    }
292  
293    if (removeEmptyDir) {
294      await tryRmdir(dirPath, fsImpl)
295    }
296  
297    return result
298  }
299  
300  export function cleanupOldPlanFiles(): Promise<CleanupResult> {
301    const plansDir = join(getClaudeConfigHomeDir(), 'plans')
302    return cleanupSingleDirectory(plansDir, '.md')
303  }
304  
305  export async function cleanupOldFileHistoryBackups(): Promise<CleanupResult> {
306    const cutoffDate = getCutoffDate()
307    const result: CleanupResult = { messages: 0, errors: 0 }
308    const fsImpl = getFsImplementation()
309  
310    try {
311      const configDir = getClaudeConfigHomeDir()
312      const fileHistoryStorageDir = join(configDir, 'file-history')
313  
314      let dirents
315      try {
316        dirents = await fsImpl.readdir(fileHistoryStorageDir)
317      } catch {
318        return result
319      }
320  
321      const fileHistorySessionsDirs = dirents
322        .filter(dirent => dirent.isDirectory())
323        .map(dirent => join(fileHistoryStorageDir, dirent.name))
324  
325      await Promise.all(
326        fileHistorySessionsDirs.map(async fileHistorySessionDir => {
327          try {
328            const stats = await fsImpl.stat(fileHistorySessionDir)
329            if (stats.mtime < cutoffDate) {
330              await fsImpl.rm(fileHistorySessionDir, {
331                recursive: true,
332                force: true,
333              })
334              result.messages++
335            }
336          } catch {
337            result.errors++
338          }
339        }),
340      )
341  
342      await tryRmdir(fileHistoryStorageDir, fsImpl)
343    } catch (error) {
344      logError(error as Error)
345    }
346  
347    return result
348  }
349  
350  export async function cleanupOldSessionEnvDirs(): Promise<CleanupResult> {
351    const cutoffDate = getCutoffDate()
352    const result: CleanupResult = { messages: 0, errors: 0 }
353    const fsImpl = getFsImplementation()
354  
355    try {
356      const configDir = getClaudeConfigHomeDir()
357      const sessionEnvBaseDir = join(configDir, 'session-env')
358  
359      let dirents
360      try {
361        dirents = await fsImpl.readdir(sessionEnvBaseDir)
362      } catch {
363        return result
364      }
365  
366      const sessionEnvDirs = dirents
367        .filter(dirent => dirent.isDirectory())
368        .map(dirent => join(sessionEnvBaseDir, dirent.name))
369  
370      for (const sessionEnvDir of sessionEnvDirs) {
371        try {
372          const stats = await fsImpl.stat(sessionEnvDir)
373          if (stats.mtime < cutoffDate) {
374            await fsImpl.rm(sessionEnvDir, { recursive: true, force: true })
375            result.messages++
376          }
377        } catch {
378          result.errors++
379        }
380      }
381  
382      await tryRmdir(sessionEnvBaseDir, fsImpl)
383    } catch (error) {
384      logError(error as Error)
385    }
386  
387    return result
388  }
389  
390  /**
391   * Cleans up old debug log files from ~/.claude/debug/
392   * Preserves the 'latest' symlink which points to the current session's log.
393   * Debug logs can grow very large (especially with the infinite logging loop bug)
394   * and accumulate indefinitely without this cleanup.
395   */
396  export async function cleanupOldDebugLogs(): Promise<CleanupResult> {
397    const cutoffDate = getCutoffDate()
398    const result: CleanupResult = { messages: 0, errors: 0 }
399    const fsImpl = getFsImplementation()
400    const debugDir = join(getClaudeConfigHomeDir(), 'debug')
401  
402    let dirents
403    try {
404      dirents = await fsImpl.readdir(debugDir)
405    } catch {
406      return result
407    }
408  
409    for (const dirent of dirents) {
410      // Preserve the 'latest' symlink
411      if (
412        !dirent.isFile() ||
413        !dirent.name.endsWith('.txt') ||
414        dirent.name === 'latest'
415      ) {
416        continue
417      }
418      try {
419        if (await unlinkIfOld(join(debugDir, dirent.name), cutoffDate, fsImpl)) {
420          result.messages++
421        }
422      } catch {
423        result.errors++
424      }
425    }
426  
427    // Intentionally do NOT remove debugDir even if empty — needed for future logs
428    return result
429  }
430  
// 86,400,000 ms — throttle window shared by the once-per-day cleanups below.
const ONE_DAY_MS = 24 * 60 * 60 * 1000
432  
/**
 * Clean up old npm cache entries for Anthropic packages.
 * This helps reduce disk usage since we publish many dev versions per day.
 * Only runs once per day for Ant users.
 *
 * Throttling protocol: the marker file's mtime records the last successful
 * run (it is rewritten after cleanup completes); a lockfile on the same path
 * keeps concurrent processes from duplicating the work.
 */
export async function cleanupNpmCacheForAnthropicPackages(): Promise<void> {
  const markerPath = join(getClaudeConfigHomeDir(), '.npm-cache-cleanup')

  // Skip entirely if the marker shows a run within the last 24h.
  try {
    const stat = await fs.stat(markerPath)
    if (Date.now() - stat.mtimeMs < ONE_DAY_MS) {
      logForDebugging('npm cache cleanup: skipping, ran recently')
      return
    }
  } catch {
    // File doesn't exist, proceed with cleanup
  }

  // retries: 0 — give up immediately rather than waiting for the lock.
  // realpath: false — presumably because markerPath may not exist yet; verify
  // against the lockfile helper's contract.
  try {
    await lockfile.lock(markerPath, { retries: 0, realpath: false })
  } catch {
    logForDebugging('npm cache cleanup: skipping, lock held')
    return
  }

  logForDebugging('npm cache cleanup: starting')

  const npmCachePath = join(homedir(), '.npm', '_cacache')

  // Keep at most this many recent cache entries per package.
  const NPM_CACHE_RETENTION_COUNT = 5

  const startTime = Date.now()
  try {
    const cacache = await import('cacache')
    const cutoff = startTime - ONE_DAY_MS

    // Stream index entries and collect all Anthropic package entries.
    // Previous implementation used cacache.verify() which does a full
    // integrity check + GC of the ENTIRE cache — O(all content blobs).
    // On large caches this took 60+ seconds and blocked the event loop.
    const stream = cacache.ls.stream(npmCachePath)
    const anthropicEntries: { key: string; time: number }[] = []
    for await (const entry of stream as AsyncIterable<{
      key: string
      time: number
    }>) {
      if (entry.key.includes('@anthropic-ai/claude-')) {
        anthropicEntries.push({ key: entry.key, time: entry.time })
      }
    }

    // Group by package name (everything before the last @version separator)
    const byPackage = new Map<string, { key: string; time: number }[]>()
    for (const entry of anthropicEntries) {
      const atVersionIdx = entry.key.lastIndexOf('@')
      // If no '@' beyond position 0 is found, fall back to the whole key.
      const pkgName =
        atVersionIdx > 0 ? entry.key.slice(0, atVersionIdx) : entry.key
      const existing = byPackage.get(pkgName) ?? []
      existing.push(entry)
      byPackage.set(pkgName, existing)
    }

    // Remove entries older than 1 day OR beyond the top N most recent per package
    const keysToRemove: string[] = []
    for (const [, entries] of byPackage) {
      entries.sort((a, b) => b.time - a.time) // newest first
      for (let i = 0; i < entries.length; i++) {
        const entry = entries[i]!
        if (entry.time < cutoff || i >= NPM_CACHE_RETENTION_COUNT) {
          keysToRemove.push(entry.key)
        }
      }
    }

    // Delete all stale index entries concurrently.
    await Promise.all(
      keysToRemove.map(key => cacache.rm.entry(npmCachePath, key)),
    )

    // Touch the marker only after a successful sweep so failed runs retry.
    await fs.writeFile(markerPath, new Date().toISOString())

    const durationMs = Date.now() - startTime
    if (keysToRemove.length > 0) {
      logForDebugging(
        `npm cache cleanup: Removed ${keysToRemove.length} old @anthropic-ai entries in ${durationMs}ms`,
      )
    } else {
      logForDebugging(`npm cache cleanup: completed in ${durationMs}ms`)
    }
    logEvent('tengu_npm_cache_cleanup', {
      success: true,
      durationMs,
      entriesRemoved: keysToRemove.length,
    })
  } catch (error) {
    logError(error as Error)
    logEvent('tengu_npm_cache_cleanup', {
      success: false,
      durationMs: Date.now() - startTime,
    })
  } finally {
    // Always release the lock; unlock failures are intentionally swallowed.
    await lockfile.unlock(markerPath, { realpath: false }).catch(() => {})
  }
}
536  
537  /**
538   * Throttled wrapper around cleanupOldVersions for recurring cleanup in long-running sessions.
539   * Uses a marker file and lock to ensure it runs at most once per 24 hours,
540   * and does not block if another process is already running cleanup.
541   * The regular cleanupOldVersions() should still be used for installer flows.
542   */
543  export async function cleanupOldVersionsThrottled(): Promise<void> {
544    const markerPath = join(getClaudeConfigHomeDir(), '.version-cleanup')
545  
546    try {
547      const stat = await fs.stat(markerPath)
548      if (Date.now() - stat.mtimeMs < ONE_DAY_MS) {
549        logForDebugging('version cleanup: skipping, ran recently')
550        return
551      }
552    } catch {
553      // File doesn't exist, proceed with cleanup
554    }
555  
556    try {
557      await lockfile.lock(markerPath, { retries: 0, realpath: false })
558    } catch {
559      logForDebugging('version cleanup: skipping, lock held')
560      return
561    }
562  
563    logForDebugging('version cleanup: starting (throttled)')
564  
565    try {
566      await cleanupOldVersions()
567      await fs.writeFile(markerPath, new Date().toISOString())
568    } catch (error) {
569      logError(error as Error)
570    } finally {
571      await lockfile.unlock(markerPath, { realpath: false }).catch(() => {})
572    }
573  }
574  
575  export async function cleanupOldMessageFilesInBackground(): Promise<void> {
576    // If settings have validation errors but the user explicitly set cleanupPeriodDays,
577    // skip cleanup entirely rather than falling back to the default (30 days).
578    // This prevents accidentally deleting files when the user intended a different retention period.
579    const { errors } = getSettingsWithAllErrors()
580    if (errors.length > 0 && rawSettingsContainsKey('cleanupPeriodDays')) {
581      logForDebugging(
582        'Skipping cleanup: settings have validation errors but cleanupPeriodDays was explicitly set. Fix settings errors to enable cleanup.',
583      )
584      return
585    }
586  
587    await cleanupOldMessageFiles()
588    await cleanupOldSessionFiles()
589    await cleanupOldPlanFiles()
590    await cleanupOldFileHistoryBackups()
591    await cleanupOldSessionEnvDirs()
592    await cleanupOldDebugLogs()
593    await cleanupOldImageCaches()
594    await cleanupOldPastes(getCutoffDate())
595    const removedWorktrees = await cleanupStaleAgentWorktrees(getCutoffDate())
596    if (removedWorktrees > 0) {
597      logEvent('tengu_worktree_cleanup', { removed: removedWorktrees })
598    }
599    if (process.env.USER_TYPE === 'ant') {
600      await cleanupNpmCacheForAnthropicPackages()
601    }
602  }