/ utils / filePersistence / outputsScanner.ts
outputsScanner.ts
  1  /**
  2   * Outputs directory scanner for file persistence
  3   *
  4   * This module provides utilities to:
  5   * - Detect the session type from environment variables
  6   * - Capture turn start timestamp
  7   * - Find modified files by comparing file mtimes against turn start time
  8   */
  9  
 10  import * as fs from 'fs/promises'
 11  import * as path from 'path'
 12  import { logForDebugging } from '../debug.js'
 13  import type { EnvironmentKind } from '../teleport/environments.js'
 14  import type { TurnStartTime } from './types.js'
 15  
 16  /** Shared debug logger for file persistence modules */
 17  export function logDebug(message: string): void {
 18    logForDebugging(`[file-persistence] ${message}`)
 19  }
 20  
 21  /**
 22   * Get the environment kind from CLAUDE_CODE_ENVIRONMENT_KIND.
 23   * Returns null if not set or not a recognized value.
 24   */
 25  export function getEnvironmentKind(): EnvironmentKind | null {
 26    const kind = process.env.CLAUDE_CODE_ENVIRONMENT_KIND
 27    if (kind === 'byoc' || kind === 'anthropic_cloud') {
 28      return kind
 29    }
 30    return null
 31  }
 32  
 33  function hasParentPath(
 34    entry: object,
 35  ): entry is { parentPath: string; name: string } {
 36    return 'parentPath' in entry && typeof entry.parentPath === 'string'
 37  }
 38  
 39  function hasPath(entry: object): entry is { path: string; name: string } {
 40    return 'path' in entry && typeof entry.path === 'string'
 41  }
 42  
 43  function getEntryParentPath(entry: object, fallback: string): string {
 44    if (hasParentPath(entry)) {
 45      return entry.parentPath
 46    }
 47    if (hasPath(entry)) {
 48      return entry.path
 49    }
 50    return fallback
 51  }
 52  
 53  /**
 54   * Find files that have been modified since the turn started.
 55   * Returns paths of files with mtime >= turnStartTime.
 56   *
 57   * Uses recursive directory listing and parallelized stat calls for efficiency.
 58   *
 59   * @param turnStartTime - The timestamp when the turn started
 60   * @param outputsDir - The directory to scan for modified files
 61   */
 62  export async function findModifiedFiles(
 63    turnStartTime: TurnStartTime,
 64    outputsDir: string,
 65  ): Promise<string[]> {
 66    // Use recursive flag to get all entries in one call
 67    let entries: Awaited<ReturnType<typeof fs.readdir>>
 68    try {
 69      entries = await fs.readdir(outputsDir, {
 70        withFileTypes: true,
 71        recursive: true,
 72      })
 73    } catch {
 74      // Directory doesn't exist or is not accessible
 75      return []
 76    }
 77  
 78    // Filter to regular files only (skip symlinks for security) and build full paths
 79    const filePaths: string[] = []
 80    for (const entry of entries) {
 81      if (entry.isSymbolicLink()) {
 82        continue
 83      }
 84      if (entry.isFile()) {
 85        // entry.parentPath is available in Node 20+, fallback to entry.path for older versions
 86        const parentPath = getEntryParentPath(entry, outputsDir)
 87        filePaths.push(path.join(parentPath, entry.name))
 88      }
 89    }
 90  
 91    if (filePaths.length === 0) {
 92      logDebug('No files found in outputs directory')
 93      return []
 94    }
 95  
 96    // Parallelize stat calls for all files
 97    const statResults = await Promise.all(
 98      filePaths.map(async filePath => {
 99        try {
100          const stat = await fs.lstat(filePath)
101          // Skip if it became a symlink between readdir and stat (race condition)
102          if (stat.isSymbolicLink()) {
103            return null
104          }
105          return { filePath, mtimeMs: stat.mtimeMs }
106        } catch {
107          // File may have been deleted between readdir and stat
108          return null
109        }
110      }),
111    )
112  
113    // Filter to files modified since turn start
114    const modifiedFiles: string[] = []
115    for (const result of statResults) {
116      if (result && result.mtimeMs >= turnStartTime) {
117        modifiedFiles.push(result.filePath)
118      }
119    }
120  
121    logDebug(
122      `Found ${modifiedFiles.length} modified files since turn start (scanned ${filePaths.length} total)`,
123    )
124  
125    return modifiedFiles
126  }