/ utils / filePersistence / filePersistence.ts
filePersistence.ts
/**
 * File persistence orchestrator
 *
 * This module provides the main orchestration logic for persisting files
 * at the end of each turn:
 * - BYOC mode: Upload files to Files API and collect file IDs
 * - 1P/Cloud mode: Query Files API listDirectory for file IDs (rclone handles sync).
 *   Not yet implemented in this module: runFilePersistence currently returns
 *   null for every environment kind other than 'byoc'.
 */
  9  
 10  import { feature } from 'bun:bundle'
 11  import { join, relative } from 'path'
 12  import {
 13    type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 14    logEvent,
 15  } from '../../services/analytics/index.js'
 16  import {
 17    type FilesApiConfig,
 18    uploadSessionFiles,
 19  } from '../../services/api/filesApi.js'
 20  import { getCwd } from '../cwd.js'
 21  import { errorMessage } from '../errors.js'
 22  import { logError } from '../log.js'
 23  import { getSessionIngressAuthToken } from '../sessionIngressAuth.js'
 24  import {
 25    findModifiedFiles,
 26    getEnvironmentKind,
 27    logDebug,
 28  } from './outputsScanner.js'
 29  import {
 30    DEFAULT_UPLOAD_CONCURRENCY,
 31    type FailedPersistence,
 32    FILE_COUNT_LIMIT,
 33    type FilesPersistedEventData,
 34    OUTPUTS_SUBDIR,
 35    type PersistedFile,
 36    type TurnStartTime,
 37  } from './types.js'
 38  
 39  /**
 40   * Execute file persistence for modified files in the outputs directory.
 41   *
 42   * Assembles all config internally:
 43   * - Checks environment kind (CLAUDE_CODE_ENVIRONMENT_KIND)
 44   * - Retrieves session access token
 45   * - Requires CLAUDE_CODE_REMOTE_SESSION_ID for session ID
 46   *
 47   * @param turnStartTime - The timestamp when the turn started
 48   * @param signal - Optional abort signal for cancellation
 49   * @returns Event data, or null if not enabled or no files to persist
 50   */
 51  export async function runFilePersistence(
 52    turnStartTime: TurnStartTime,
 53    signal?: AbortSignal,
 54  ): Promise<FilesPersistedEventData | null> {
 55    const environmentKind = getEnvironmentKind()
 56    if (environmentKind !== 'byoc') {
 57      return null
 58    }
 59  
 60    const sessionAccessToken = getSessionIngressAuthToken()
 61    if (!sessionAccessToken) {
 62      return null
 63    }
 64  
 65    const sessionId = process.env.CLAUDE_CODE_REMOTE_SESSION_ID
 66    if (!sessionId) {
 67      logError(
 68        new Error(
 69          'File persistence enabled but CLAUDE_CODE_REMOTE_SESSION_ID is not set',
 70        ),
 71      )
 72      return null
 73    }
 74  
 75    const config: FilesApiConfig = {
 76      oauthToken: sessionAccessToken,
 77      sessionId,
 78    }
 79  
 80    const outputsDir = join(getCwd(), sessionId, OUTPUTS_SUBDIR)
 81  
 82    // Check if aborted
 83    if (signal?.aborted) {
 84      logDebug('Persistence aborted before processing')
 85      return null
 86    }
 87  
 88    const startTime = Date.now()
 89    logEvent('tengu_file_persistence_started', {
 90      mode: environmentKind as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 91    })
 92  
 93    try {
 94      let result: FilesPersistedEventData
 95      if (environmentKind === 'byoc') {
 96        result = await executeBYOCPersistence(
 97          turnStartTime,
 98          config,
 99          outputsDir,
100          signal,
101        )
102      } else {
103        result = await executeCloudPersistence()
104      }
105  
106      // Nothing to report
107      if (result.files.length === 0 && result.failed.length === 0) {
108        return null
109      }
110  
111      const durationMs = Date.now() - startTime
112      logEvent('tengu_file_persistence_completed', {
113        success_count: result.files.length,
114        failure_count: result.failed.length,
115        duration_ms: durationMs,
116        mode: environmentKind as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
117      })
118  
119      return result
120    } catch (error) {
121      logError(error)
122      logDebug(`File persistence failed: ${error}`)
123  
124      const durationMs = Date.now() - startTime
125      logEvent('tengu_file_persistence_completed', {
126        success_count: 0,
127        failure_count: 0,
128        duration_ms: durationMs,
129        mode: environmentKind as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
130        error:
131          'exception' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
132      })
133  
134      return {
135        files: [],
136        failed: [
137          {
138            filename: outputsDir,
139            error: errorMessage(error),
140          },
141        ],
142      }
143    }
144  }
145  
/**
 * Execute BYOC mode persistence: scan the local filesystem for modified files,
 * then upload them to the Files API.
 *
 * @param turnStartTime - Turn start timestamp, forwarded to findModifiedFiles
 * @param config - Files API configuration forwarded to uploadSessionFiles
 * @param outputsDir - Directory to scan; each upload carries its path relative
 *   to this directory
 * @param signal - Optional abort signal, checked once after the scan completes
 * @returns The uploaded files and the per-file failures. Both lists are empty
 *   when nothing was modified or the signal was aborted. When more than
 *   FILE_COUNT_LIMIT files were modified, nothing is uploaded and a single
 *   failure keyed by outputsDir is returned.
 */
async function executeBYOCPersistence(
  turnStartTime: TurnStartTime,
  config: FilesApiConfig,
  outputsDir: string,
  signal?: AbortSignal,
): Promise<FilesPersistedEventData> {
  // Find modified files via local filesystem scan
  // Uses same directory structure as downloads: {cwd}/{sessionId}/outputs
  const modifiedFiles = await findModifiedFiles(turnStartTime, outputsDir)

  if (modifiedFiles.length === 0) {
    logDebug('No modified files to persist')
    return { files: [], failed: [] }
  }

  logDebug(`Found ${modifiedFiles.length} modified files`)

  if (signal?.aborted) {
    return { files: [], failed: [] }
  }

  // Enforce file count limit: refuse the whole batch rather than upload a subset
  if (modifiedFiles.length > FILE_COUNT_LIMIT) {
    logDebug(
      `File count limit exceeded: ${modifiedFiles.length} > ${FILE_COUNT_LIMIT}`,
    )
    logEvent('tengu_file_persistence_limit_exceeded', {
      file_count: modifiedFiles.length,
      limit: FILE_COUNT_LIMIT,
    })
    return {
      files: [],
      failed: [
        {
          filename: outputsDir,
          error: `Too many files modified (${modifiedFiles.length}). Maximum: ${FILE_COUNT_LIMIT}.`,
        },
      ],
    }
  }

  const filesToProcess = modifiedFiles
    .map(filePath => ({
      path: filePath,
      relativePath: relative(outputsDir, filePath),
    }))
    .filter(({ relativePath }) => {
      // Security: skip files that resolve outside the outputs directory.
      // Only a leading '..' path segment counts as escaping; names that merely
      // begin with two dots (e.g. '..cache') are still inside the directory.
      if (escapesOutputsDir(relativePath)) {
        logDebug(`Skipping file outside outputs directory: ${relativePath}`)
        return false
      }
      return true
    })

  logDebug(`BYOC mode: uploading ${filesToProcess.length} files`)

  // Upload files in parallel
  const results = await uploadSessionFiles(
    filesToProcess,
    config,
    DEFAULT_UPLOAD_CONCURRENCY,
  )

  // Separate successful and failed uploads
  const persistedFiles: PersistedFile[] = []
  const failedFiles: FailedPersistence[] = []

  for (const result of results) {
    if (result.success) {
      persistedFiles.push({
        filename: result.path,
        file_id: result.fileId,
      })
    } else {
      failedFiles.push({
        filename: result.path,
        error: result.error,
      })
    }
  }

  logDebug(
    `BYOC persistence complete: ${persistedFiles.length} uploaded, ${failedFiles.length} failed`,
  )

  return {
    files: persistedFiles,
    failed: failedFiles,
  }
}

/**
 * Report whether a path produced by path.relative() climbs out of its base
 * directory, i.e. whether its first segment is exactly '..'.
 *
 * Both '/' and '\' are accepted as the separator after '..', so the check does
 * not depend on the platform the path was computed on.
 */
function escapesOutputsDir(relativePath: string): boolean {
  return /^\.\.(?:[\\/]|$)/.test(relativePath)
}
241  
/**
 * Cloud (1P) mode persistence. Currently a placeholder that persists nothing.
 * TODO: Read file_id from xattr on output files. xattr-based file IDs are
 * currently being added for 1P environments.
 *
 * @returns An empty result: no persisted files and no failures
 */
function executeCloudPersistence(): FilesPersistedEventData {
  const nothingPersisted: FilesPersistedEventData = { files: [], failed: [] }
  logDebug('Cloud mode: xattr-based file ID reading not yet implemented')
  return nothingPersisted
}
251  
/**
 * Run file persistence and hand any resulting event data to a callback.
 *
 * Nothing is rethrown: an error raised by runFilePersistence or by the
 * callback itself is passed to logError.
 *
 * @param turnStartTime - The timestamp when the turn started
 * @param signal - Abort signal forwarded to runFilePersistence
 * @param onResult - Invoked with the event data; skipped when there is none
 */
export async function executeFilePersistence(
  turnStartTime: TurnStartTime,
  signal: AbortSignal,
  onResult: (result: FilesPersistedEventData) => void,
): Promise<void> {
  try {
    const eventData = await runFilePersistence(turnStartTime, signal)
    if (eventData === null) {
      return
    }
    onResult(eventData)
  } catch (caught) {
    logError(caught)
  }
}
270  
/**
 * Check if file persistence is enabled.
 *
 * All of the following must hold:
 * - the FILE_PERSISTENCE feature flag is on
 * - the environment kind is 'byoc'
 * - a session ingress auth token is available
 * - CLAUDE_CODE_REMOTE_SESSION_ID is set to a non-empty value
 *
 * This ensures only public-api/sessions users trigger file persistence,
 * not normal Claude Code CLI users.
 */
export function isFilePersistenceEnabled(): boolean {
  if (feature('FILE_PERSISTENCE')) {
    // Checks run in this order and stop at the first one that fails.
    if (getEnvironmentKind() !== 'byoc') {
      return false
    }
    if (!getSessionIngressAuthToken()) {
      return false
    }
    return Boolean(process.env.CLAUDE_CODE_REMOTE_SESSION_ID)
  }
  return false
}