/ services / api / filesApi.ts
filesApi.ts
/**
 * Files API client for managing files
 *
 * This module downloads files from, uploads files to, and lists files in the
 * Anthropic Public Files API.
 * Used by the Claude Code agent to download file attachments at session startup.
 *
 * API Reference: https://docs.anthropic.com/en/api/files-content
 */
  9  
 10  import axios from 'axios'
 11  import { randomUUID } from 'crypto'
 12  import * as fs from 'fs/promises'
 13  import * as path from 'path'
 14  import { count } from '../../utils/array.js'
 15  import { getCwd } from '../../utils/cwd.js'
 16  import { logForDebugging } from '../../utils/debug.js'
 17  import { errorMessage } from '../../utils/errors.js'
 18  import { logError } from '../../utils/log.js'
 19  import { sleep } from '../../utils/sleep.js'
 20  import {
 21    type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 22    logEvent,
 23  } from '../analytics/index.js'
 24  
 25  // Files API is currently in beta. oauth-2025-04-20 enables Bearer OAuth
 26  // on public-api routes (auth.py: "oauth_auth" not in beta_versions → 404).
 27  const FILES_API_BETA_HEADER = 'files-api-2025-04-14,oauth-2025-04-20'
 28  const ANTHROPIC_VERSION = '2023-06-01'
 29  
 30  // API base URL - uses ANTHROPIC_BASE_URL set by env-manager for the appropriate environment
 31  // Falls back to public API for standalone usage
 32  function getDefaultApiBaseUrl(): string {
 33    return (
 34      process.env.ANTHROPIC_BASE_URL ||
 35      process.env.CLAUDE_CODE_API_BASE_URL ||
 36      'https://api.anthropic.com'
 37    )
 38  }
 39  
 40  function logDebugError(message: string): void {
 41    logForDebugging(`[files-api] ${message}`, { level: 'error' })
 42  }
 43  
 44  function logDebug(message: string): void {
 45    logForDebugging(`[files-api] ${message}`)
 46  }
 47  
 48  /**
 49   * File specification parsed from CLI args
 50   * Format: --file=<file_id>:<relative_path>
 51   */
 52  export type File = {
 53    fileId: string
 54    relativePath: string
 55  }
 56  
 57  /**
 58   * Configuration for the files API client
 59   */
 60  export type FilesApiConfig = {
 61    /** OAuth token for authentication (from session JWT) */
 62    oauthToken: string
 63    /** Base URL for the API (default: https://api.anthropic.com) */
 64    baseUrl?: string
 65    /** Session ID for creating session-specific directories */
 66    sessionId: string
 67  }
 68  
 69  /**
 70   * Result of a file download operation
 71   */
 72  export type DownloadResult = {
 73    fileId: string
 74    path: string
 75    success: boolean
 76    error?: string
 77    bytesWritten?: number
 78  }
 79  
 80  const MAX_RETRIES = 3
 81  const BASE_DELAY_MS = 500
 82  const MAX_FILE_SIZE_BYTES = 500 * 1024 * 1024 // 500MB
 83  
 84  /**
 85   * Result type for retry operations - signals whether to continue retrying
 86   */
 87  type RetryResult<T> = { done: true; value: T } | { done: false; error?: string }
 88  
 89  /**
 90   * Executes an operation with exponential backoff retry logic
 91   *
 92   * @param operation - Operation name for logging
 93   * @param attemptFn - Function to execute on each attempt, returns RetryResult
 94   * @returns The successful result value
 95   * @throws Error if all retries exhausted
 96   */
 97  async function retryWithBackoff<T>(
 98    operation: string,
 99    attemptFn: (attempt: number) => Promise<RetryResult<T>>,
100  ): Promise<T> {
101    let lastError = ''
102  
103    for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
104      const result = await attemptFn(attempt)
105  
106      if (result.done) {
107        return result.value
108      }
109  
110      lastError = result.error || `${operation} failed`
111      logDebug(
112        `${operation} attempt ${attempt}/${MAX_RETRIES} failed: ${lastError}`,
113      )
114  
115      if (attempt < MAX_RETRIES) {
116        const delayMs = BASE_DELAY_MS * Math.pow(2, attempt - 1)
117        logDebug(`Retrying ${operation} in ${delayMs}ms...`)
118        await sleep(delayMs)
119      }
120    }
121  
122    throw new Error(`${lastError} after ${MAX_RETRIES} attempts`)
123  }
124  
/**
 * Downloads a single file from the Anthropic Public Files API
 *
 * Responses with a status below 500 are inspected here; 5xx responses and
 * network failures surface as Axios errors and are retried with backoff.
 * 401, 403 and 404 are treated as permanent and abort immediately; any other
 * non-200 status is retried.
 *
 * @param fileId - The file ID (e.g., "file_011CNha8iCJcU1wXNR6q4V8w")
 * @param config - Files API configuration
 * @returns The file content as a Buffer
 * @throws Error immediately on 401/403/404, or after all retries are exhausted
 */
export async function downloadFile(
  fileId: string,
  config: FilesApiConfig,
): Promise<Buffer> {
  const baseUrl = config.baseUrl || getDefaultApiBaseUrl()
  // fileId originates from CLI arguments. Encode it as a single path segment so
  // characters such as "/", "?" or "#" cannot retarget the request to another
  // route. Well-formed IDs (letters, digits, "_") are unchanged by encoding.
  const url = `${baseUrl}/v1/files/${encodeURIComponent(fileId)}/content`

  const headers = {
    Authorization: `Bearer ${config.oauthToken}`,
    'anthropic-version': ANTHROPIC_VERSION,
    'anthropic-beta': FILES_API_BETA_HEADER,
  }

  logDebug(`Downloading file ${fileId} from ${url}`)

  return retryWithBackoff(`Download file ${fileId}`, async () => {
    try {
      const response = await axios.get(url, {
        headers,
        responseType: 'arraybuffer',
        timeout: 60000, // 60 second timeout for large files
        validateStatus: status => status < 500,
      })

      if (response.status === 200) {
        logDebug(`Downloaded file ${fileId} (${response.data.length} bytes)`)
        return { done: true, value: Buffer.from(response.data) }
      }

      // Non-retriable errors - throw immediately
      if (response.status === 404) {
        throw new Error(`File not found: ${fileId}`)
      }
      if (response.status === 401) {
        throw new Error('Authentication failed: invalid or missing API key')
      }
      if (response.status === 403) {
        throw new Error(`Access denied to file: ${fileId}`)
      }

      return { done: false, error: `status ${response.status}` }
    } catch (error) {
      // The plain Errors thrown above are not Axios errors, so they propagate
      // out of the retry loop; only transport-level failures are retried.
      if (!axios.isAxiosError(error)) {
        throw error
      }
      return { done: false, error: error.message }
    }
  })
}
181  
/**
 * Normalizes a relative path, strips redundant prefixes, and builds the full
 * download path under {basePath}/{session_id}/uploads/.
 *
 * A leading "{basePath}/{session_id}/uploads/" or "/uploads/" on the input is
 * removed so the result is not nested twice.
 *
 * @param basePath - Directory that contains the per-session directories
 * @param sessionId - Session whose uploads directory receives the file
 * @param relativePath - Requested destination path for the file
 * @returns The full destination path, or null if the path is invalid
 *   (e.g., path traversal)
 */
export function buildDownloadPath(
  basePath: string,
  sessionId: string,
  relativePath: string,
): string | null {
  const normalized = path.normalize(relativePath)

  // path.normalize collapses inner ".." segments, so any that remain sit at the
  // very front. Compare a whole leading segment rather than a raw "startsWith"
  // so legitimate names such as "..notes/a.txt" or "..." are not rejected.
  const traversesUp =
    normalized === '..' || normalized.startsWith(`..${path.sep}`)
  if (traversesUp) {
    logDebugError(
      `Invalid file path: ${relativePath}. Path must not traverse above workspace`,
    )
    return null
  }

  const uploadsBase = path.join(basePath, sessionId, 'uploads')
  const redundantPrefixes = [
    uploadsBase + path.sep,
    path.sep + 'uploads' + path.sep,
  ]
  const matchedPrefix = redundantPrefixes.find(p => normalized.startsWith(p))
  const cleanPath = matchedPrefix
    ? normalized.slice(matchedPrefix.length)
    : normalized

  // path.join concatenates, so an absolute input is still nested under uploadsBase.
  return path.join(uploadsBase, cleanPath)
}
211  
/**
 * Fetches one attachment and writes it beneath the session's uploads directory
 * (rooted at the current working directory), creating parent directories as
 * needed.
 *
 * Failures are reported through the returned result rather than thrown: a
 * rejected path yields `path: ''`, and download or write errors are logged
 * and returned with the intended destination path.
 *
 * @param attachment - The file attachment to download
 * @param config - Files API configuration
 * @returns Download result with success/failure status
 */
export async function downloadAndSaveFile(
  attachment: File,
  config: FilesApiConfig,
): Promise<DownloadResult> {
  const fileId = attachment.fileId
  const requestedPath = attachment.relativePath
  const destination = buildDownloadPath(
    getCwd(),
    config.sessionId,
    requestedPath,
  )

  if (!destination) {
    const rejected: DownloadResult = {
      fileId,
      path: '',
      success: false,
      error: `Invalid file path: ${requestedPath}`,
    }
    return rejected
  }

  try {
    const content = await downloadFile(fileId, config)

    // The destination may be nested; make sure its directory exists first.
    await fs.mkdir(path.dirname(destination), { recursive: true })
    await fs.writeFile(destination, content)

    logDebug(`Saved file ${fileId} to ${destination} (${content.length} bytes)`)

    const saved: DownloadResult = {
      fileId,
      path: destination,
      success: true,
      bytesWritten: content.length,
    }
    return saved
  } catch (error) {
    logDebugError(`Failed to download file ${fileId}: ${errorMessage(error)}`)
    if (error instanceof Error) {
      logError(error)
    }

    const failed: DownloadResult = {
      fileId,
      path: destination,
      success: false,
      error: errorMessage(error),
    }
    return failed
  }
}
268  
/**
 * Number of transfers run at once when a caller of downloadSessionFiles or
 * uploadSessionFiles does not pass an explicit concurrency.
 */
const DEFAULT_CONCURRENCY = 5
271  
/**
 * Applies `fn` to every item with at most `concurrency` calls in flight.
 *
 * A fixed pool of workers pulls the next unclaimed index from a shared counter
 * until none remain. The worker count is clamped to the range
 * [1, items.length]: a concurrency that is zero, negative or NaN falls back to
 * one worker instead of silently processing nothing, and a fractional value is
 * rounded down so the stated maximum is never exceeded.
 *
 * If `fn` rejects, the returned promise rejects with that error; calls already
 * started by other workers are not cancelled.
 *
 * @param items - Items to process
 * @param fn - Async function to apply to each item
 * @param concurrency - Maximum concurrent operations
 * @returns Results in the same order as input items
 */
async function parallelWithLimit<T, R>(
  items: T[],
  fn: (item: T, index: number) => Promise<R>,
  concurrency: number,
): Promise<R[]> {
  const results: R[] = new Array(items.length)
  let nextIndex = 0

  async function worker(): Promise<void> {
    // Claiming an index is synchronous (no await between the bounds check and
    // the increment), so two workers can never take the same item.
    while (nextIndex < items.length) {
      const index = nextIndex++
      // `index` is in range here; the assertion only satisfies
      // noUncheckedIndexedAccess. Items whose value is `undefined` are
      // processed like any other so the result array never contains holes.
      results[index] = await fn(items[index] as T, index)
    }
  }

  const requested = Number.isNaN(concurrency) ? 1 : Math.floor(concurrency)
  const workerCount = Math.min(Math.max(requested, 1), items.length)

  await Promise.all(Array.from({ length: workerCount }, () => worker()))
  return results
}
308  
/**
 * Downloads every file attachment for a session, several at a time.
 *
 * Individual failures do not abort the batch; each file's outcome is reported
 * in its own DownloadResult.
 *
 * @param files - List of file attachments to download
 * @param config - Files API configuration
 * @param concurrency - Maximum concurrent downloads (default: 5)
 * @returns Array of download results in the same order as input
 */
export async function downloadSessionFiles(
  files: File[],
  config: FilesApiConfig,
  concurrency: number = DEFAULT_CONCURRENCY,
): Promise<DownloadResult[]> {
  if (files.length === 0) {
    return []
  }

  logDebug(
    `Downloading ${files.length} file(s) for session ${config.sessionId}`,
  )
  const startedAt = Date.now()

  const downloadOne = (file: File): Promise<DownloadResult> =>
    downloadAndSaveFile(file, config)
  const results = await parallelWithLimit(files, downloadOne, concurrency)

  const elapsedMs = Date.now() - startedAt
  const successCount = count(results, r => r.success)
  logDebug(
    `Downloaded ${successCount}/${files.length} file(s) in ${elapsedMs}ms`,
  )

  return results
}
346  
347  // ============================================================================
348  // Upload Functions (BYOC mode)
349  // ============================================================================
350  
/** A file that was uploaded and assigned an ID by the Files API. */
type UploadSuccess = {
  path: string
  fileId: string
  size: number
  success: true
}

/** A file that could not be uploaded, with the reason. */
type UploadFailure = {
  path: string
  error: string
  success: false
}

/**
 * Result of a file upload operation, discriminated on `success`.
 */
export type UploadResult = UploadSuccess | UploadFailure
366  
/**
 * Escapes a file name for use inside the quoted `filename="..."` parameter of
 * a multipart/form-data part header: `"`, CR and LF are percent-encoded so
 * they cannot end the parameter early or inject extra header lines.
 */
function escapeMultipartFilename(name: string): string {
  return name
    .replace(/\r/g, '%0D')
    .replace(/\n/g, '%0A')
    .replace(/"/g, '%22')
}

/**
 * Assembles the multipart/form-data request body: a `file` part holding
 * `content`, a `purpose` part set to `user_data`, and the closing boundary.
 */
function buildMultipartBody(
  boundary: string,
  filename: string,
  content: Buffer,
): Buffer {
  const filePartHeader =
    `--${boundary}\r\n` +
    `Content-Disposition: form-data; name="file"; filename="${escapeMultipartFilename(filename)}"\r\n` +
    `Content-Type: application/octet-stream\r\n\r\n`
  const purposePart =
    `--${boundary}\r\n` +
    `Content-Disposition: form-data; name="purpose"\r\n\r\n` +
    `user_data\r\n`
  const endBoundary = `--${boundary}--\r\n`

  return Buffer.concat([
    Buffer.from(filePartHeader),
    content,
    Buffer.from('\r\n'),
    Buffer.from(purposePart),
    Buffer.from(endBoundary),
  ])
}

/**
 * Upload a single file to the Files API (BYOC mode)
 *
 * Size validation is performed after reading the file to avoid TOCTOU race
 * conditions where the file size could change between initial check and upload.
 *
 * Read errors, oversize files, HTTP failures, cancellation and exhausted
 * retries are all reported through the returned result (`success: false`).
 * 401, 403 and 413 responses and cancellation stop the retry loop at once;
 * other non-2xx statuses and transport errors are retried.
 *
 * @param filePath - Absolute path to the file to upload
 * @param relativePath - Relative path for the file (its basename is used as
 *   the filename in the API; the full value is echoed in the result's `path`)
 * @param config - Files API configuration
 * @param opts - Optional settings; `signal` aborts the in-flight HTTP request
 * @returns Upload result with success/failure status
 */
export async function uploadFile(
  filePath: string,
  relativePath: string,
  config: FilesApiConfig,
  opts?: { signal?: AbortSignal },
): Promise<UploadResult> {
  const baseUrl = config.baseUrl || getDefaultApiBaseUrl()
  const url = `${baseUrl}/v1/files`

  const headers = {
    Authorization: `Bearer ${config.oauthToken}`,
    'anthropic-version': ANTHROPIC_VERSION,
    'anthropic-beta': FILES_API_BETA_HEADER,
  }

  logDebug(`Uploading file ${filePath} as ${relativePath}`)

  // Read file content first (outside retry loop since it's not a network operation)
  let content: Buffer
  try {
    content = await fs.readFile(filePath)
  } catch (error) {
    logEvent('tengu_file_upload_failed', {
      error_type:
        'file_read' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    return {
      path: relativePath,
      error: errorMessage(error),
      success: false,
    }
  }

  const fileSize = content.length

  if (fileSize > MAX_FILE_SIZE_BYTES) {
    logEvent('tengu_file_upload_failed', {
      error_type:
        'file_too_large' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    return {
      path: relativePath,
      error: `File exceeds maximum size of ${MAX_FILE_SIZE_BYTES} bytes (actual: ${fileSize})`,
      success: false,
    }
  }

  // Use crypto.randomUUID for boundary to avoid collisions when uploads start same millisecond
  const boundary = `----FormBoundary${randomUUID()}`
  // Built once and reused by every retry attempt.
  const body = buildMultipartBody(
    boundary,
    path.basename(relativePath),
    content,
  )

  try {
    return await retryWithBackoff(`Upload file ${relativePath}`, async () => {
      try {
        const response = await axios.post(url, body, {
          headers: {
            ...headers,
            'Content-Type': `multipart/form-data; boundary=${boundary}`,
            'Content-Length': body.length.toString(),
          },
          timeout: 120000, // 2 minute timeout for uploads
          signal: opts?.signal,
          validateStatus: status => status < 500,
        })

        if (response.status === 200 || response.status === 201) {
          const fileId = response.data?.id
          if (!fileId) {
            return {
              done: false,
              error: 'Upload succeeded but no file ID returned',
            }
          }
          logDebug(`Uploaded file ${filePath} -> ${fileId} (${fileSize} bytes)`)
          return {
            done: true,
            value: {
              path: relativePath,
              fileId,
              size: fileSize,
              success: true as const,
            },
          }
        }

        // Non-retriable errors - throw to exit retry loop
        if (response.status === 401) {
          logEvent('tengu_file_upload_failed', {
            error_type:
              'auth' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          })
          throw new UploadNonRetriableError(
            'Authentication failed: invalid or missing API key',
          )
        }

        if (response.status === 403) {
          logEvent('tengu_file_upload_failed', {
            error_type:
              'forbidden' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          })
          throw new UploadNonRetriableError('Access denied for upload')
        }

        if (response.status === 413) {
          logEvent('tengu_file_upload_failed', {
            error_type:
              'size' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          })
          throw new UploadNonRetriableError('File too large for upload')
        }

        return { done: false, error: `status ${response.status}` }
      } catch (error) {
        // Non-retriable errors propagate up
        if (error instanceof UploadNonRetriableError) {
          throw error
        }
        // Checked before isAxiosError so a cancellation is never retried.
        if (axios.isCancel(error)) {
          throw new UploadNonRetriableError('Upload canceled')
        }
        // Network errors are retriable
        if (axios.isAxiosError(error)) {
          return { done: false, error: error.message }
        }
        throw error
      }
    })
  } catch (error) {
    // Non-retriable failures already logged their own analytics event (where
    // applicable); just convert them into a failed result.
    if (error instanceof UploadNonRetriableError) {
      return {
        path: relativePath,
        error: error.message,
        success: false,
      }
    }
    logEvent('tengu_file_upload_failed', {
      error_type:
        'network' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    return {
      path: relativePath,
      error: errorMessage(error),
      success: false,
    }
  }
}
553  
/**
 * Marks an upload failure that must not be retried. uploadFile throws it from
 * inside the retry loop and converts it into a failed result afterwards.
 */
class UploadNonRetriableError extends Error {
  override name = 'UploadNonRetriableError'

  constructor(message: string) {
    super(message)
  }
}
561  
/**
 * Uploads a batch of files, several at a time (BYOC mode).
 *
 * Each file's outcome is reported in its own UploadResult.
 *
 * @param files - Array of files to upload (path and relativePath)
 * @param config - Files API configuration
 * @param concurrency - Maximum concurrent uploads (default: 5)
 * @returns Array of upload results in the same order as input
 */
export async function uploadSessionFiles(
  files: Array<{ path: string; relativePath: string }>,
  config: FilesApiConfig,
  concurrency: number = DEFAULT_CONCURRENCY,
): Promise<UploadResult[]> {
  if (files.length === 0) {
    return []
  }

  logDebug(`Uploading ${files.length} file(s) for session ${config.sessionId}`)
  const startedAt = Date.now()

  const uploadOne = (file: {
    path: string
    relativePath: string
  }): Promise<UploadResult> => uploadFile(file.path, file.relativePath, config)
  const results = await parallelWithLimit(files, uploadOne, concurrency)

  const elapsedMs = Date.now() - startedAt
  const successCount = count(results, r => r.success)
  logDebug(`Uploaded ${successCount}/${files.length} file(s) in ${elapsedMs}ms`)

  return results
}
594  
595  // ============================================================================
596  // List Files Functions (1P/Cloud mode)
597  // ============================================================================
598  
/**
 * One entry returned by listFilesCreatedAfter.
 */
export type FileMetadata = {
  /** The file's `filename` as reported by the API */
  filename: string
  /** The file's `id` as reported by the API */
  fileId: string
  /** The file's `size_bytes` as reported by the API */
  size: number
}
607  
/** The fields this module reads from one page of a GET /v1/files response. */
type FilesListPage = {
  data?: Array<{ id: string; filename: string; size_bytes: number }>
  has_more?: boolean
}

/**
 * List files created after a given timestamp (1P/Cloud mode).
 * Uses the public GET /v1/files endpoint with after_created_at query param.
 * Handles pagination via after_id cursor when has_more is true.
 *
 * Each page request is retried with backoff on transport errors and on
 * statuses other than 200/401/403. Pagination stops when the API reports no
 * more pages, when a page has no usable last ID, or when the cursor fails to
 * advance (which would otherwise loop forever).
 *
 * @param afterCreatedAt - ISO 8601 timestamp to filter files created after
 * @param config - Files API configuration
 * @returns Array of file metadata for files created after the timestamp
 * @throws Error on 401/403, or when a page request exhausts its retries
 */
export async function listFilesCreatedAfter(
  afterCreatedAt: string,
  config: FilesApiConfig,
): Promise<FileMetadata[]> {
  const baseUrl = config.baseUrl || getDefaultApiBaseUrl()
  const headers = {
    Authorization: `Bearer ${config.oauthToken}`,
    'anthropic-version': ANTHROPIC_VERSION,
    'anthropic-beta': FILES_API_BETA_HEADER,
  }

  logDebug(`Listing files created after ${afterCreatedAt}`)

  const allFiles: FileMetadata[] = []
  let afterId: string | undefined

  // Paginate through results
  while (true) {
    const params: Record<string, string> = {
      after_created_at: afterCreatedAt,
    }
    if (afterId) {
      params.after_id = afterId
    }

    const page = await retryWithBackoff<FilesListPage | null | undefined>(
      `List files after ${afterCreatedAt}`,
      async () => {
        try {
          const response = await axios.get<FilesListPage | null | undefined>(
            `${baseUrl}/v1/files`,
            {
              headers,
              params,
              timeout: 60000,
              validateStatus: status => status < 500,
            },
          )

          if (response.status === 200) {
            return { done: true, value: response.data }
          }

          if (response.status === 401) {
            logEvent('tengu_file_list_failed', {
              error_type:
                'auth' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
            })
            throw new Error('Authentication failed: invalid or missing API key')
          }
          if (response.status === 403) {
            logEvent('tengu_file_list_failed', {
              error_type:
                'forbidden' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
            })
            throw new Error('Access denied to list files')
          }

          return { done: false, error: `status ${response.status}` }
        } catch (error) {
          // The plain Errors thrown above are not Axios errors and abort the
          // retry loop; only transport-level failures are retried.
          if (!axios.isAxiosError(error)) {
            throw error
          }
          logEvent('tengu_file_list_failed', {
            error_type:
              'network' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          })
          return { done: false, error: error.message }
        }
      },
    )

    // The body is external input: tolerate a missing body or a non-array
    // `data` field instead of crashing while iterating it.
    const entries = page?.data
    const files = Array.isArray(entries) ? entries : []
    const lastId = files.at(-1)?.id

    // after_id is expected to be exclusive, so seeing the cursor again as the
    // last ID means the server did not advance. Stop rather than loop forever
    // (and skip the repeated page so no duplicates are returned).
    if (afterId !== undefined && lastId === afterId) {
      logDebugError(
        `Pagination cursor ${afterId} did not advance; stopping file listing`,
      )
      break
    }

    for (const f of files) {
      allFiles.push({
        filename: f.filename,
        fileId: f.id,
        size: f.size_bytes,
      })
    }

    if (!page?.has_more) {
      break
    }

    // Use the last file's ID as cursor for next page
    if (!lastId) {
      break
    }
    afterId = lastId
  }

  logDebug(`Listed ${allFiles.length} files created after ${afterCreatedAt}`)
  return allFiles
}
710  
711  // ============================================================================
712  // Parse Functions
713  // ============================================================================
714  
/**
 * Turns CLI file specs into File attachments.
 * Format: <file_id>:<relative_path>
 *
 * Each input string may hold several specs separated by spaces. A spec is
 * split at its first ":", so the path part may itself contain colons. Specs
 * without a ":" are skipped silently; specs with an empty ID or path are
 * skipped with an error log.
 *
 * @param fileSpecs - Array of file spec strings
 * @returns Parsed file attachments
 */
export function parseFileSpecs(fileSpecs: string[]): File[] {
  const parsed: File[] = []

  for (const rawSpec of fileSpecs) {
    // Sandbox-gateway may pass multiple specs as a single space-separated string
    for (const spec of rawSpec.split(' ')) {
      if (spec === '') {
        continue
      }

      const separatorAt = spec.indexOf(':')
      if (separatorAt < 0) {
        continue
      }

      const fileId = spec.slice(0, separatorAt)
      const relativePath = spec.slice(separatorAt + 1)

      if (fileId === '' || relativePath === '') {
        logDebugError(
          `Invalid file spec: ${spec}. Both file_id and path are required`,
        )
        continue
      }

      parsed.push({ fileId, relativePath })
    }
  }

  return parsed
}