// tools/FileReadTool/FileReadTool.ts
   1  import type { Base64ImageSource } from '@anthropic-ai/sdk/resources/index.mjs'
   2  import { readdir, readFile as readFileAsync } from 'fs/promises'
   3  import * as path from 'path'
   4  import { posix, win32 } from 'path'
   5  import { z } from 'zod/v4'
   6  import {
   7    PDF_AT_MENTION_INLINE_THRESHOLD,
   8    PDF_EXTRACT_SIZE_THRESHOLD,
   9    PDF_MAX_PAGES_PER_READ,
  10  } from '../../constants/apiLimits.js'
  11  import { hasBinaryExtension } from '../../constants/files.js'
  12  import { memoryFreshnessNote } from '../../memdir/memoryAge.js'
  13  import { getFeatureValue_CACHED_MAY_BE_STALE } from '../../services/analytics/growthbook.js'
  14  import { logEvent } from '../../services/analytics/index.js'
  15  import {
  16    type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  17    getFileExtensionForAnalytics,
  18  } from '../../services/analytics/metadata.js'
  19  import {
  20    countTokensWithAPI,
  21    roughTokenCountEstimationForFileType,
  22  } from '../../services/tokenEstimation.js'
  23  import {
  24    activateConditionalSkillsForPaths,
  25    addSkillDirectories,
  26    discoverSkillDirsForPaths,
  27  } from '../../skills/loadSkillsDir.js'
  28  import type { ToolUseContext } from '../../Tool.js'
  29  import { buildTool, type ToolDef } from '../../Tool.js'
  30  import { getCwd } from '../../utils/cwd.js'
  31  import { getClaudeConfigHomeDir, isEnvTruthy } from '../../utils/envUtils.js'
  32  import { getErrnoCode, isENOENT } from '../../utils/errors.js'
  33  import {
  34    addLineNumbers,
  35    FILE_NOT_FOUND_CWD_NOTE,
  36    findSimilarFile,
  37    getFileModificationTimeAsync,
  38    suggestPathUnderCwd,
  39  } from '../../utils/file.js'
  40  import { logFileOperation } from '../../utils/fileOperationAnalytics.js'
  41  import { formatFileSize } from '../../utils/format.js'
  42  import { getFsImplementation } from '../../utils/fsOperations.js'
  43  import {
  44    compressImageBufferWithTokenLimit,
  45    createImageMetadataText,
  46    detectImageFormatFromBuffer,
  47    type ImageDimensions,
  48    ImageResizeError,
  49    maybeResizeAndDownsampleImageBuffer,
  50  } from '../../utils/imageResizer.js'
  51  import { lazySchema } from '../../utils/lazySchema.js'
  52  import { logError } from '../../utils/log.js'
  53  import { isAutoMemFile } from '../../utils/memoryFileDetection.js'
  54  import { createUserMessage } from '../../utils/messages.js'
  55  import { getCanonicalName, getMainLoopModel } from '../../utils/model/model.js'
  56  import {
  57    mapNotebookCellsToToolResult,
  58    readNotebook,
  59  } from '../../utils/notebook.js'
  60  import { expandPath } from '../../utils/path.js'
  61  import { extractPDFPages, getPDFPageCount, readPDF } from '../../utils/pdf.js'
  62  import {
  63    isPDFExtension,
  64    isPDFSupported,
  65    parsePDFPageRange,
  66  } from '../../utils/pdfUtils.js'
  67  import {
  68    checkReadPermissionForTool,
  69    matchingRuleForInput,
  70  } from '../../utils/permissions/filesystem.js'
  71  import type { PermissionDecision } from '../../utils/permissions/PermissionResult.js'
  72  import { matchWildcardPattern } from '../../utils/permissions/shellRuleMatching.js'
  73  import { readFileInRange } from '../../utils/readFileInRange.js'
  74  import { semanticNumber } from '../../utils/semanticNumber.js'
  75  import { jsonStringify } from '../../utils/slowOperations.js'
  76  import { BASH_TOOL_NAME } from '../BashTool/toolName.js'
  77  import { getDefaultFileReadingLimits } from './limits.js'
  78  import {
  79    DESCRIPTION,
  80    FILE_READ_TOOL_NAME,
  81    FILE_UNCHANGED_STUB,
  82    LINE_FORMAT_INSTRUCTION,
  83    OFFSET_INSTRUCTION_DEFAULT,
  84    OFFSET_INSTRUCTION_TARGETED,
  85    renderPromptTemplate,
  86  } from './prompt.js'
  87  import {
  88    getToolUseSummary,
  89    renderToolResultMessage,
  90    renderToolUseErrorMessage,
  91    renderToolUseMessage,
  92    renderToolUseTag,
  93    userFacingName,
  94  } from './UI.js'
  95  
  96  // Device files that would hang the process: infinite output or blocking input.
  97  // Checked by path only (no I/O). Safe devices like /dev/null are intentionally omitted.
  98  const BLOCKED_DEVICE_PATHS = new Set([
  99    // Infinite output — never reach EOF
 100    '/dev/zero',
 101    '/dev/random',
 102    '/dev/urandom',
 103    '/dev/full',
 104    // Blocks waiting for input
 105    '/dev/stdin',
 106    '/dev/tty',
 107    '/dev/console',
 108    // Nonsensical to read
 109    '/dev/stdout',
 110    '/dev/stderr',
 111    // fd aliases for stdin/stdout/stderr
 112    '/dev/fd/0',
 113    '/dev/fd/1',
 114    '/dev/fd/2',
 115  ])
 116  
 117  function isBlockedDevicePath(filePath: string): boolean {
 118    if (BLOCKED_DEVICE_PATHS.has(filePath)) return true
 119    // /proc/self/fd/0-2 and /proc/<pid>/fd/0-2 are Linux aliases for stdio
 120    if (
 121      filePath.startsWith('/proc/') &&
 122      (filePath.endsWith('/fd/0') ||
 123        filePath.endsWith('/fd/1') ||
 124        filePath.endsWith('/fd/2'))
 125    )
 126      return true
 127    return false
 128  }
 129  
 130  // Narrow no-break space (U+202F) used by some macOS versions in screenshot filenames
 131  const THIN_SPACE = String.fromCharCode(8239)
 132  
 133  /**
 134   * Resolves macOS screenshot paths that may have different space characters.
 135   * macOS uses either regular space or thin space (U+202F) before AM/PM in screenshot
 136   * filenames depending on the macOS version. This function tries the alternate space
 137   * character if the file doesn't exist with the given path.
 138   *
 139   * @param filePath - The normalized file path to resolve
 140   * @returns The path to the actual file on disk (may differ in space character)
 141   */
 142  /**
 143   * For macOS screenshot paths with AM/PM, the space before AM/PM may be a
 144   * regular space or a thin space depending on the macOS version.  Returns
 145   * the alternate path to try if the original doesn't exist, or undefined.
 146   */
 147  function getAlternateScreenshotPath(filePath: string): string | undefined {
 148    const filename = path.basename(filePath)
 149    const amPmPattern = /^(.+)([ \u202F])(AM|PM)(\.png)$/
 150    const match = filename.match(amPmPattern)
 151    if (!match) return undefined
 152  
 153    const currentSpace = match[2]
 154    const alternateSpace = currentSpace === ' ' ? THIN_SPACE : ' '
 155    return filePath.replace(
 156      `${currentSpace}${match[3]}${match[4]}`,
 157      `${alternateSpace}${match[3]}${match[4]}`,
 158    )
 159  }
 160  
 161  // File read listeners - allows other services to be notified when files are read
 162  type FileReadListener = (filePath: string, content: string) => void
 163  const fileReadListeners: FileReadListener[] = []
 164  
 165  export function registerFileReadListener(
 166    listener: FileReadListener,
 167  ): () => void {
 168    fileReadListeners.push(listener)
 169    return () => {
 170      const i = fileReadListeners.indexOf(listener)
 171      if (i >= 0) fileReadListeners.splice(i, 1)
 172    }
 173  }
 174  
 175  export class MaxFileReadTokenExceededError extends Error {
 176    constructor(
 177      public tokenCount: number,
 178      public maxTokens: number,
 179    ) {
 180      super(
 181        `File content (${tokenCount} tokens) exceeds maximum allowed tokens (${maxTokens}). Use offset and limit parameters to read specific portions of the file, or search for specific content instead of reading the whole file.`,
 182      )
 183      this.name = 'MaxFileReadTokenExceededError'
 184    }
 185  }
 186  
// Common image extensions this tool renders natively (checked in
// validateInput against the extname without its leading dot).
const IMAGE_EXTENSIONS = new Set(['png', 'jpg', 'jpeg', 'gif', 'webp'])
 189  
 190  /**
 191   * Detects if a file path is a session-related file for analytics logging.
 192   * Only matches files within the Claude config directory (e.g., ~/.claude).
 193   * Returns the type of session file or null if not a session file.
 194   */
 195  function detectSessionFileType(
 196    filePath: string,
 197  ): 'session_memory' | 'session_transcript' | null {
 198    const configDir = getClaudeConfigHomeDir()
 199  
 200    // Only match files within the Claude config directory
 201    if (!filePath.startsWith(configDir)) {
 202      return null
 203    }
 204  
 205    // Normalize path to use forward slashes for consistent matching across platforms
 206    const normalizedPath = filePath.split(win32.sep).join(posix.sep)
 207  
 208    // Session memory files: ~/.claude/session-memory/*.md (including summary.md)
 209    if (
 210      normalizedPath.includes('/session-memory/') &&
 211      normalizedPath.endsWith('.md')
 212    ) {
 213      return 'session_memory'
 214    }
 215  
 216    // Session JSONL transcript files: ~/.claude/projects/*/*.jsonl
 217    if (
 218      normalizedPath.includes('/projects/') &&
 219      normalizedPath.endsWith('.jsonl')
 220    ) {
 221      return 'session_transcript'
 222    }
 223  
 224    return null
 225  }
 226  
// Tool input schema. Wrapped in lazySchema so zod construction is deferred
// until first access (see the inputSchema getter on FileReadTool).
const inputSchema = lazySchema(() =>
  z.strictObject({
    file_path: z.string().describe('The absolute path to the file to read'),
    // NOTE(review): semanticNumber wraps the numeric schemas — presumably to
    // tolerate model-emitted numeric variants; confirm in utils/semanticNumber.
    offset: semanticNumber(z.number().int().nonnegative().optional()).describe(
      'The line number to start reading from. Only provide if the file is too large to read at once',
    ),
    limit: semanticNumber(z.number().int().positive().optional()).describe(
      'The number of lines to read. Only provide if the file is too large to read at once.',
    ),
    // PDF-only page-range string such as "1-5"; parsed and capped in
    // validateInput via parsePDFPageRange.
    pages: z
      .string()
      .optional()
      .describe(
        `Page range for PDF files (e.g., "1-5", "3", "10-20"). Only applicable to PDF files. Maximum ${PDF_MAX_PAGES_PER_READ} pages per request.`,
      ),
  }),
)
type InputSchema = ReturnType<typeof inputSchema>

// Parsed tool input type, derived from the schema above.
export type Input = z.infer<InputSchema>
 247  
// Tool output schema: a discriminated union over every result shape the tool
// can produce (text, image, notebook, pdf, parts, file_unchanged).
const outputSchema = lazySchema(() => {
  // Define the media types supported for images
  const imageMediaTypes = z.enum([
    'image/jpeg',
    'image/png',
    'image/gif',
    'image/webp',
  ])

  return z.discriminatedUnion('type', [
    // Plain text read (the common case), with line-range bookkeeping.
    z.object({
      type: z.literal('text'),
      file: z.object({
        filePath: z.string().describe('The path to the file that was read'),
        content: z.string().describe('The content of the file'),
        numLines: z
          .number()
          .describe('Number of lines in the returned content'),
        startLine: z.number().describe('The starting line number'),
        totalLines: z.number().describe('Total number of lines in the file'),
      }),
    }),
    // Inline base64 image plus optional dimension metadata.
    z.object({
      type: z.literal('image'),
      file: z.object({
        base64: z.string().describe('Base64-encoded image data'),
        type: imageMediaTypes.describe('The MIME type of the image'),
        originalSize: z.number().describe('Original file size in bytes'),
        dimensions: z
          .object({
            originalWidth: z
              .number()
              .optional()
              .describe('Original image width in pixels'),
            originalHeight: z
              .number()
              .optional()
              .describe('Original image height in pixels'),
            displayWidth: z
              .number()
              .optional()
              .describe('Displayed image width in pixels (after resizing)'),
            displayHeight: z
              .number()
              .optional()
              .describe('Displayed image height in pixels (after resizing)'),
          })
          .optional()
          .describe('Image dimension info for coordinate mapping'),
      }),
    }),
    // Jupyter notebook: cells are serialized separately into the tool result.
    z.object({
      type: z.literal('notebook'),
      file: z.object({
        filePath: z.string().describe('The path to the notebook file'),
        cells: z.array(z.any()).describe('Array of notebook cells'),
      }),
    }),
    // Whole PDF as base64 (the content itself is attached as a document
    // block — see mapToolResultToToolResultBlockParam).
    z.object({
      type: z.literal('pdf'),
      file: z.object({
        filePath: z.string().describe('The path to the PDF file'),
        base64: z.string().describe('Base64-encoded PDF data'),
        originalSize: z.number().describe('Original file size in bytes'),
      }),
    }),
    // PDF pages extracted to image files in outputDir.
    z.object({
      type: z.literal('parts'),
      file: z.object({
        filePath: z.string().describe('The path to the PDF file'),
        originalSize: z.number().describe('Original file size in bytes'),
        count: z.number().describe('Number of pages extracted'),
        outputDir: z
          .string()
          .describe('Directory containing extracted page images'),
      }),
    }),
    // Dedup stub: the same range was re-read while the file was unchanged.
    z.object({
      type: z.literal('file_unchanged'),
      file: z.object({
        filePath: z.string().describe('The path to the file'),
      }),
    }),
  ])
})
type OutputSchema = ReturnType<typeof outputSchema>

// Union of all tool result payload shapes.
export type Output = z.infer<OutputSchema>
 336  
/**
 * The Read tool: serves file content to the model as numbered text, inline
 * images, PDF documents/pages, or notebook cells. Read-only and
 * concurrency-safe, with I/O-free input validation, a same-range dedup path,
 * and a macOS-screenshot ENOENT retry.
 */
export const FileReadTool = buildTool({
  name: FILE_READ_TOOL_NAME,
  searchHint: 'read files, images, PDFs, notebooks',
  // Output is bounded by maxTokens (validateContentTokens). Persisting to a
  // file the model reads back with Read is circular — never persist.
  maxResultSizeChars: Infinity,
  strict: true,
  async description() {
    return DESCRIPTION
  },
  // Prompt text varies with the configured read limits (max-size warning and
  // targeted-range nudge).
  async prompt() {
    const limits = getDefaultFileReadingLimits()
    const maxSizeInstruction = limits.includeMaxSizeInPrompt
      ? `. Files larger than ${formatFileSize(limits.maxSizeBytes)} will return an error; use offset and limit for larger files`
      : ''
    const offsetInstruction = limits.targetedRangeNudge
      ? OFFSET_INSTRUCTION_TARGETED
      : OFFSET_INSTRUCTION_DEFAULT
    return renderPromptTemplate(
      pickLineFormatInstruction(),
      maxSizeInstruction,
      offsetInstruction,
    )
  },
  get inputSchema(): InputSchema {
    return inputSchema()
  },
  get outputSchema(): OutputSchema {
    return outputSchema()
  },
  userFacingName,
  getToolUseSummary,
  getActivityDescription(input) {
    const summary = getToolUseSummary(input)
    return summary ? `Reading ${summary}` : 'Reading file'
  },
  isConcurrencySafe() {
    return true
  },
  isReadOnly() {
    return true
  },
  toAutoClassifierInput(input) {
    return input.file_path
  },
  isSearchOrReadCommand() {
    return { isSearch: false, isRead: true }
  },
  getPath({ file_path }): string {
    return file_path || getCwd()
  },
  backfillObservableInput(input) {
    // hooks.mdx documents file_path as absolute; expand so hook allowlists
    // can't be bypassed via ~ or relative paths.
    if (typeof input.file_path === 'string') {
      input.file_path = expandPath(input.file_path)
    }
  },
  async preparePermissionMatcher({ file_path }) {
    return pattern => matchWildcardPattern(pattern, file_path)
  },
  async checkPermissions(input, context): Promise<PermissionDecision> {
    const appState = context.getAppState()
    return checkReadPermissionForTool(
      FileReadTool,
      input,
      appState.toolPermissionContext,
    )
  },
  renderToolUseMessage,
  renderToolUseTag,
  renderToolResultMessage,
  // UI.tsx:140 — ALL types render summary chrome only: "Read N lines",
  // "Read image (42KB)". Never the content itself. The model-facing
  // serialization (below) sends content + CYBER_RISK_MITIGATION_REMINDER
  // + line prefixes; UI shows none of it. Nothing to index. Caught by
  // the render-fidelity test when this initially claimed file.content.
  extractSearchText() {
    return ''
  },
  renderToolUseErrorMessage,
  // Pre-flight validation: every check below is string/permission based with
  // no filesystem I/O (see the UNC note for why that matters).
  async validateInput({ file_path, pages }, toolUseContext: ToolUseContext) {
    // Validate pages parameter (pure string parsing, no I/O)
    if (pages !== undefined) {
      const parsed = parsePDFPageRange(pages)
      if (!parsed) {
        return {
          result: false,
          message: `Invalid pages parameter: "${pages}". Use formats like "1-5", "3", or "10-20". Pages are 1-indexed.`,
          errorCode: 7,
        }
      }
      // Open-ended ranges (lastPage === Infinity) are treated as oversized.
      const rangeSize =
        parsed.lastPage === Infinity
          ? PDF_MAX_PAGES_PER_READ + 1
          : parsed.lastPage - parsed.firstPage + 1
      if (rangeSize > PDF_MAX_PAGES_PER_READ) {
        return {
          result: false,
          message: `Page range "${pages}" exceeds maximum of ${PDF_MAX_PAGES_PER_READ} pages per request. Please use a smaller range.`,
          errorCode: 8,
        }
      }
    }

    // Path expansion + deny rule check (no I/O)
    const fullFilePath = expandPath(file_path)

    const appState = toolUseContext.getAppState()
    const denyRule = matchingRuleForInput(
      fullFilePath,
      appState.toolPermissionContext,
      'read',
      'deny',
    )
    if (denyRule !== null) {
      return {
        result: false,
        message:
          'File is in a directory that is denied by your permission settings.',
        errorCode: 1,
      }
    }

    // SECURITY: UNC path check (no I/O) — defer filesystem operations
    // until after user grants permission to prevent NTLM credential leaks
    const isUncPath =
      fullFilePath.startsWith('\\\\') || fullFilePath.startsWith('//')
    if (isUncPath) {
      return { result: true }
    }

    // Binary extension check (string check on extension only, no I/O).
    // PDF, images, and SVG are excluded - this tool renders them natively.
    const ext = path.extname(fullFilePath).toLowerCase()
    if (
      hasBinaryExtension(fullFilePath) &&
      !isPDFExtension(ext) &&
      !IMAGE_EXTENSIONS.has(ext.slice(1))
    ) {
      return {
        result: false,
        message: `This tool cannot read binary files. The file appears to be a binary ${ext} file. Please use appropriate tools for binary file analysis.`,
        errorCode: 4,
      }
    }

    // Block specific device files that would hang (infinite output or blocking input).
    // This is a path-based check with no I/O — safe special files like /dev/null are allowed.
    if (isBlockedDevicePath(fullFilePath)) {
      return {
        result: false,
        message: `Cannot read '${file_path}': this device file would block or produce infinite output.`,
        errorCode: 9,
      }
    }

    return { result: true }
  },
  async call(
    { file_path, offset = 1, limit = undefined, pages },
    context,
    _canUseTool?,
    parentMessage?,
  ) {
    const { readFileState, fileReadingLimits } = context

    // Per-context limit overrides fall back to the global defaults.
    const defaults = getDefaultFileReadingLimits()
    const maxSizeBytes =
      fileReadingLimits?.maxSizeBytes ?? defaults.maxSizeBytes
    const maxTokens = fileReadingLimits?.maxTokens ?? defaults.maxTokens

    // Telemetry: track when callers override default read limits.
    // Only fires on override (low volume) — event count = override frequency.
    if (fileReadingLimits !== undefined) {
      logEvent('tengu_file_read_limits_override', {
        hasMaxTokens: fileReadingLimits.maxTokens !== undefined,
        hasMaxSizeBytes: fileReadingLimits.maxSizeBytes !== undefined,
      })
    }

    const ext = path.extname(file_path).toLowerCase().slice(1)
    // Use expandPath for consistent path normalization with FileEditTool/FileWriteTool
    // (especially handles whitespace trimming and Windows path separators)
    const fullFilePath = expandPath(file_path)

    // Dedup: if we've already read this exact range and the file hasn't
    // changed on disk, return a stub instead of re-sending the full content.
    // The earlier Read tool_result is still in context — two full copies
    // waste cache_creation tokens on every subsequent turn. BQ proxy shows
    // ~18% of Read calls are same-file collisions (up to 2.64% of fleet
    // cache_creation). Only applies to text/notebook reads — images/PDFs
    // aren't cached in readFileState so won't match here.
    //
    // Ant soak: 1,734 dedup hits in 2h, no Read error regression.
    // Killswitch pattern: GB can disable if the stub message confuses
    // the model externally.
    // 3P default: killswitch off = dedup enabled. Client-side only — no
    // server support needed, safe for Bedrock/Vertex/Foundry.
    const dedupKillswitch = getFeatureValue_CACHED_MAY_BE_STALE(
      'tengu_read_dedup_killswitch',
      false,
    )
    const existingState = dedupKillswitch
      ? undefined
      : readFileState.get(fullFilePath)
    // Only dedup entries that came from a prior Read (offset is always set
    // by Read). Edit/Write store offset=undefined — their readFileState
    // entry reflects post-edit mtime, so deduping against it would wrongly
    // point the model at the pre-edit Read content.
    if (
      existingState &&
      !existingState.isPartialView &&
      existingState.offset !== undefined
    ) {
      const rangeMatch =
        existingState.offset === offset && existingState.limit === limit
      if (rangeMatch) {
        try {
          const mtimeMs = await getFileModificationTimeAsync(fullFilePath)
          if (mtimeMs === existingState.timestamp) {
            const analyticsExt = getFileExtensionForAnalytics(fullFilePath)
            logEvent('tengu_file_read_dedup', {
              ...(analyticsExt !== undefined && { ext: analyticsExt }),
            })
            return {
              data: {
                type: 'file_unchanged' as const,
                file: { filePath: file_path },
              },
            }
          }
        } catch {
          // stat failed — fall through to full read
        }
      }
    }

    // Discover skills from this file's path (fire-and-forget, non-blocking)
    // Skip in simple mode - no skills available
    const cwd = getCwd()
    if (!isEnvTruthy(process.env.CLAUDE_CODE_SIMPLE)) {
      const newSkillDirs = await discoverSkillDirsForPaths([fullFilePath], cwd)
      if (newSkillDirs.length > 0) {
        // Store discovered dirs for attachment display
        for (const dir of newSkillDirs) {
          context.dynamicSkillDirTriggers?.add(dir)
        }
        // Don't await - let skill loading happen in the background
        addSkillDirectories(newSkillDirs).catch(() => {})
      }

      // Activate conditional skills whose path patterns match this file
      activateConditionalSkillsForPaths([fullFilePath], cwd)
    }

    try {
      // First attempt: the resolved on-disk path is the expanded path itself
      // (callInner's second and third arguments are intentionally the same).
      return await callInner(
        file_path,
        fullFilePath,
        fullFilePath,
        ext,
        offset,
        limit,
        pages,
        maxSizeBytes,
        maxTokens,
        readFileState,
        context,
        parentMessage?.message.id,
      )
    } catch (error) {
      // Handle file-not-found: suggest similar files
      const code = getErrnoCode(error)
      if (code === 'ENOENT') {
        // macOS screenshots may use a thin space or regular space before
        // AM/PM — try the alternate before giving up.
        const altPath = getAlternateScreenshotPath(fullFilePath)
        if (altPath) {
          try {
            return await callInner(
              file_path,
              fullFilePath,
              altPath,
              ext,
              offset,
              limit,
              pages,
              maxSizeBytes,
              maxTokens,
              readFileState,
              context,
              parentMessage?.message.id,
            )
          } catch (altError) {
            if (!isENOENT(altError)) {
              throw altError
            }
            // Alt path also missing — fall through to friendly error
          }
        }

        const similarFilename = findSimilarFile(fullFilePath)
        const cwdSuggestion = await suggestPathUnderCwd(fullFilePath)
        let message = `File does not exist. ${FILE_NOT_FOUND_CWD_NOTE} ${getCwd()}.`
        if (cwdSuggestion) {
          message += ` Did you mean ${cwdSuggestion}?`
        } else if (similarFilename) {
          message += ` Did you mean ${similarFilename}?`
        }
        throw new Error(message)
      }
      throw error
    }
  },
  // Serializes each output variant into the model-facing tool_result block.
  mapToolResultToToolResultBlockParam(data, toolUseID) {
    switch (data.type) {
      case 'image': {
        return {
          tool_use_id: toolUseID,
          type: 'tool_result',
          content: [
            {
              type: 'image',
              source: {
                type: 'base64',
                data: data.file.base64,
                media_type: data.file.type,
              },
            },
          ],
        }
      }
      case 'notebook':
        return mapNotebookCellsToToolResult(data.file.cells, toolUseID)
      case 'pdf':
        // Return PDF metadata only - the actual content is sent as a supplemental DocumentBlockParam
        return {
          tool_use_id: toolUseID,
          type: 'tool_result',
          content: `PDF file read: ${data.file.filePath} (${formatFileSize(data.file.originalSize)})`,
        }
      case 'parts':
        // Extracted page images are read and sent as image blocks in mapToolResultToAPIMessage
        return {
          tool_use_id: toolUseID,
          type: 'tool_result',
          content: `PDF pages extracted: ${data.file.count} page(s) from ${data.file.filePath} (${formatFileSize(data.file.originalSize)})`,
        }
      case 'file_unchanged':
        return {
          tool_use_id: toolUseID,
          type: 'tool_result',
          content: FILE_UNCHANGED_STUB,
        }
      case 'text': {
        let content: string

        if (data.file.content) {
          // Non-empty content: optional memory-freshness prefix, numbered
          // lines, then (model-dependent) the cyber-risk reminder suffix.
          content =
            memoryFileFreshnessPrefix(data) +
            formatFileLines(data.file) +
            (shouldIncludeFileReadMitigation()
              ? CYBER_RISK_MITIGATION_REMINDER
              : '')
        } else {
          // Determine the appropriate warning message
          content =
            data.file.totalLines === 0
              ? '<system-reminder>Warning: the file exists but the contents are empty.</system-reminder>'
              : `<system-reminder>Warning: the file exists but is shorter than the provided offset (${data.file.startLine}). The file has ${data.file.totalLines} lines.</system-reminder>`
        }

        return {
          tool_use_id: toolUseID,
          type: 'tool_result',
          content,
        }
      }
    }
  },
} satisfies ToolDef<InputSchema, Output>)
 719  
/** Line-format instruction for the tool prompt; currently one fixed variant. */
function pickLineFormatInstruction(): string {
  return LINE_FORMAT_INSTRUCTION
}
 723  
/** Format file content with line numbers. */
function formatFileLines(file: { content: string; startLine: number }): string {
  // Thin wrapper over the shared addLineNumbers utility.
  return addLineNumbers(file)
}
 728  
// Appended to text-read results in mapToolResultToToolResultBlockParam unless
// the active model is exempt (see shouldIncludeFileReadMitigation).
export const CYBER_RISK_MITIGATION_REMINDER =
  '\n\n<system-reminder>\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. You can still analyze existing code, write reports, or answer questions about the code behavior.\n</system-reminder>\n'
 731  
 732  // Models where cyber risk mitigation should be skipped
 733  const MITIGATION_EXEMPT_MODELS = new Set(['claude-opus-4-6'])
 734  
 735  function shouldIncludeFileReadMitigation(): boolean {
 736    const shortName = getCanonicalName(getMainLoopModel())
 737    return !MITIGATION_EXEMPT_MODELS.has(shortName)
 738  }
 739  
 740  /**
 741   * Side-channel from call() to mapToolResultToToolResultBlockParam: mtime
 742   * of auto-memory files, keyed by the `data` object identity. Avoids
 743   * adding a presentation-only field to the output schema (which flows
 744   * into SDK types) and avoids sync fs in the mapper. WeakMap auto-GCs
 745   * when the data object becomes unreachable after rendering.
 746   */
 747  const memoryFileMtimes = new WeakMap<object, number>()
 748  
 749  function memoryFileFreshnessPrefix(data: object): string {
 750    const mtimeMs = memoryFileMtimes.get(data)
 751    if (mtimeMs === undefined) return ''
 752    return memoryFreshnessNote(mtimeMs)
 753  }
 754  
 755  async function validateContentTokens(
 756    content: string,
 757    ext: string,
 758    maxTokens?: number,
 759  ): Promise<void> {
 760    const effectiveMaxTokens =
 761      maxTokens ?? getDefaultFileReadingLimits().maxTokens
 762  
 763    const tokenEstimate = roughTokenCountEstimationForFileType(content, ext)
 764    if (!tokenEstimate || tokenEstimate <= effectiveMaxTokens / 4) return
 765  
 766    const tokenCount = await countTokensWithAPI(content)
 767    const effectiveCount = tokenCount ?? tokenEstimate
 768  
 769    if (effectiveCount > effectiveMaxTokens) {
 770      throw new MaxFileReadTokenExceededError(effectiveCount, effectiveMaxTokens)
 771    }
 772  }
 773  
 774  type ImageResult = {
 775    type: 'image'
 776    file: {
 777      base64: string
 778      type: Base64ImageSource['media_type']
 779      originalSize: number
 780      dimensions?: ImageDimensions
 781    }
 782  }
 783  
 784  function createImageResponse(
 785    buffer: Buffer,
 786    mediaType: string,
 787    originalSize: number,
 788    dimensions?: ImageDimensions,
 789  ): ImageResult {
 790    return {
 791      type: 'image',
 792      file: {
 793        base64: buffer.toString('base64'),
 794        type: `image/${mediaType}` as Base64ImageSource['media_type'],
 795        originalSize,
 796        dimensions,
 797      },
 798    }
 799  }
 800  
 801  /**
 802   * Inner implementation of call, separated to allow ENOENT handling in the outer call.
 803   */
 804  async function callInner(
 805    file_path: string,
 806    fullFilePath: string,
 807    resolvedFilePath: string,
 808    ext: string,
 809    offset: number,
 810    limit: number | undefined,
 811    pages: string | undefined,
 812    maxSizeBytes: number,
 813    maxTokens: number,
 814    readFileState: ToolUseContext['readFileState'],
 815    context: ToolUseContext,
 816    messageId: string | undefined,
 817  ): Promise<{
 818    data: Output
 819    newMessages?: ReturnType<typeof createUserMessage>[]
 820  }> {
 821    // --- Notebook ---
 822    if (ext === 'ipynb') {
 823      const cells = await readNotebook(resolvedFilePath)
 824      const cellsJson = jsonStringify(cells)
 825  
 826      const cellsJsonBytes = Buffer.byteLength(cellsJson)
 827      if (cellsJsonBytes > maxSizeBytes) {
 828        throw new Error(
 829          `Notebook content (${formatFileSize(cellsJsonBytes)}) exceeds maximum allowed size (${formatFileSize(maxSizeBytes)}). ` +
 830            `Use ${BASH_TOOL_NAME} with jq to read specific portions:\n` +
 831            `  cat "${file_path}" | jq '.cells[:20]' # First 20 cells\n` +
 832            `  cat "${file_path}" | jq '.cells[100:120]' # Cells 100-120\n` +
 833            `  cat "${file_path}" | jq '.cells | length' # Count total cells\n` +
 834            `  cat "${file_path}" | jq '.cells[] | select(.cell_type=="code") | .source' # All code sources`,
 835        )
 836      }
 837  
 838      await validateContentTokens(cellsJson, ext, maxTokens)
 839  
 840      // Get mtime via async stat (single call, no prior existence check)
 841      const stats = await getFsImplementation().stat(resolvedFilePath)
 842      readFileState.set(fullFilePath, {
 843        content: cellsJson,
 844        timestamp: Math.floor(stats.mtimeMs),
 845        offset,
 846        limit,
 847      })
 848      context.nestedMemoryAttachmentTriggers?.add(fullFilePath)
 849  
 850      const data = {
 851        type: 'notebook' as const,
 852        file: { filePath: file_path, cells },
 853      }
 854  
 855      logFileOperation({
 856        operation: 'read',
 857        tool: 'FileReadTool',
 858        filePath: fullFilePath,
 859        content: cellsJson,
 860      })
 861  
 862      return { data }
 863    }
 864  
 865    // --- Image (single read, no double-read) ---
 866    if (IMAGE_EXTENSIONS.has(ext)) {
 867      // Images have their own size limits (token budget + compression) —
 868      // don't apply the text maxSizeBytes cap.
 869      const data = await readImageWithTokenBudget(resolvedFilePath, maxTokens)
 870      context.nestedMemoryAttachmentTriggers?.add(fullFilePath)
 871  
 872      logFileOperation({
 873        operation: 'read',
 874        tool: 'FileReadTool',
 875        filePath: fullFilePath,
 876        content: data.file.base64,
 877      })
 878  
 879      const metadataText = data.file.dimensions
 880        ? createImageMetadataText(data.file.dimensions)
 881        : null
 882  
 883      return {
 884        data,
 885        ...(metadataText && {
 886          newMessages: [
 887            createUserMessage({ content: metadataText, isMeta: true }),
 888          ],
 889        }),
 890      }
 891    }
 892  
 893    // --- PDF ---
 894    if (isPDFExtension(ext)) {
 895      if (pages) {
 896        const parsedRange = parsePDFPageRange(pages)
 897        const extractResult = await extractPDFPages(
 898          resolvedFilePath,
 899          parsedRange ?? undefined,
 900        )
 901        if (!extractResult.success) {
 902          throw new Error(extractResult.error.message)
 903        }
 904        logEvent('tengu_pdf_page_extraction', {
 905          success: true,
 906          pageCount: extractResult.data.file.count,
 907          fileSize: extractResult.data.file.originalSize,
 908          hasPageRange: true,
 909        })
 910        logFileOperation({
 911          operation: 'read',
 912          tool: 'FileReadTool',
 913          filePath: fullFilePath,
 914          content: `PDF pages ${pages}`,
 915        })
 916        const entries = await readdir(extractResult.data.file.outputDir)
 917        const imageFiles = entries.filter(f => f.endsWith('.jpg')).sort()
 918        const imageBlocks = await Promise.all(
 919          imageFiles.map(async f => {
 920            const imgPath = path.join(extractResult.data.file.outputDir, f)
 921            const imgBuffer = await readFileAsync(imgPath)
 922            const resized = await maybeResizeAndDownsampleImageBuffer(
 923              imgBuffer,
 924              imgBuffer.length,
 925              'jpeg',
 926            )
 927            return {
 928              type: 'image' as const,
 929              source: {
 930                type: 'base64' as const,
 931                media_type:
 932                  `image/${resized.mediaType}` as Base64ImageSource['media_type'],
 933                data: resized.buffer.toString('base64'),
 934              },
 935            }
 936          }),
 937        )
 938        return {
 939          data: extractResult.data,
 940          ...(imageBlocks.length > 0 && {
 941            newMessages: [
 942              createUserMessage({ content: imageBlocks, isMeta: true }),
 943            ],
 944          }),
 945        }
 946      }
 947  
 948      const pageCount = await getPDFPageCount(resolvedFilePath)
 949      if (pageCount !== null && pageCount > PDF_AT_MENTION_INLINE_THRESHOLD) {
 950        throw new Error(
 951          `This PDF has ${pageCount} pages, which is too many to read at once. ` +
 952            `Use the pages parameter to read specific page ranges (e.g., pages: "1-5"). ` +
 953            `Maximum ${PDF_MAX_PAGES_PER_READ} pages per request.`,
 954        )
 955      }
 956  
 957      const fs = getFsImplementation()
 958      const stats = await fs.stat(resolvedFilePath)
 959      const shouldExtractPages =
 960        !isPDFSupported() || stats.size > PDF_EXTRACT_SIZE_THRESHOLD
 961  
 962      if (shouldExtractPages) {
 963        const extractResult = await extractPDFPages(resolvedFilePath)
 964        if (extractResult.success) {
 965          logEvent('tengu_pdf_page_extraction', {
 966            success: true,
 967            pageCount: extractResult.data.file.count,
 968            fileSize: extractResult.data.file.originalSize,
 969          })
 970        } else {
 971          logEvent('tengu_pdf_page_extraction', {
 972            success: false,
 973            available: extractResult.error.reason !== 'unavailable',
 974            fileSize: stats.size,
 975          })
 976        }
 977      }
 978  
 979      if (!isPDFSupported()) {
 980        throw new Error(
 981          'Reading full PDFs is not supported with this model. Use a newer model (Sonnet 3.5 v2 or later), ' +
 982            `or use the pages parameter to read specific page ranges (e.g., pages: "1-5", maximum ${PDF_MAX_PAGES_PER_READ} pages per request). ` +
 983            'Page extraction requires poppler-utils: install with `brew install poppler` on macOS or `apt-get install poppler-utils` on Debian/Ubuntu.',
 984        )
 985      }
 986  
 987      const readResult = await readPDF(resolvedFilePath)
 988      if (!readResult.success) {
 989        throw new Error(readResult.error.message)
 990      }
 991      const pdfData = readResult.data
 992      logFileOperation({
 993        operation: 'read',
 994        tool: 'FileReadTool',
 995        filePath: fullFilePath,
 996        content: pdfData.file.base64,
 997      })
 998  
 999      return {
1000        data: pdfData,
1001        newMessages: [
1002          createUserMessage({
1003            content: [
1004              {
1005                type: 'document',
1006                source: {
1007                  type: 'base64',
1008                  media_type: 'application/pdf',
1009                  data: pdfData.file.base64,
1010                },
1011              },
1012            ],
1013            isMeta: true,
1014          }),
1015        ],
1016      }
1017    }
1018  
1019    // --- Text file (single async read via readFileInRange) ---
1020    const lineOffset = offset === 0 ? 0 : offset - 1
1021    const { content, lineCount, totalLines, totalBytes, readBytes, mtimeMs } =
1022      await readFileInRange(
1023        resolvedFilePath,
1024        lineOffset,
1025        limit,
1026        limit === undefined ? maxSizeBytes : undefined,
1027        context.abortController.signal,
1028      )
1029  
1030    await validateContentTokens(content, ext, maxTokens)
1031  
1032    readFileState.set(fullFilePath, {
1033      content,
1034      timestamp: Math.floor(mtimeMs),
1035      offset,
1036      limit,
1037    })
1038    context.nestedMemoryAttachmentTriggers?.add(fullFilePath)
1039  
1040    // Snapshot before iterating — a listener that unsubscribes mid-callback
1041    // would splice the live array and skip the next listener.
1042    for (const listener of fileReadListeners.slice()) {
1043      listener(resolvedFilePath, content)
1044    }
1045  
1046    const data = {
1047      type: 'text' as const,
1048      file: {
1049        filePath: file_path,
1050        content,
1051        numLines: lineCount,
1052        startLine: offset,
1053        totalLines,
1054      },
1055    }
1056    if (isAutoMemFile(fullFilePath)) {
1057      memoryFileMtimes.set(data, mtimeMs)
1058    }
1059  
1060    logFileOperation({
1061      operation: 'read',
1062      tool: 'FileReadTool',
1063      filePath: fullFilePath,
1064      content,
1065    })
1066  
1067    const sessionFileType = detectSessionFileType(fullFilePath)
1068    const analyticsExt = getFileExtensionForAnalytics(fullFilePath)
1069    logEvent('tengu_session_file_read', {
1070      totalLines,
1071      readLines: lineCount,
1072      totalBytes,
1073      readBytes,
1074      offset,
1075      ...(limit !== undefined && { limit }),
1076      ...(analyticsExt !== undefined && { ext: analyticsExt }),
1077      ...(messageId !== undefined && {
1078        messageID:
1079          messageId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
1080      }),
1081      is_session_memory: sessionFileType === 'session_memory',
1082      is_session_transcript: sessionFileType === 'session_transcript',
1083    })
1084  
1085    return { data }
1086  }
1087  
1088  /**
1089   * Reads an image file and applies token-based compression if needed.
1090   * Reads the file ONCE, then applies standard resize. If the result exceeds
1091   * the token limit, applies aggressive compression from the same buffer.
1092   *
1093   * @param filePath - Path to the image file
1094   * @param maxTokens - Maximum token budget for the image
1095   * @returns Image data with appropriate compression applied
1096   */
1097  export async function readImageWithTokenBudget(
1098    filePath: string,
1099    maxTokens: number = getDefaultFileReadingLimits().maxTokens,
1100    maxBytes?: number,
1101  ): Promise<ImageResult> {
1102    // Read file ONCE — capped to maxBytes to avoid OOM on huge files
1103    const imageBuffer = await getFsImplementation().readFileBytes(
1104      filePath,
1105      maxBytes,
1106    )
1107    const originalSize = imageBuffer.length
1108  
1109    if (originalSize === 0) {
1110      throw new Error(`Image file is empty: ${filePath}`)
1111    }
1112  
1113    const detectedMediaType = detectImageFormatFromBuffer(imageBuffer)
1114    const detectedFormat = detectedMediaType.split('/')[1] || 'png'
1115  
1116    // Try standard resize
1117    let result: ImageResult
1118    try {
1119      const resized = await maybeResizeAndDownsampleImageBuffer(
1120        imageBuffer,
1121        originalSize,
1122        detectedFormat,
1123      )
1124      result = createImageResponse(
1125        resized.buffer,
1126        resized.mediaType,
1127        originalSize,
1128        resized.dimensions,
1129      )
1130    } catch (e) {
1131      if (e instanceof ImageResizeError) throw e
1132      logError(e)
1133      result = createImageResponse(imageBuffer, detectedFormat, originalSize)
1134    }
1135  
1136    // Check if it fits in token budget
1137    const estimatedTokens = Math.ceil(result.file.base64.length * 0.125)
1138    if (estimatedTokens > maxTokens) {
1139      // Aggressive compression from the SAME buffer (no re-read)
1140      try {
1141        const compressed = await compressImageBufferWithTokenLimit(
1142          imageBuffer,
1143          maxTokens,
1144          detectedMediaType,
1145        )
1146        return {
1147          type: 'image',
1148          file: {
1149            base64: compressed.base64,
1150            type: compressed.mediaType,
1151            originalSize,
1152          },
1153        }
1154      } catch (e) {
1155        logError(e)
1156        // Fallback: heavily compressed version from the SAME buffer
1157        try {
1158          const sharpModule = await import('sharp')
1159          const sharp =
1160            (
1161              sharpModule as {
1162                default?: typeof sharpModule
1163              } & typeof sharpModule
1164            ).default || sharpModule
1165  
1166          const fallbackBuffer = await sharp(imageBuffer)
1167            .resize(400, 400, {
1168              fit: 'inside',
1169              withoutEnlargement: true,
1170            })
1171            .jpeg({ quality: 20 })
1172            .toBuffer()
1173  
1174          return createImageResponse(fallbackBuffer, 'jpeg', originalSize)
1175        } catch (error) {
1176          logError(error)
1177          return createImageResponse(imageBuffer, detectedFormat, originalSize)
1178        }
1179      }
1180    }
1181  
1182    return result
1183  }