/ utils / file.ts
file.ts
  1  import { chmodSync, writeFileSync as fsWriteFileSync } from 'fs'
  2  import { realpath, stat } from 'fs/promises'
  3  import { homedir } from 'os'
  4  import {
  5    basename,
  6    dirname,
  7    extname,
  8    isAbsolute,
  9    join,
 10    normalize,
 11    relative,
 12    resolve,
 13    sep,
 14  } from 'path'
 15  import { logEvent } from 'src/services/analytics/index.js'
 16  import { getFeatureValue_CACHED_MAY_BE_STALE } from '../services/analytics/growthbook.js'
 17  import { getCwd } from '../utils/cwd.js'
 18  import { logForDebugging } from './debug.js'
 19  import { isENOENT, isFsInaccessible } from './errors.js'
 20  import {
 21    detectEncodingForResolvedPath,
 22    detectLineEndingsForString,
 23    type LineEndingType,
 24  } from './fileRead.js'
 25  import { fileReadCache } from './fileReadCache.js'
 26  import { getFsImplementation, safeResolvePath } from './fsOperations.js'
 27  import { logError } from './log.js'
 28  import { expandPath } from './path.js'
 29  import { getPlatform } from './platform.js'
 30  
 31  export type File = {
 32    filename: string
 33    content: string
 34  }
 35  
 36  /**
 37   * Check if a path exists asynchronously.
 38   */
 39  export async function pathExists(path: string): Promise<boolean> {
 40    try {
 41      await stat(path)
 42      return true
 43    } catch {
 44      return false
 45    }
 46  }
 47  
 48  export const MAX_OUTPUT_SIZE = 0.25 * 1024 * 1024 // 0.25MB in bytes
 49  
 50  export function readFileSafe(filepath: string): string | null {
 51    try {
 52      const fs = getFsImplementation()
 53      return fs.readFileSync(filepath, { encoding: 'utf8' })
 54    } catch (error) {
 55      logError(error)
 56      return null
 57    }
 58  }
 59  
 60  /**
 61   * Get the normalized modification time of a file in milliseconds.
 62   * Uses Math.floor to ensure consistent timestamp comparisons across file operations,
 63   * reducing false positives from sub-millisecond precision changes (e.g., from IDE
 64   * file watchers that touch files without changing content).
 65   */
 66  export function getFileModificationTime(filePath: string): number {
 67    const fs = getFsImplementation()
 68    return Math.floor(fs.statSync(filePath).mtimeMs)
 69  }
 70  
 71  /**
 72   * Async variant of getFileModificationTime. Same floor semantics.
 73   * Use this in async paths (getChangedFiles runs every turn on every readFileState
 74   * entry — sync statSync there triggers the slow-operation indicator on network/
 75   * slow disks).
 76   */
 77  export async function getFileModificationTimeAsync(
 78    filePath: string,
 79  ): Promise<number> {
 80    const s = await getFsImplementation().stat(filePath)
 81    return Math.floor(s.mtimeMs)
 82  }
 83  
 84  export function writeTextContent(
 85    filePath: string,
 86    content: string,
 87    encoding: BufferEncoding,
 88    endings: LineEndingType,
 89  ): void {
 90    let toWrite = content
 91    if (endings === 'CRLF') {
 92      // Normalize any existing CRLF to LF first so a new_string that already
 93      // contains \r\n (raw model output) doesn't become \r\r\n after the join.
 94      toWrite = content.replaceAll('\r\n', '\n').split('\n').join('\r\n')
 95    }
 96  
 97    writeFileSyncAndFlush_DEPRECATED(filePath, toWrite, { encoding })
 98  }
 99  
/**
 * Detects the encoding of the file at `filePath`, after resolving the path
 * with `safeResolvePath`.
 *
 * @param filePath Path of the file to inspect.
 * @returns The detected encoding, or 'utf8' if resolution or detection throws.
 * Inaccessible-file errors are logged at debug level only; anything else goes
 * through `logError`.
 */
export function detectFileEncoding(filePath: string): BufferEncoding {
  try {
    const { resolvedPath } = safeResolvePath(getFsImplementation(), filePath)
    return detectEncodingForResolvedPath(resolvedPath)
  } catch (error) {
    if (!isFsInaccessible(error)) {
      logError(error)
      return 'utf8'
    }
    logForDebugging(
      `detectFileEncoding failed for expected reason: ${error.code}`,
      { level: 'debug' },
    )
    return 'utf8'
  }
}
119  
/**
 * Determines a file's line-ending style from a sample read at the start of
 * the file (a read of length 4096 is requested).
 *
 * @param filePath Path of the file to inspect.
 * @param encoding Encoding used to decode the sampled bytes (default 'utf8').
 * @returns The detected line-ending type, or 'LF' if anything fails (the
 * error is passed to `logError`).
 */
export function detectLineEndings(
  filePath: string,
  encoding: BufferEncoding = 'utf8',
): LineEndingType {
  try {
    const fs = getFsImplementation()
    const target = safeResolvePath(fs, filePath).resolvedPath
    const sample = fs.readSync(target, { length: 4096 })

    // Decode only the bytes that were actually read.
    const head = sample.buffer.toString(encoding, 0, sample.bytesRead)
    return detectLineEndingsForString(head)
  } catch (readError) {
    logError(readError)
    return 'LF'
  }
}
136  
/**
 * Replaces each tab in a line's leading run of tabs with two spaces. Tabs
 * that appear after other characters on a line are left alone.
 *
 * @param content Text, possibly multi-line.
 * @returns The text with leading tabs expanded.
 */
export function convertLeadingTabsToSpaces(content: string): string {
  // Only run the multi-line regex when a tab is present at all; it would
  // otherwise scan every line of tab-free input for nothing.
  if (content.includes('\t')) {
    return content.replace(/^\t+/gm, tabRun => '  '.repeat(tabRun.length))
  }
  return content
}
143  
/**
 * Derives both the expanded (absolute) form of a path and its form relative
 * to the current working directory.
 *
 * @param path Path to convert; may be undefined or empty.
 * @returns `absolutePath` from `expandPath` and `relativePath` from
 * `relative(getCwd(), absolutePath)`. Both are undefined when `path` is
 * missing or empty.
 */
export function getAbsoluteAndRelativePaths(path: string | undefined): {
  absolutePath: string | undefined
  relativePath: string | undefined
} {
  if (!path) {
    return { absolutePath: undefined, relativePath: undefined }
  }

  const absolutePath = expandPath(path)
  const relativePath = absolutePath
    ? relative(getCwd(), absolutePath)
    : undefined
  return { absolutePath, relativePath }
}
154  
/**
 * Produces a short, human-friendly rendering of a file path.
 *
 * Preference order:
 *   1. the path relative to the current working directory, when the file is
 *      inside it;
 *   2. `~`-prefixed notation, when `filePath` lies under the home directory;
 *   3. `filePath` unchanged.
 *
 * @param filePath Path to render.
 * @returns The display form of the path.
 */
export function getDisplayPath(filePath: string): string {
  // Use relative path if file is in the current working directory
  const { relativePath } = getAbsoluteAndRelativePaths(filePath)
  if (relativePath) {
    // Only a real `..` segment means the path leaves cwd. A plain
    // startsWith('..') would also reject entries whose name merely begins
    // with two dots (e.g. `..cache/file`), which are inside cwd.
    const escapesCwd =
      relativePath === '..' || relativePath.startsWith('..' + sep)
    if (!escapesCwd) {
      return relativePath
    }
  }

  // Use tilde notation for files in home directory
  const homeDir = homedir()
  if (filePath.startsWith(homeDir + sep)) {
    return '~' + filePath.slice(homeDir.length)
  }

  // Otherwise return the path as given
  return filePath
}
171  
/**
 * Looks in the directory of `filePath` for an entry that shares its base name
 * (the name with the extension removed) but is not `filePath` itself.
 *
 * @param filePath The path to the file that doesn't exist
 * @returns The name (not the full path) of the first matching directory
 * entry, or undefined if there is none or the directory cannot be read
 */
export function findSimilarFile(filePath: string): string | undefined {
  const fs = getFsImplementation()
  try {
    const parentDir = dirname(filePath)
    const wantedStem = basename(filePath, extname(filePath))

    // First directory entry with the same stem that is a different path
    const sibling = fs.readdirSync(parentDir).find(entry => {
      const entryStem = basename(entry.name, extname(entry.name))
      return (
        entryStem === wantedStem && join(parentDir, entry.name) !== filePath
      )
    })

    return sibling?.name
  } catch (error) {
    // A missing directory (ENOENT) is expected and stays silent; anything
    // else is logged. Either way there is no suggestion to offer.
    if (isENOENT(error)) {
      return undefined
    }
    logError(error)
    return undefined
  }
}
208  
/**
 * Marker text embedded in file-not-found error messages that carry a note
 * about the current working directory. UI renderers look for this marker to
 * decide to show a short "File not found" message.
 */
export const FILE_NOT_FOUND_CWD_NOTE = 'Note: your current working directory is'
214  
/**
 * Suggests a corrected path under the current working directory when a file/directory
 * is not found. Detects the "dropped repo folder" pattern where the model constructs
 * an absolute path missing the repo directory component.
 *
 * Example:
 *   cwd = /Users/zeeg/src/currentRepo
 *   requestedPath = /Users/zeeg/src/foobar           (doesn't exist)
 *   returns        /Users/zeeg/src/currentRepo/foobar (if it exists)
 *
 * @param requestedPath - The absolute path that was not found
 * @returns The corrected path if it exists under cwd, undefined otherwise
 */
export async function suggestPathUnderCwd(
  requestedPath: string,
): Promise<string | undefined> {
  const cwd = getCwd()
  const cwdParent = dirname(cwd)

  // Resolve symlinks in the requested path's parent directory (e.g., /tmp -> /private/tmp on macOS)
  // so the prefix comparison works correctly against the cwd (which is already realpath-resolved).
  let resolvedPath = requestedPath
  try {
    const resolvedDir = await realpath(dirname(requestedPath))
    resolvedPath = join(resolvedDir, basename(requestedPath))
  } catch {
    // Parent directory doesn't exist, use the original path
  }

  // Build directory prefixes that end in exactly one separator. Root
  // directories already end with one ('/' on POSIX, 'C:\' on Windows), so
  // appending blindly would create a doubled separator that never matches.
  const withTrailingSep = (dir: string): string =>
    dir.endsWith(sep) ? dir : dir + sep
  const cwdParentPrefix = withTrailingSep(cwdParent)
  const cwdPrefix = withTrailingSep(cwd)

  // Only continue if the requested path is under cwd's parent but not under cwd itself.
  if (
    !resolvedPath.startsWith(cwdParentPrefix) ||
    resolvedPath.startsWith(cwdPrefix) ||
    resolvedPath === cwd
  ) {
    return undefined
  }

  // Get the relative path from the parent directory
  const relFromParent = relative(cwdParent, resolvedPath)

  // Check if the same relative path exists under cwd
  const correctedPath = join(cwd, relFromParent)
  try {
    await stat(correctedPath)
    return correctedPath
  } catch {
    return undefined
  }
}
268  
/**
 * Reports whether line numbers should use the compact prefix (`N\t`) rather
 * than the padded-arrow prefix (`     N→`).
 *
 * Rationale: the padded-arrow format costs 9 bytes/line overhead; at
 * 1.35B Read calls × 132 lines avg this is 2.18% of fleet uncached input
 * (bq-queries/read_line_prefix_overhead_verify.sql). Ant soak validated no
 * Edit error regression (6.29% vs 6.86% baseline).
 *
 * Killswitch pattern: GB can disable the compact format if issues surface
 * externally.
 *
 * @returns true unless the killswitch feature value is set.
 */
export function isCompactLinePrefixEnabled(): boolean {
  // 3P default: killswitch off = compact format enabled. Client-side only —
  // no server support needed, safe for Bedrock/Vertex/Foundry.
  const killswitchOn = getFeatureValue_CACHED_MAY_BE_STALE(
    'tengu_compact_line_prefix_killswitch',
    false,
  )
  return !killswitchOn
}
286  
/**
 * Prefixes every line of `content` with its line number, cat -n style.
 *
 * Input is split on LF or CRLF and re-joined with LF. The prefix is `N\t`
 * when the compact format is enabled, otherwise the number right-aligned to
 * a width of six followed by `→`.
 *
 * @param content Text to number; empty content yields an empty string.
 * @param startLine Number given to the first line (1-indexed).
 * @returns The numbered text.
 */
export function addLineNumbers({
  content,
  // 1-indexed
  startLine,
}: {
  content: string
  startLine: number
}): string {
  if (!content) {
    return ''
  }

  const lines = content.split(/\r?\n/)
  const compact = isCompactLinePrefixEnabled()

  const numbered: string[] = []
  for (let index = 0; index < lines.length; index++) {
    const label = String(index + startLine)
    // padStart leaves labels of six or more digits untouched, so wide
    // numbers need no separate branch.
    const prefix = compact ? `${label}\t` : `${label.padStart(6, ' ')}→`
    numbered.push(`${prefix}${lines[index]}`)
  }
  return numbered.join('\n')
}
320  
/**
 * Removes a leading line-number prefix (`N→` or `N\t`, optionally preceded by
 * whitespace) from a single line — the inverse of addLineNumbers. Kept next
 * to it so the two formats stay in sync.
 *
 * @param line One line of text, with or without a prefix.
 * @returns The text after the prefix, or the line unchanged when it does not
 * match the prefix format.
 */
export function stripLineNumberPrefix(line: string): string {
  const parsed = /^\s*\d+[\u2192\t](.*)$/.exec(line)
  if (parsed === null) {
    return line
  }
  return parsed[1] ?? line
}
329  
/**
 * Tells whether a directory has no entries.
 * @param dirPath The path to the directory to check
 * @returns true if the directory is empty or does not exist, false otherwise
 * (including when the check fails for any reason other than ENOENT)
 */
export function isDirEmpty(dirPath: string): boolean {
  let empty: boolean
  try {
    empty = getFsImplementation().isDirEmptySync(dirPath)
  } catch (checkError) {
    // ENOENT: the directory doesn't exist, so treat it as empty.
    // Other errors (EPERM on macOS protected folders, etc.): assume not empty.
    empty = isENOENT(checkError)
  }
  return empty
}
344  
/**
 * Returns a file's content via `fileReadCache`, which avoids redundant I/O.
 * This is the preferred read method for FileEditTool operations.
 *
 * @param filePath Path of the file to read.
 * @returns The file content as reported by the cache.
 */
export function readFileSyncCached(filePath: string): string {
  return fileReadCache.readFile(filePath).content
}
353  
/**
 * Synchronously writes `content` to a file and flushes it to disk.
 *
 * Strategy: write to a sibling temp file, copy the existing file's permission
 * bits onto it, then rename it over the target. If any of those steps fails,
 * the temp file is removed and the content is written straight to the target.
 * When `filePath` is a symlink, the link's target is written so the symlink
 * itself is preserved.
 *
 * @param filePath The path to the file to write to
 * @param content The content to write to the file
 * @param options Encoding for the write, plus an optional `mode` that is
 * applied only when the target does not exist yet
 * @throws Rethrows a non-ENOENT error from stat-ing the target, and the error
 * from the fallback write if that also fails
 * @deprecated Use `fs.promises.writeFile` with flush option instead for non-blocking writes.
 * Sync file writes block the event loop and cause performance issues.
 */
export function writeFileSyncAndFlush_DEPRECATED(
  filePath: string,
  content: string,
  options: { encoding: BufferEncoding; mode?: number } = { encoding: 'utf-8' },
): void {
  const fs = getFsImplementation()

  // Detect a symlink by attempting to read it. safeResolvePath is not used
  // here because the link must be handled manually: write to its target while
  // leaving the symlink itself in place.
  let targetPath = filePath
  try {
    const linkTarget = fs.readlinkSync(filePath)
    // Relative link targets are relative to the directory holding the link
    if (isAbsolute(linkTarget)) {
      targetPath = linkTarget
    } else {
      targetPath = resolve(dirname(filePath), linkTarget)
    }
    logForDebugging(`Writing through symlink: ${filePath} -> ${targetPath}`)
  } catch {
    // ENOENT (doesn't exist) or EINVAL (not a symlink) — keep targetPath = filePath
  }

  // Temp file lives next to the target
  const tempPath = `${targetPath}.tmp.${process.pid}.${Date.now()}`

  // Single stat of the target; its result is used by both the atomic path
  // and the fallback path.
  let targetMode: number | undefined
  let targetExists = false
  try {
    targetMode = fs.statSync(targetPath).mode
    targetExists = true
    logForDebugging(`Preserving file permissions: ${targetMode.toString(8)}`)
  } catch (statError) {
    if (!isENOENT(statError)) throw statError
    if (options.mode !== undefined) {
      // New file: use the caller-provided mode
      targetMode = options.mode
      logForDebugging(
        `Setting permissions for new file: ${targetMode.toString(8)}`,
      )
    }
  }

  // Options shared by the temp-file write and the fallback write. `mode` is
  // passed only for new files so their permissions are set at creation time.
  const buildWriteOptions = (): {
    encoding: BufferEncoding
    flush: boolean
    mode?: number
  } => {
    const base = { encoding: options.encoding, flush: true }
    if (!targetExists && options.mode !== undefined) {
      return { ...base, mode: options.mode }
    }
    return base
  }

  try {
    logForDebugging(`Writing to temp file: ${tempPath}`)
    fsWriteFileSync(tempPath, content, buildWriteOptions())
    logForDebugging(
      `Temp file written successfully, size: ${content.length} bytes`,
    )

    // Existing target: carry its permission bits over to the temp file
    if (targetExists && targetMode !== undefined) {
      chmodSync(tempPath, targetMode)
      logForDebugging(`Applied original permissions to temp file`)
    }

    // Atomic rename (on POSIX systems, this is atomic)
    // On Windows, this will overwrite the destination if it exists
    logForDebugging(`Renaming ${tempPath} to ${targetPath}`)
    fs.renameSync(tempPath, targetPath)
    logForDebugging(`File ${targetPath} written atomically`)
    return
  } catch (atomicError) {
    logForDebugging(`Failed to write file atomically: ${atomicError}`, {
      level: 'error',
    })
    logEvent('tengu_atomic_write_error', {})

    // Best-effort removal of the temp file
    try {
      logForDebugging(`Cleaning up temp file: ${tempPath}`)
      fs.unlinkSync(tempPath)
    } catch (cleanupError) {
      logForDebugging(`Failed to clean up temp file: ${cleanupError}`)
    }
  }

  // Reached only when the atomic path failed: write directly to the target
  logForDebugging(`Falling back to non-atomic write for ${targetPath}`)
  try {
    fsWriteFileSync(targetPath, content, buildWriteOptions())
    logForDebugging(
      `File ${targetPath} written successfully with non-atomic fallback`,
    )
  } catch (fallbackError) {
    logForDebugging(`Non-atomic write also failed: ${fallbackError}`)
    throw fallbackError
  }
}
479  
/**
 * Locates the user's Desktop directory.
 *
 * - macOS: `<home>/Desktop`, returned without checking that it exists.
 * - Windows platform (accessed through WSL-style `/mnt/<drive>` paths): the
 *   Desktop under `USERPROFILE` converted to its `/mnt/<drive>` form, else
 *   the first non-system profile under `/mnt/c/Users` that has a Desktop.
 * - Otherwise (and when the above finds nothing): `<home>/Desktop` if it
 *   exists, else the home directory itself.
 *
 * @returns Path to the Desktop directory, or the home directory as a last
 * resort.
 */
export function getDesktopPath(): string {
  const platform = getPlatform()
  const homeDir = homedir()

  if (platform === 'macos') {
    return join(homeDir, 'Desktop')
  }

  if (platform === 'windows') {
    // For WSL, try to access Windows desktop
    const userProfile = process.env.USERPROFILE
    if (userProfile) {
      const windowsHome = userProfile.replace(/\\/g, '/')

      // Drives are mounted under /mnt/<lowercase letter>, so take the letter
      // from the profile path instead of assuming C:. The letter may be
      // upper or lower case. With no drive prefix, keep the previous
      // behavior of appending the whole path to /mnt/c.
      const driveMatch = /^([A-Za-z]):/.exec(windowsHome)
      const driveLetter = driveMatch?.[1]?.toLowerCase() ?? 'c'
      const wslPath = driveMatch
        ? windowsHome.slice(driveMatch[0].length)
        : windowsHome
      const desktopPath = `/mnt/${driveLetter}${wslPath}/Desktop`

      if (getFsImplementation().existsSync(desktopPath)) {
        return desktopPath
      }
    }

    // Fallback: try to find desktop in typical Windows user location
    try {
      const usersDir = '/mnt/c/Users'
      // Profile folders that are never a real user's home
      const systemProfiles = new Set([
        'Public',
        'Default',
        'Default User',
        'All Users',
      ])
      const userDirs = getFsImplementation().readdirSync(usersDir)

      for (const user of userDirs) {
        if (systemProfiles.has(user.name)) {
          continue
        }

        const potentialDesktopPath = join(usersDir, user.name, 'Desktop')

        if (getFsImplementation().existsSync(potentialDesktopPath)) {
          return potentialDesktopPath
        }
      }
    } catch (error) {
      logError(error)
    }
  }

  // Linux/unknown platform fallback
  const desktopPath = join(homeDir, 'Desktop')
  if (getFsImplementation().existsSync(desktopPath)) {
    return desktopPath
  }

  // If Desktop folder doesn't exist, fallback to home directory
  return homeDir
}
538  
/**
 * Checks a file's size against a byte limit.
 *
 * @param filePath The path to the file to validate
 * @param maxSizeBytes The maximum allowed file size in bytes (defaults to
 * MAX_OUTPUT_SIZE)
 * @returns true if the file's size is at most `maxSizeBytes`; false if it is
 * larger or the file cannot be stat-ed
 */
export function isFileWithinReadSizeLimit(
  filePath: string,
  maxSizeBytes: number = MAX_OUTPUT_SIZE,
): boolean {
  let size: number
  try {
    size = getFsImplementation().statSync(filePath).size
  } catch {
    // A file that can't be stat-ed fails validation
    return false
  }
  return size <= maxSizeBytes
}
559  
/**
 * Puts a file path into a canonical form suitable for equality checks.
 *
 * The path always goes through path.normalize() (collapses redundant
 * separators, resolves `.` and `..`). On the Windows platform, forward slashes
 * are additionally turned into backslashes and the result is lower-cased,
 * because Windows paths compare case-insensitively.
 *
 * @param filePath Path to normalize.
 * @returns The normalized path.
 */
export function normalizePathForComparison(filePath: string): string {
  const cleaned = normalize(filePath)

  if (getPlatform() !== 'windows') {
    return cleaned
  }

  // path.normalize only converts slashes when actually running on Windows,
  // so do the conversion explicitly before lower-casing.
  return cleaned.replace(/\//g, '\\').toLowerCase()
}
578  
/**
 * Tests whether two file paths are the same once both have been passed
 * through normalizePathForComparison (which makes the comparison
 * case-insensitive on Windows).
 *
 * @param path1 First path.
 * @param path2 Second path.
 * @returns true when the normalized forms are identical.
 */
export function pathsEqual(path1: string, path2: string): boolean {
  const left = normalizePathForComparison(path1)
  const right = normalizePathForComparison(path2)
  return left === right
}