/ utils / fileRead.ts
fileRead.ts
  1  /**
  2   * Sync file-read path, extracted from file.ts.
  3   *
  4   * file.ts sits in the settings SCC via log.ts → types/logs.ts → types/message.ts →
  5   * Tool.ts → commands.ts → … Anything that needs readFileSync from file.ts
  6   * pulls in the whole chain. This leaf imports only fsOperations and debug,
  7   * both of which terminate in Node builtins.
  8   *
  9   * detectFileEncoding/detectLineEndings stay in file.ts — they call logError
 10   * (log.ts → SCC) on unexpected failures. The -ForResolvedPath/-ForString
 11   * helpers here are the pure parts; callers who need the logging wrappers
 12   * import from file.ts.
 13   */
 14  
 15  import { logForDebugging } from './debug.js'
 16  import { getFsImplementation, safeResolvePath } from './fsOperations.js'
 17  
/**
 * Line-ending style of a piece of text: `'CRLF'` for `\r\n` terminators,
 * `'LF'` for bare `\n` terminators.
 */
export type LineEndingType = 'CRLF' | 'LF'
 19  
 20  export function detectEncodingForResolvedPath(
 21    resolvedPath: string,
 22  ): BufferEncoding {
 23    const { buffer, bytesRead } = getFsImplementation().readSync(resolvedPath, {
 24      length: 4096,
 25    })
 26  
 27    // Empty files should default to utf8, not ascii
 28    // This fixes a bug where writing emojis/CJK to empty files caused corruption
 29    if (bytesRead === 0) {
 30      return 'utf8'
 31    }
 32  
 33    if (bytesRead >= 2) {
 34      if (buffer[0] === 0xff && buffer[1] === 0xfe) return 'utf16le'
 35    }
 36  
 37    if (
 38      bytesRead >= 3 &&
 39      buffer[0] === 0xef &&
 40      buffer[1] === 0xbb &&
 41      buffer[2] === 0xbf
 42    ) {
 43      return 'utf8'
 44    }
 45  
 46    // For non-empty files, default to utf8 since it's a superset of ascii
 47    // and handles all Unicode characters properly
 48    return 'utf8'
 49  }
 50  
 51  export function detectLineEndingsForString(content: string): LineEndingType {
 52    let crlfCount = 0
 53    let lfCount = 0
 54  
 55    for (let i = 0; i < content.length; i++) {
 56      if (content[i] === '\n') {
 57        if (i > 0 && content[i - 1] === '\r') {
 58          crlfCount++
 59        } else {
 60          lfCount++
 61        }
 62      }
 63    }
 64  
 65    return crlfCount > lfCount ? 'CRLF' : 'LF'
 66  }
 67  
 68  /**
 69   * Like readFileSync but also returns the detected encoding and original line
 70   * ending style in one filesystem pass. Callers writing the file back (e.g.
 71   * FileEditTool) can reuse these instead of calling detectFileEncoding /
 72   * detectLineEndings separately, which would each redo safeResolvePath +
 73   * readSync(4KB).
 74   */
 75  export function readFileSyncWithMetadata(filePath: string): {
 76    content: string
 77    encoding: BufferEncoding
 78    lineEndings: LineEndingType
 79  } {
 80    const fs = getFsImplementation()
 81    const { resolvedPath, isSymlink } = safeResolvePath(fs, filePath)
 82  
 83    if (isSymlink) {
 84      logForDebugging(`Reading through symlink: ${filePath} -> ${resolvedPath}`)
 85    }
 86  
 87    const encoding = detectEncodingForResolvedPath(resolvedPath)
 88    const raw = fs.readFileSync(resolvedPath, { encoding })
 89    // Detect line endings from the raw head before CRLF normalization erases
 90    // the distinction. 4096 code units is ≥ detectLineEndings's 4096-byte
 91    // readSync sample (line endings are ASCII, so the unit mismatch is moot).
 92    const lineEndings = detectLineEndingsForString(raw.slice(0, 4096))
 93    return {
 94      content: raw.replaceAll('\r\n', '\n'),
 95      encoding,
 96      lineEndings,
 97    }
 98  }
 99  
/**
 * Read a text file synchronously and return only its text.
 *
 * Delegates to readFileSyncWithMetadata and discards the detected encoding
 * and line-ending style, so the returned string has `\r\n` normalized to
 * `\n` exactly as that function's `content` does.
 *
 * @param filePath - Path of the file to read.
 * @returns The decoded, CRLF-normalized file content.
 */
export function readFileSync(filePath: string): string {
  const { content } = readFileSyncWithMetadata(filePath)
  return content
}