/ src / utils / file.ts
file.ts
  1  import {
  2    readFileSync,
  3    writeFileSync,
  4    openSync,
  5    readSync,
  6    closeSync,
  7    existsSync,
  8    readdirSync,
  9    opendirSync,
 10  } from 'fs'
 11  import { logError } from './log.js'
 12  import {
 13    isAbsolute,
 14    normalize,
 15    resolve,
 16    resolve as resolvePath,
 17    relative,
 18    sep,
 19    basename,
 20    dirname,
 21    extname,
 22    join,
 23  } from 'path'
 24  import { glob as globLib } from 'glob'
 25  import { cwd } from 'process'
 26  import { listAllContentFiles } from './ripgrep.js'
 27  import { LRUCache } from 'lru-cache'
 28  import { getCwd } from './state.js'
 29  
/**
 * Pairs a file name with that file's content; both are plain strings.
 */
export type File = {
  filename: string
  content: string
}
 34  
/** Line-ending convention of a text file: `'CRLF'` (`\r\n`) or `'LF'` (`\n`). */
export type LineEndingType = 'CRLF' | 'LF'
 36  
 37  export async function glob(
 38    filePattern: string,
 39    cwd: string,
 40    { limit, offset }: { limit: number; offset: number },
 41    abortSignal: AbortSignal,
 42  ): Promise<{ files: string[]; truncated: boolean }> {
 43    // TODO: Use worker threads
 44    const paths = await globLib([filePattern], {
 45      cwd,
 46      nocase: true,
 47      nodir: true,
 48      signal: abortSignal,
 49      stat: true,
 50      withFileTypes: true,
 51    })
 52    const sortedPaths = paths.sort((a, b) => (a.mtimeMs ?? 0) - (b.mtimeMs ?? 0))
 53    const truncated = sortedPaths.length > offset + limit
 54    return {
 55      files: sortedPaths
 56        .slice(offset, offset + limit)
 57        .map(path => path.fullpath()),
 58      truncated,
 59    }
 60  }
 61  
 62  export function readFileSafe(filepath: string): string | null {
 63    try {
 64      return readFileSync(filepath, 'utf-8')
 65    } catch (error) {
 66      logError(error)
 67      return null
 68    }
 69  }
 70  
 71  export function isInDirectory(
 72    relativePath: string,
 73    relativeCwd: string,
 74  ): boolean {
 75    if (relativePath === '.') {
 76      return true
 77    }
 78  
 79    // Reject paths starting with ~ (home directory)
 80    if (relativePath.startsWith('~')) {
 81      return false
 82    }
 83  
 84    // Reject paths containing null bytes or other sneaky characters
 85    if (relativePath.includes('\0') || relativeCwd.includes('\0')) {
 86      return false
 87    }
 88  
 89    // Normalize paths to resolve any '..' or '.' segments
 90    // and add trailing slashes
 91    let normalizedPath = normalize(relativePath)
 92    let normalizedCwd = normalize(relativeCwd)
 93  
 94    normalizedPath = normalizedPath.endsWith(sep)
 95      ? normalizedPath
 96      : normalizedPath + sep
 97    normalizedCwd = normalizedCwd.endsWith(sep)
 98      ? normalizedCwd
 99      : normalizedCwd + sep
100  
101    // Join with a base directory to make them absolute-like for comparison
102    // Using 'dummy' as base to avoid any actual file system dependencies
103    const fullPath = resolvePath(cwd(), normalizedCwd, normalizedPath)
104    const fullCwd = resolvePath(cwd(), normalizedCwd)
105  
106    // Check if the path starts with the cwd
107    return fullPath.startsWith(fullCwd)
108  }
109  
/**
 * Reads a text file and returns a window of its lines.
 *
 * The file is decoded with the encoding reported by `detectFileEncoding`
 * and split on `\n` or `\r\n`.
 *
 * @param filePath File to read.
 * @param offset Index of the first line to return (defaults to 0).
 * @param maxLines Upper bound on returned lines; when omitted, everything
 *   from `offset` onward is returned.
 * @returns `content` is the selected lines joined with `\n`, `lineCount`
 *   the number of selected lines, and `totalLines` the number of lines in
 *   the whole file.
 */
export function readTextContent(
  filePath: string,
  offset = 0,
  maxLines?: number,
): { content: string; lineCount: number; totalLines: number } {
  const encoding = detectFileEncoding(filePath)
  const allLines = readFileSync(filePath, encoding).split(/\r?\n/)

  // Only cut the tail when more lines remain than the caller asked for.
  let selected: string[]
  if (maxLines !== undefined && allLines.length - offset > maxLines) {
    selected = allLines.slice(offset, offset + maxLines)
  } else {
    selected = allLines.slice(offset)
  }

  return {
    content: selected.join('\n'), // TODO: This probably won't work for Windows
    lineCount: selected.length,
    totalLines: allLines.length,
  }
}
131  
/**
 * Writes text to a file using the requested encoding and line endings.
 *
 * @param filePath Destination file.
 * @param content Text to write.
 * @param encoding Encoding used for the write.
 * @param endings With `'CRLF'`, every line break in `content` is written as
 *   `\r\n`; with `'LF'`, `content` is written unchanged.
 */
export function writeTextContent(
  filePath: string,
  content: string,
  encoding: BufferEncoding,
  endings: LineEndingType,
): void {
  // Match an optional '\r' before each '\n' so that content which already
  // contains CRLF is not turned into '\r\r\n'.
  const toWrite =
    endings === 'CRLF' ? content.replace(/\r?\n/g, '\r\n') : content

  writeFileSync(filePath, toWrite, { encoding, flush: true })
}
145  
// Cache of detected line-ending conventions, keyed by path. Holds at most
// 1000 entries, each valid for five minutes; misses are filled through
// `fetchMethod`, which runs detectRepoLineEndingsDirect on the key.
const repoEndingCache = new LRUCache<string, LineEndingType>({
  max: 1000,
  ttl: 300_000, // 5 minutes, in milliseconds
  ttlAutopurge: false,
  fetchMethod: repoPath => detectRepoLineEndingsDirect(repoPath),
})
152  
/**
 * Cached lookup of the line-ending convention for a path.
 *
 * @param filePath Path to look up; it is resolved to an absolute path and
 *   used as the cache key.
 * @returns Whatever `repoEndingCache.fetch` yields for that key, which may
 *   be `undefined`.
 */
export async function detectRepoLineEndings(
  filePath: string,
): Promise<LineEndingType | undefined> {
  const cacheKey = resolve(filePath)
  return repoEndingCache.fetch(cacheKey)
}
158  
/**
 * Decides a directory's line-ending convention by sampling its files.
 *
 * Asks `listAllContentFiles` for files under `cwd` (the `15` argument is
 * presumably a cap on how many are returned — confirm against that helper),
 * aborting the listing after one second, and runs `detectLineEndings` on
 * each result.
 *
 * @param cwd Directory to sample. Note: this parameter shadows the `cwd`
 *   import from `process` inside the function.
 * @returns `'CRLF'` when more than three sampled files use CRLF, otherwise
 *   `'LF'`.
 */
export async function detectRepoLineEndingsDirect(
  cwd: string,
): Promise<LineEndingType> {
  const abortController = new AbortController()
  const abortTimer = setTimeout(() => {
    abortController.abort()
  }, 1_000)

  try {
    const allFiles = await listAllContentFiles(cwd, abortController.signal, 15)

    let crlfCount = 0
    for (const file of allFiles) {
      if (detectLineEndings(file) === 'CRLF') {
        crlfCount++
      }
    }

    return crlfCount > 3 ? 'CRLF' : 'LF'
  } finally {
    // Without this the timer stays pending for the rest of the second after
    // the listing has already finished (or failed).
    clearTimeout(abortTimer)
  }
}
178  
/**
 * Synchronous get-or-compute helper for an LRU cache.
 *
 * @param cache Cache to consult and, on a miss, populate.
 * @param key Entry key.
 * @param value Producer invoked only when the cache has no value for `key`.
 * @returns The cached value, or the freshly produced one after storing it.
 */
// eslint-disable-next-line @typescript-eslint/no-empty-object-type
function fetch<K extends {}, V extends {}>(
  cache: LRUCache<K, V>,
  key: K,
  value: () => V,
): V {
  // A single get() replaces the former has()+get()! pair, so an entry that
  // disappears between the two calls can no longer leak `undefined` to the
  // caller. `V extends {}` rules out undefined as a legitimate cached value,
  // so undefined always means "miss".
  const cached = cache.get(key)
  if (cached !== undefined) {
    return cached
  }

  const computed = value()
  cache.set(key, computed)
  return computed
}
193  
// Cache of detected file encodings, keyed by path. Holds at most 1000
// entries, each valid for five minutes. `fetchMethod` maps a key to
// detectFileEncodingDirect for callers that use the cache's own fetch().
const fileEncodingCache = new LRUCache<string, BufferEncoding>({
  max: 1000,
  ttl: 300_000, // 5 minutes, in milliseconds
  ttlAutopurge: false,
  fetchMethod: encodedPath => detectFileEncodingDirect(encodedPath),
})
200  
/**
 * Cached variant of `detectFileEncodingDirect`.
 *
 * @param filePath File whose encoding is wanted; resolved to an absolute
 *   path, which serves as the cache key.
 * @returns The cached encoding, or a freshly detected one on a cache miss.
 */
export function detectFileEncoding(filePath: string): BufferEncoding {
  const absolutePath = resolve(filePath)
  const detect = (): BufferEncoding => detectFileEncodingDirect(absolutePath)
  return fetch(fileEncodingCache, absolutePath, detect)
}
205  
/**
 * Detects a file's text encoding from its leading bytes.
 *
 * @param filePath File to inspect.
 * @returns `'utf16le'` when the file starts with the UTF-16LE byte-order
 *   mark (FF FE); `'utf8'` in every other case, including empty files,
 *   files with a UTF-8 BOM, and files that could not be opened or read (the
 *   failure is reported through `logError`).
 */
export function detectFileEncodingDirect(filePath: string): BufferEncoding {
  // Only the byte-order mark is examined, so a few bytes are enough.
  const PROBE_SIZE = 4
  const probe = Buffer.alloc(PROBE_SIZE)

  let fd: number | undefined = undefined
  try {
    fd = openSync(filePath, 'r')
    const bytesRead = readSync(fd, probe, 0, PROBE_SIZE, 0)

    if (bytesRead >= 2 && probe[0] === 0xff && probe[1] === 0xfe) {
      return 'utf16le'
    }

    // Everything else is treated as UTF-8. Empty files used to be reported
    // as 'ascii', which would mangle non-ASCII text written to them later;
    // ASCII is a subset of UTF-8, so 'utf8' is the safe answer.
    return 'utf8'
  } catch (error) {
    logError(`Error detecting encoding for file ${filePath}: ${error}`)
    return 'utf8'
  } finally {
    // Compare against undefined: 0 is a valid file descriptor.
    if (fd !== undefined) closeSync(fd)
  }
}
237  
// Cache of per-file line-ending detections, keyed by path. Holds at most
// 1000 entries, each valid for five minutes. `fetchMethod` maps a key to
// detectLineEndingsDirect for callers that use the cache's own fetch().
const lineEndingCache = new LRUCache<string, LineEndingType>({
  max: 1000,
  ttl: 300_000, // 5 minutes, in milliseconds
  ttlAutopurge: false,
  fetchMethod: textPath => detectLineEndingsDirect(textPath),
})
244  
/**
 * Cached variant of `detectLineEndingsDirect`.
 *
 * @param filePath File to inspect; resolved to an absolute path, which
 *   serves as the cache key.
 * @returns The cached line-ending type, or a freshly detected one (using
 *   that function's default encoding) on a cache miss.
 */
export function detectLineEndings(filePath: string): LineEndingType {
  const absolutePath = resolve(filePath)
  const detect = (): LineEndingType => detectLineEndingsDirect(absolutePath)
  return fetch(lineEndingCache, absolutePath, detect)
}
249  
/**
 * Detects whether a file predominantly uses CRLF or LF line endings.
 *
 * Only the first 4096 bytes are sampled. Each `\n` preceded by `\r` counts
 * as CRLF; every other `\n` counts as LF.
 *
 * @param filePath File to inspect.
 * @param encoding Encoding used to decode the sample (defaults to `'utf8'`).
 * @returns `'CRLF'` when CRLF breaks strictly outnumber LF breaks in the
 *   sample; otherwise `'LF'`, which is also the result for ties, files
 *   without line breaks, and files that could not be read (the failure is
 *   reported through `logError`).
 */
export function detectLineEndingsDirect(
  filePath: string,
  encoding: BufferEncoding = 'utf8',
): LineEndingType {
  const SAMPLE_SIZE = 4096

  let fd: number | undefined = undefined
  try {
    const sample = Buffer.alloc(SAMPLE_SIZE)
    fd = openSync(filePath, 'r')
    const bytesRead = readSync(fd, sample, 0, SAMPLE_SIZE, 0)

    const content = sample.toString(encoding, 0, bytesRead)
    let crlfCount = 0
    let lfCount = 0

    for (let i = 0; i < content.length; i++) {
      if (content[i] !== '\n') continue
      if (i > 0 && content[i - 1] === '\r') {
        crlfCount++
      } else {
        lfCount++
      }
    }

    return crlfCount > lfCount ? 'CRLF' : 'LF'
  } catch (error) {
    logError(`Error detecting line endings for file ${filePath}: ${error}`)
    return 'LF'
  } finally {
    // Close here so the descriptor is released even when readSync throws.
    if (fd !== undefined) closeSync(fd)
  }
}
280  
/**
 * Makes a path absolute and adjusts the AM/PM suffix used by macOS
 * screenshot file names.
 *
 * Relative paths are resolved against `getCwd()`. When the result ends in
 * `" AM.png"` or `" PM.png"`, the ordinary space before AM/PM is replaced
 * by U+202F (NARROW NO-BREAK SPACE), the character macOS puts there.
 *
 * @param filePath Absolute or relative path.
 * @returns The absolute path, with the suffix adjusted when it matched.
 */
export function normalizeFilePath(filePath: string): string {
  const absoluteFilePath = isAbsolute(filePath)
    ? filePath
    : resolve(getCwd(), filePath)

  const NARROW_NO_BREAK_SPACE = String.fromCharCode(8239)

  // Anchor at the end of the string: replacing by plain substring would
  // rewrite the first " AM.png" in the path, which may sit in a directory
  // name rather than in the file name that was actually matched.
  return absoluteFilePath.replace(
    / ([AP]M\.png)$/,
    `${NARROW_NO_BREAK_SPACE}$1`,
  )
}
304  
/**
 * Converts an optional path to an absolute one.
 *
 * @param path Path to convert; may be `undefined`.
 * @returns `undefined` for `undefined` or an empty string, the input itself
 *   when it is already absolute, otherwise the path resolved against
 *   `getCwd()`.
 */
export function getAbsolutePath(path: string | undefined): string | undefined {
  if (!path) {
    return undefined
  }
  if (isAbsolute(path)) {
    return path
  }
  return resolve(getCwd(), path)
}
308  
/**
 * Returns both the absolute form of a path and its form relative to
 * `getCwd()`.
 *
 * @param path Path to convert; may be `undefined`.
 * @returns `absolutePath` as produced by `getAbsolutePath`, and
 *   `relativePath` computed from it; both are `undefined` when no absolute
 *   path could be produced.
 */
export function getAbsoluteAndRelativePaths(path: string | undefined): {
  absolutePath: string | undefined
  relativePath: string | undefined
} {
  const absolutePath = getAbsolutePath(path)
  if (!absolutePath) {
    return { absolutePath, relativePath: undefined }
  }
  return { absolutePath, relativePath: relative(getCwd(), absolutePath) }
}
319  
/**
 * Finds an entry in the same directory that shares the file's base name but
 * has a different extension.
 *
 * @param filePath The path to the file that doesn't exist
 * @returns The name (not the full path) of the first matching directory
 *   entry, or undefined when the directory is missing, nothing matches, or
 *   an error occurs (errors are reported through `logError`)
 */
export function findSimilarFile(filePath: string): string | undefined {
  try {
    const dir = dirname(filePath)
    const fileBaseName = basename(filePath, extname(filePath))
    const ownName = basename(filePath)

    // Check if directory exists
    if (!existsSync(dir)) {
      return undefined
    }

    // Exclude the file itself by entry name. Comparing joined paths against
    // the raw input fails for non-normalized paths such as "dir/./a.ts",
    // which would make the function return the very file it was asked about.
    const firstMatch = readdirSync(dir).find(
      entry =>
        entry !== ownName &&
        basename(entry, extname(entry)) === fileBaseName,
    )

    return firstMatch ? firstMatch : undefined
  } catch (error) {
    // In case of any errors, return undefined
    logError(`Error finding similar file for ${filePath}: ${error}`)
    return undefined
  }
}
358  
/**
 * Prefixes every line of `content` with its line number, `cat -n` style:
 * the number is right-aligned in a six-character column (wider numbers are
 * printed in full) and followed by a tab.
 *
 * @param content Text to number; split on `\n` or `\r\n`. An empty string
 *   yields an empty string.
 * @param startLine Number given to the first line (1-indexed).
 * @returns The numbered lines joined with `\n`.
 */
export function addLineNumbers({
  content,
  // 1-indexed
  startLine,
}: {
  content: string
  startLine: number
}): string {
  if (!content) {
    return ''
  }

  const numbered: string[] = []
  for (const [index, line] of content.split(/\r?\n/).entries()) {
    // padStart leaves numbers of six or more digits untouched.
    const gutter = String(index + startLine).padStart(6, ' ')
    numbered.push(`${gutter}\t${line}`)
  }
  return numbered.join('\n') // TODO: This probably won't work for Windows
}
389  
/**
 * Checks if a directory is empty by reading just its first entry.
 *
 * @param dirPath The path to the directory to check
 * @returns true if the directory has no entries; false if it has at least
 *   one, or if it could not be opened or read (the error is reported through
 *   `logError`)
 */
export function isDirEmpty(dirPath: string): boolean {
  try {
    const dir = opendirSync(dirPath)
    try {
      return dir.readSync() === null
    } finally {
      // Release the handle even when readSync throws.
      dir.closeSync()
    }
  } catch (error) {
    logError(`Error checking directory: ${error}`)
    return false
  }
}