file.ts
import {
  readFileSync,
  writeFileSync,
  openSync,
  readSync,
  closeSync,
  existsSync,
  readdirSync,
  opendirSync,
} from 'fs'
import { logError } from './log.js'
import {
  isAbsolute,
  normalize,
  resolve,
  resolve as resolvePath,
  relative,
  sep,
  basename,
  dirname,
  extname,
  join,
} from 'path'
import { glob as globLib } from 'glob'
import { cwd } from 'process'
import { listAllContentFiles } from './ripgrep.js'
import { LRUCache } from 'lru-cache'
import { getCwd } from './state.js'

/** A file name paired with its text content. */
export type File = {
  filename: string
  content: string
}

/** Line-ending conventions this module distinguishes. */
export type LineEndingType = 'CRLF' | 'LF'

/**
 * Finds files matching a glob pattern and returns one page of the results.
 *
 * Matches are sorted by ascending modification time (entries without an
 * `mtimeMs` are treated as 0) before the `offset`/`limit` window is applied.
 *
 * @param filePattern Glob pattern to match.
 * @param cwd Directory the pattern is evaluated against.
 * @param abortSignal Signal forwarded to the glob library to cancel the search.
 * @returns Full paths for the requested page, and `truncated: true` when more
 *   matches exist beyond `offset + limit`.
 */
export async function glob(
  filePattern: string,
  cwd: string,
  { limit, offset }: { limit: number; offset: number },
  abortSignal: AbortSignal,
): Promise<{ files: string[]; truncated: boolean }> {
  // TODO: Use worker threads
  const paths = await globLib([filePattern], {
    cwd,
    nocase: true,
    nodir: true,
    signal: abortSignal,
    stat: true,
    withFileTypes: true,
  })
  // `paths` is a fresh local array, so sorting it in place is safe.
  const sortedPaths = paths.sort((a, b) => (a.mtimeMs ?? 0) - (b.mtimeMs ?? 0))
  const truncated = sortedPaths.length > offset + limit
  return {
    files: sortedPaths
      .slice(offset, offset + limit)
      .map(path => path.fullpath()),
    truncated,
  }
}

/**
 * Reads a file as UTF-8 text.
 *
 * @returns The file content, or `null` if reading failed (the error is logged).
 */
export function readFileSafe(filepath: string): string | null {
  try {
    return readFileSync(filepath, 'utf-8')
  } catch (error) {
    logError(error)
    return null
  }
}

/**
 * Checks whether `relativePath`, interpreted relative to `relativeCwd`, stays
 * inside `relativeCwd`.
 *
 * This is a purely lexical check: both paths are resolved against the process
 * working directory and compared as strings; the file system is not consulted.
 *
 * @param relativePath Path to test, relative to `relativeCwd`.
 * @param relativeCwd Directory that must contain the path.
 * @returns `true` if the resolved path is `relativeCwd` itself or lies below it.
 */
export function isInDirectory(
  relativePath: string,
  relativeCwd: string,
): boolean {
  if (relativePath === '.') {
    return true
  }

  // Reject paths starting with ~ (home directory)
  if (relativePath.startsWith('~')) {
    return false
  }

  // Reject paths containing null bytes
  if (relativePath.includes('\0') || relativeCwd.includes('\0')) {
    return false
  }

  // Normalize paths to resolve any '..' or '.' segments
  // and add trailing separators
  let normalizedPath = normalize(relativePath)
  let normalizedCwd = normalize(relativeCwd)

  normalizedPath = normalizedPath.endsWith(sep)
    ? normalizedPath
    : normalizedPath + sep
  normalizedCwd = normalizedCwd.endsWith(sep)
    ? normalizedCwd
    : normalizedCwd + sep

  // Resolve both against the process working directory so they can be
  // compared as absolute paths.
  const fullPath = resolvePath(cwd(), normalizedCwd, normalizedPath)
  const fullCwd = resolvePath(cwd(), normalizedCwd)

  // `resolve` strips the trailing separator, so a plain prefix test would
  // accept siblings such as `<cwd>/ab` for directory `<cwd>/a`. Compare
  // against the directory with a trailing separator instead (the file system
  // root already ends with one).
  const cwdPrefix = fullCwd.endsWith(sep) ? fullCwd : fullCwd + sep
  return fullPath === fullCwd || fullPath.startsWith(cwdPrefix)
}

/**
 * Reads a text file using its detected encoding and returns a window of lines.
 *
 * @param filePath File to read.
 * @param offset Zero-based index of the first line to return.
 * @param maxLines Maximum number of lines to return; all remaining lines when
 *   omitted.
 * @returns The selected lines joined with `\n`, how many lines were selected,
 *   and the total number of lines in the file.
 */
export function readTextContent(
  filePath: string,
  offset = 0,
  maxLines?: number,
): { content: string; lineCount: number; totalLines: number } {
  const enc = detectFileEncoding(filePath)
  const content = readFileSync(filePath, enc)
  const lines = content.split(/\r?\n/)

  // Truncate number of lines if needed
  const toReturn =
    maxLines !== undefined && lines.length - offset > maxLines
      ? lines.slice(offset, offset + maxLines)
      : lines.slice(offset)

  return {
    content: toReturn.join('\n'), // TODO: This probably won't work for Windows
    lineCount: toReturn.length,
    totalLines: lines.length,
  }
}

/**
 * Writes text to a file with the requested encoding and line endings.
 *
 * @param filePath Destination file.
 * @param content Text to write.
 * @param encoding Encoding used when writing.
 * @param endings When `'CRLF'`, every line break is written as `\r\n`;
 *   otherwise the content is written unchanged.
 */
export function writeTextContent(
  filePath: string,
  content: string,
  encoding: BufferEncoding,
  endings: LineEndingType,
): void {
  let toWrite = content
  if (endings === 'CRLF') {
    // Match an optional '\r' so existing CRLF pairs are not turned into
    // '\r\r\n'.
    toWrite = content.replace(/\r?\n/g, '\r\n')
  }

  writeFileSync(filePath, toWrite, { encoding, flush: true })
}

// Caches the detected repository line-ending style per resolved path.
const repoEndingCache = new LRUCache<string, LineEndingType>({
  fetchMethod: path => detectRepoLineEndingsDirect(path),
  ttl: 5 * 60 * 1000,
  ttlAutopurge: false,
  max: 1000,
})

/**
 * Cached variant of {@link detectRepoLineEndingsDirect}, keyed by the resolved
 * form of `filePath`.
 */
export async function detectRepoLineEndings(
  filePath: string,
): Promise<LineEndingType | undefined> {
  return repoEndingCache.fetch(resolve(filePath))
}

/**
 * Samples content files under `cwd` and guesses the dominant line ending.
 *
 * The file listing is aborted after one second.
 *
 * @param cwd Directory passed to `listAllContentFiles`.
 * @returns `'CRLF'` when more than three sampled files use CRLF, else `'LF'`.
 */
export async function detectRepoLineEndingsDirect(
  cwd: string,
): Promise<LineEndingType> {
  const abortController = new AbortController()
  const timeout = setTimeout(() => {
    abortController.abort()
  }, 1_000)

  try {
    // NOTE(review): the third argument (15) is presumably a cap on the number
    // of files returned — confirm against listAllContentFiles.
    const allFiles = await listAllContentFiles(cwd, abortController.signal, 15)

    let crlfCount = 0
    for (const file of allFiles) {
      const lineEnding = detectLineEndings(file)
      if (lineEnding === 'CRLF') {
        crlfCount++
      }
    }

    return crlfCount > 3 ? 'CRLF' : 'LF'
  } finally {
    // Do not leave the abort timer pending once the listing has settled.
    clearTimeout(timeout)
  }
}

/**
 * Synchronous get-or-compute helper for an LRU cache.
 *
 * @param cache Cache to consult and populate.
 * @param key Cache key.
 * @param value Producer invoked on a cache miss; its result is stored.
 * @returns The cached value, or the freshly computed one.
 */
// eslint-disable-next-line @typescript-eslint/no-empty-object-type
function fetch<K extends {}, V extends {}>(
  cache: LRUCache<K, V>,
  key: K,
  value: () => V,
): V {
  // A single `get` avoids the window between `has` and `get` in which a
  // TTL-expired entry could yield `undefined`.
  const cached = cache.get(key)
  if (cached !== undefined) {
    return cached
  }

  const v = value()
  cache.set(key, v)
  return v
}

// Caches the detected encoding per resolved file path.
const fileEncodingCache = new LRUCache<string, BufferEncoding>({
  fetchMethod: path => detectFileEncodingDirect(path),
  ttl: 5 * 60 * 1000,
  ttlAutopurge: false,
  max: 1000,
})

/**
 * Cached variant of {@link detectFileEncodingDirect}, keyed by the resolved
 * form of `filePath`.
 */
export function detectFileEncoding(filePath: string): BufferEncoding {
  const k = resolve(filePath)
  return fetch(fileEncodingCache, k, () => detectFileEncodingDirect(k))
}

/**
 * Guesses a file's encoding from its first 4096 bytes.
 *
 * A UTF-16LE byte-order mark yields `'utf16le'` and a UTF-8 byte-order mark
 * yields `'utf8'`. Otherwise the result is `'utf8'` when the sample decodes to
 * a non-empty string and `'ascii'` when it does not (for example, an empty
 * file). If the file cannot be read, the error is logged and `'utf8'` is
 * returned.
 */
export function detectFileEncodingDirect(filePath: string): BufferEncoding {
  const BUFFER_SIZE = 4096
  const buffer = Buffer.alloc(BUFFER_SIZE)

  let fd: number | undefined = undefined
  try {
    fd = openSync(filePath, 'r')
    const bytesRead = readSync(fd, buffer, 0, BUFFER_SIZE, 0)

    if (bytesRead >= 2) {
      if (buffer[0] === 0xff && buffer[1] === 0xfe) return 'utf16le'
    }

    if (
      bytesRead >= 3 &&
      buffer[0] === 0xef &&
      buffer[1] === 0xbb &&
      buffer[2] === 0xbf
    ) {
      return 'utf8'
    }

    // NOTE(review): decoding never fails, so this is only false when no bytes
    // were read.
    const isUtf8 = buffer.subarray(0, bytesRead).toString('utf8').length > 0
    return isUtf8 ? 'utf8' : 'ascii'
  } catch (error) {
    logError(`Error detecting encoding for file ${filePath}: ${error}`)
    return 'utf8'
  } finally {
    // Compare against undefined: descriptor 0 is valid but falsy.
    if (fd !== undefined) closeSync(fd)
  }
}

// Caches the detected line-ending style per resolved file path.
const lineEndingCache = new LRUCache<string, LineEndingType>({
  fetchMethod: path => detectLineEndingsDirect(path),
  ttl: 5 * 60 * 1000,
  ttlAutopurge: false,
  max: 1000,
})

/**
 * Cached variant of {@link detectLineEndingsDirect}, keyed by the resolved
 * form of `filePath`.
 */
export function detectLineEndings(filePath: string): LineEndingType {
  const k = resolve(filePath)
  return fetch(lineEndingCache, k, () => detectLineEndingsDirect(k))
}

/**
 * Guesses a file's line-ending style from its first 4096 bytes.
 *
 * @param filePath File to inspect.
 * @param encoding Encoding used to decode the sample.
 * @returns `'CRLF'` when CRLF line breaks outnumber bare LF ones, else `'LF'`.
 *   If the file cannot be read, the error is logged and `'LF'` is returned.
 */
export function detectLineEndingsDirect(
  filePath: string,
  encoding: BufferEncoding = 'utf8',
): LineEndingType {
  try {
    const buffer = Buffer.alloc(4096)
    const fd = openSync(filePath, 'r')
    let bytesRead: number
    try {
      bytesRead = readSync(fd, buffer, 0, 4096, 0)
    } finally {
      // Release the descriptor even when the read throws.
      closeSync(fd)
    }

    const content = buffer.toString(encoding, 0, bytesRead)
    let crlfCount = 0
    let lfCount = 0

    for (let i = 0; i < content.length; i++) {
      if (content[i] === '\n') {
        if (i > 0 && content[i - 1] === '\r') {
          crlfCount++
        } else {
          lfCount++
        }
      }
    }

    return crlfCount > lfCount ? 'CRLF' : 'LF'
  } catch (error) {
    logError(`Error detecting line endings for file ${filePath}: ${error}`)
    return 'LF'
  }
}

/**
 * Makes a path absolute (relative paths are resolved against `getCwd()`) and
 * applies a workaround for macOS screenshot file names.
 *
 * When the path ends in `' AM.png'` or `' PM.png'`, the regular space before
 * AM/PM is replaced with U+202F (narrow no-break space).
 */
export function normalizeFilePath(filePath: string): string {
  const absoluteFilePath = isAbsolute(filePath)
    ? filePath
    : resolve(getCwd(), filePath)

  const narrowNoBreakSpace = String.fromCharCode(8239)
  for (const suffix of [' AM.png', ' PM.png']) {
    if (absoluteFilePath.endsWith(suffix)) {
      // Rewrite the trailing suffix itself; `String.replace` would change the
      // first occurrence, which may sit in an earlier path segment.
      return (
        absoluteFilePath.slice(0, -suffix.length) +
        narrowNoBreakSpace +
        suffix.slice(1)
      )
    }
  }

  return absoluteFilePath
}

/**
 * Returns `path` as an absolute path, resolving relative paths against
 * `getCwd()`. Returns `undefined` for an undefined or empty path.
 */
export function getAbsolutePath(path: string | undefined): string | undefined {
  return path ? (isAbsolute(path) ? path : resolve(getCwd(), path)) : undefined
}

/**
 * Returns both the absolute form of `path` and its form relative to
 * `getCwd()`. Both are `undefined` when `path` is undefined or empty.
 */
export function getAbsoluteAndRelativePaths(path: string | undefined): {
  absolutePath: string | undefined
  relativePath: string | undefined
} {
  const absolutePath = getAbsolutePath(path)
  const relativePath = absolutePath
    ? relative(getCwd(), absolutePath)
    : undefined
  return { absolutePath, relativePath }
}

/**
 * Find files with the same name but different extensions in the same directory
 * @param filePath The path to the file that doesn't exist
 * @returns The name (not the full path) of the first file found with a
 *   different extension, or undefined if none found
 */
export function findSimilarFile(filePath: string): string | undefined {
  try {
    const dir = dirname(filePath)
    const fileBaseName = basename(filePath, extname(filePath))

    // Check if directory exists
    if (!existsSync(dir)) {
      return undefined
    }

    // First entry with the same base name that is not the requested file.
    // An empty-string entry is treated as no match.
    const firstMatch = readdirSync(dir).find(
      file =>
        basename(file, extname(file)) === fileBaseName &&
        join(dir, file) !== filePath,
    )
    return firstMatch ? firstMatch : undefined
  } catch (error) {
    // In case of any errors, return undefined
    logError(`Error finding similar file for ${filePath}: ${error}`)
    return undefined
  }
}

/**
 * Adds cat -n style line numbers to the content.
 *
 * Each line becomes `<number>\t<line>`, with the number right-aligned to a
 * width of six; longer numbers are emitted unpadded.
 *
 * @returns The numbered text joined with `\n`, or `''` for empty content.
 */
export function addLineNumbers({
  content,
  // 1-indexed
  startLine,
}: {
  content: string
  startLine: number
}): string {
  if (!content) {
    return ''
  }

  return content
    .split(/\r?\n/)
    .map((line, index) => {
      // padStart leaves numbers of six or more digits unchanged.
      const n = String(index + startLine).padStart(6, ' ')
      return `${n}\t${line}`
    })
    .join('\n') // TODO: This probably won't work for Windows
}

/**
 * Checks if a directory is empty by efficiently reading just the first entry
 * @param dirPath The path to the directory to check
 * @returns true if the directory is empty, false otherwise (including when
 *   the directory cannot be opened or read, in which case the error is logged)
 */
export function isDirEmpty(dirPath: string): boolean {
  try {
    const dir = opendirSync(dirPath)
    try {
      return dir.readSync() === null
    } finally {
      // Always release the directory handle, even if the read throws.
      dir.closeSync()
    }
  } catch (error) {
    logError(`Error checking directory: ${error}`)
    return false
  }
}