/ utils / readEditContext.ts
readEditContext.ts
  1  import { type FileHandle, open } from 'fs/promises'
  2  import { isENOENT } from './errors.js'
  3  
/**
 * Bytes requested per read while scanning for the needle. Also the size of
 * the backward/forward context windows and of readCapped's initial buffer.
 */
export const CHUNK_SIZE = 8 * 1024
/**
 * Byte cap (10 MiB): scanForContext stops scanning once this many bytes have
 * been consumed, and readCapped returns null for files larger than this.
 */
export const MAX_SCAN_BYTES = 10 * 1024 * 1024
/** Byte value of '\n' (line feed); used to count and locate line breaks in raw buffers. */
const NL = 0x0a
  7  
/**
 * Result of searching a file for a needle: the text surrounding the match and
 * where that text sits in the original file. When the needle is not found,
 * `content` is '' and `lineOffset` is 1.
 */
export type EditContext = {
  /**
   * Slice of the file: contextLines before/after the match, on line boundaries.
   * CRLF sequences are normalized to LF. Each side is read through a single
   * CHUNK_SIZE window, so a very long line may be cut before its boundary.
   */
  content: string
  /** 1-based line number of content's first line in the original file. */
  lineOffset: number
  /** True if MAX_SCAN_BYTES was hit without finding the needle. */
  truncated: boolean
}
 16  
 17  /**
 18   * Finds `needle` in the file at `path` and returns a context-window slice
 19   * containing the match plus `contextLines` of surrounding context on each side.
 20   *
 21   * Scans in 8KB chunks with a straddle overlap so matches crossing a chunk
 22   * boundary are found. Capped at MAX_SCAN_BYTES. No stat — EOF detected via
 23   * bytesRead.
 24   *
 25   * React callers: wrap in useState lazy-init then use() + Suspense. useMemo
 26   * re-runs when callers pass fresh array literals.
 27   *
 28   * Returns null on ENOENT. Returns { truncated: true, content: '' } if the
 29   * needle isn't found within MAX_SCAN_BYTES.
 30   */
 31  export async function readEditContext(
 32    path: string,
 33    needle: string,
 34    contextLines = 3,
 35  ): Promise<EditContext | null> {
 36    const handle = await openForScan(path)
 37    if (handle === null) return null
 38    try {
 39      return await scanForContext(handle, needle, contextLines)
 40    } finally {
 41      await handle.close()
 42    }
 43  }
 44  
 45  /**
 46   * Opens `path` for reading. Returns null on ENOENT. Caller owns close().
 47   */
 48  export async function openForScan(path: string): Promise<FileHandle | null> {
 49    try {
 50      return await open(path, 'r')
 51    } catch (e) {
 52      if (isENOENT(e)) return null
 53      throw e
 54    }
 55  }
 56  
 57  /**
 58   * Handle-accepting core of readEditContext. Caller owns open/close.
 59   */
 60  export async function scanForContext(
 61    handle: FileHandle,
 62    needle: string,
 63    contextLines: number,
 64  ): Promise<EditContext> {
 65    if (needle === '') return { content: '', lineOffset: 1, truncated: false }
 66    const needleLF = Buffer.from(needle, 'utf8')
 67    // Model sends LF; files may be CRLF. Count newlines to size the overlap for
 68    // the longer CRLF form; defer encoding the CRLF buffer until LF scan misses.
 69    let nlCount = 0
 70    for (let i = 0; i < needleLF.length; i++) if (needleLF[i] === NL) nlCount++
 71    let needleCRLF: Buffer | undefined
 72    const overlap = needleLF.length + nlCount - 1
 73  
 74    const buf = Buffer.allocUnsafe(CHUNK_SIZE + overlap)
 75    let pos = 0
 76    let linesBeforePos = 0
 77    let prevTail = 0
 78  
 79    while (pos < MAX_SCAN_BYTES) {
 80      const { bytesRead } = await handle.read(buf, prevTail, CHUNK_SIZE, pos)
 81      if (bytesRead === 0) break
 82      const viewLen = prevTail + bytesRead
 83  
 84      let matchAt = indexOfWithin(buf, needleLF, viewLen)
 85      let matchLen = needleLF.length
 86      if (matchAt === -1 && nlCount > 0) {
 87        needleCRLF ??= Buffer.from(needle.replaceAll('\n', '\r\n'), 'utf8')
 88        matchAt = indexOfWithin(buf, needleCRLF, viewLen)
 89        matchLen = needleCRLF.length
 90      }
 91      if (matchAt !== -1) {
 92        const absMatch = pos - prevTail + matchAt
 93        return await sliceContext(
 94          handle,
 95          buf,
 96          absMatch,
 97          matchLen,
 98          contextLines,
 99          linesBeforePos + countNewlines(buf, 0, matchAt),
100        )
101      }
102      pos += bytesRead
103      // Shift the tail to the front for straddle. linesBeforePos tracks
104      // newlines in bytes we've DISCARDED (not in buf) — count only the
105      // non-overlap portion we're about to copyWithin over.
106      const nextTail = Math.min(overlap, viewLen)
107      linesBeforePos += countNewlines(buf, 0, viewLen - nextTail)
108      prevTail = nextTail
109      buf.copyWithin(0, viewLen - prevTail, viewLen)
110    }
111  
112    return { content: '', lineOffset: 1, truncated: pos >= MAX_SCAN_BYTES }
113  }
114  
/**
 * Reads the whole file behind `handle`, as long as it is no larger than
 * MAX_SCAN_BYTES. Intended for the multi-edit path in FileEditToolDiff, where
 * sequential replacements need the full string.
 *
 * Uses one buffer that doubles whenever it fills (about log2(size/8KB)
 * allocations) and reads straight into the next free offset, so there are
 * no per-chunk buffers to concatenate.
 *
 * @param handle - open file to read from, starting at offset 0
 * @returns the decoded file with CRLF normalized to LF, or null if the file
 *   is larger than MAX_SCAN_BYTES.
 */
export async function readCapped(handle: FileHandle): Promise<string | null> {
  // One chunk of headroom past the cap lets an oversized file be detected.
  const hardLimit = MAX_SCAN_BYTES + CHUNK_SIZE
  let data = Buffer.allocUnsafe(CHUNK_SIZE)
  let filled = 0

  while (true) {
    if (filled === data.length) {
      const bigger = Buffer.allocUnsafe(Math.min(data.length * 2, hardLimit))
      data.copy(bigger, 0, 0, filled)
      data = bigger
    }

    const room = data.length - filled
    const { bytesRead } = await handle.read(data, filled, room, filled)
    if (bytesRead === 0) {
      return normalizeCRLF(data, filled)
    }

    filled += bytesRead
    if (filled > MAX_SCAN_BYTES) {
      return null
    }
  }
}
146  
/**
 * Like buf.indexOf(needle), but a hit only counts when it lies entirely
 * inside buf[0..end). No sub-view of the buffer is allocated.
 *
 * Checking only the first occurrence is sufficient: any occurrence that fits
 * inside [0, end) necessarily starts before one that does not.
 */
function indexOfWithin(buf: Buffer, needle: Buffer, end: number): number {
  const hit = buf.indexOf(needle)
  if (hit === -1) {
    return -1
  }
  return hit + needle.length <= end ? hit : -1
}
152  
/** Counts the line-feed bytes in buf[start..end). */
function countNewlines(buf: Buffer, start: number, end: number): number {
  let count = 0
  let i = start
  while (i < end) {
    if (buf[i] === NL) {
      count += 1
    }
    i += 1
  }
  return count
}
158  
/**
 * Decodes buf[0..len) as UTF-8 and rewrites every CRLF pair as a single LF.
 * The replacement pass is skipped when the text contains no CR at all;
 * a CR that is not followed by LF is left untouched.
 */
function normalizeCRLF(buf: Buffer, len: number): string {
  const text = buf.toString('utf8', 0, len)
  if (text.indexOf('\r') === -1) {
    return text
  }
  return text.replace(/\r\n/g, '\n')
}
164  
/**
 * Given the absolute byte offset of a match, reads up to `contextLines` lines
 * on either side of it and returns the decoded slice together with the
 * 1-based line number of the slice's first line.
 *
 * `scratch` (the caller's scan buffer) is reused for the backward read, the
 * forward read and, when the slice fits, the final read, so its previous
 * contents are overwritten. A new buffer is allocated only when the slice is
 * larger than `scratch`.
 *
 * Each side is examined through a single CHUNK_SIZE window. When a window is
 * used up before the wanted line boundary is found, the slice starts or ends
 * mid-line. In that case the edge is nudged to a UTF-8 character boundary so
 * the decoded text does not begin or end with U+FFFD from a split sequence.
 *
 * @param handle - open file to read from
 * @param scratch - reusable buffer of at least CHUNK_SIZE bytes
 * @param matchStart - absolute byte offset of the match in the file
 * @param matchLen - byte length of the match as it appears in the file
 * @param contextLines - whole lines of context wanted before and after
 * @param linesBeforeMatch - number of newline bytes preceding matchStart
 * @returns the slice (CRLF normalized to LF), its starting line number and
 *   `truncated: false`
 */
async function sliceContext(
  handle: FileHandle,
  scratch: Buffer,
  matchStart: number,
  matchLen: number,
  contextLines: number,
  linesBeforeMatch: number,
): Promise<EditContext> {
  // Read the window that ends at the match and walk it backwards. The walk
  // stops on the newline that ends the line before the first context line.
  const backChunk = Math.min(matchStart, CHUNK_SIZE)
  const { bytesRead: backRead } = await handle.read(
    scratch,
    0,
    backChunk,
    matchStart - backChunk,
  )
  let ctxStart = matchStart
  let nlSeen = 0
  let startsOnLineBoundary = false
  for (let i = backRead - 1; i >= 0 && nlSeen <= contextLines; i--) {
    if (scratch[i] === NL) {
      nlSeen++
      if (nlSeen > contextLines) {
        startsOnLineBoundary = true
        break
      }
    }
    ctxStart--
  }
  if (!startsOnLineBoundary && ctxStart > 0) {
    // No line boundary was found and this is not the start of the file, so
    // ctxStart may sit inside a multi-byte character. Skip forward over at
    // most three continuation bytes, never past the match itself.
    let idx = backRead - (matchStart - ctxStart)
    const stop = Math.min(backRead, idx + 3)
    while (idx < stop && isUtf8Continuation(scratch.readUInt8(idx))) {
      idx++
      ctxStart++
    }
  }
  // Compute lineOffset now, before scratch is overwritten by the forward read.
  const walkedBack = matchStart - ctxStart
  const lineOffset =
    linesBeforeMatch -
    countNewlines(scratch, backRead - walkedBack, backRead) +
    1

  // Walk forward from the end of the match: the first newline closes the
  // match's own last line, and contextLines more newlines follow.
  const matchEnd = matchStart + matchLen
  const { bytesRead: fwdRead } = await handle.read(
    scratch,
    0,
    CHUNK_SIZE,
    matchEnd,
  )
  let ctxEnd = matchEnd
  let endsOnLineBoundary = false
  nlSeen = 0
  for (let i = 0; i < fwdRead; i++) {
    ctxEnd++
    if (scratch[i] === NL) {
      nlSeen++
      if (nlSeen >= contextLines + 1) {
        endsOnLineBoundary = true
        break
      }
    }
  }
  if (!endsOnLineBoundary && fwdRead === CHUNK_SIZE) {
    // The window filled up without reaching the boundary, so the last
    // character may be cut in half. Drop its partial bytes. A short read
    // means EOF, and the file's real ending is kept as-is.
    ctxEnd -= incompleteUtf8TailLength(scratch, fwdRead)
  }

  // Read the exact context range. Reuse scratch if it fits.
  const len = ctxEnd - ctxStart
  const out = len <= scratch.length ? scratch : Buffer.allocUnsafe(len)
  const { bytesRead: outRead } = await handle.read(out, 0, len, ctxStart)

  return { content: normalizeCRLF(out, outRead), lineOffset, truncated: false }
}

/** True when `byte` is a UTF-8 continuation byte (bit pattern 10xxxxxx). */
function isUtf8Continuation(byte: number): boolean {
  return (byte & 0xc0) === 0x80
}

/**
 * Returns how many bytes at the end of buf[0..end) belong to a UTF-8 sequence
 * whose remaining bytes lie beyond `end`, or 0 when the range ends on a
 * complete character (or on bytes that do not look like a started sequence).
 */
function incompleteUtf8TailLength(buf: Buffer, end: number): number {
  // Step back over at most three continuation bytes to the candidate lead byte.
  let lead = end - 1
  while (
    lead >= 0 &&
    end - lead <= 3 &&
    isUtf8Continuation(buf.readUInt8(lead))
  ) {
    lead--
  }
  if (lead < 0) return 0
  const first = buf.readUInt8(lead)
  const expected = first >= 0xf0 ? 4 : first >= 0xe0 ? 3 : first >= 0xc0 ? 2 : 1
  const present = end - lead
  return present < expected ? present : 0
}