// utils/sessionStoragePortable.ts
  1  /**
  2   * Portable session storage utilities.
  3   *
  4   * Pure Node.js — no internal dependencies on logging, experiments, or feature
  5   * flags. Shared between the CLI (src/utils/sessionStorage.ts) and the VS Code
  6   * extension (packages/claude-vscode/src/common-host/sessionStorage.ts).
  7   */
  8  
  9  import type { UUID } from 'crypto'
 10  import { open as fsOpen, readdir, realpath, stat } from 'fs/promises'
 11  import { join } from 'path'
 12  import { getClaudeConfigHomeDir } from './envUtils.js'
 13  import { getWorktreePathsPortable } from './getWorktreePathsPortable.js'
 14  import { djb2Hash } from './hash.js'
 15  
/** Size (bytes) of the head/tail buffer for lite metadata reads — 64 KiB,
 * used by readHeadAndTail and readSessionLite. */
export const LITE_READ_BUF_SIZE = 65536
 18  
 19  // ---------------------------------------------------------------------------
 20  // UUID validation
 21  // ---------------------------------------------------------------------------
 22  
 23  const uuidRegex =
 24    /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i
 25  
 26  export function validateUuid(maybeUuid: unknown): UUID | null {
 27    if (typeof maybeUuid !== 'string') return null
 28    return uuidRegex.test(maybeUuid) ? (maybeUuid as UUID) : null
 29  }
 30  
 31  // ---------------------------------------------------------------------------
 32  // JSON string field extraction — no full parse, works on truncated lines
 33  // ---------------------------------------------------------------------------
 34  
 35  /**
 36   * Unescape a JSON string value extracted as raw text.
 37   * Only allocates a new string when escape sequences are present.
 38   */
 39  export function unescapeJsonString(raw: string): string {
 40    if (!raw.includes('\\')) return raw
 41    try {
 42      return JSON.parse(`"${raw}"`)
 43    } catch {
 44      return raw
 45    }
 46  }
 47  
 48  /**
 49   * Extracts a simple JSON string field value from raw text without full parsing.
 50   * Looks for `"key":"value"` or `"key": "value"` patterns.
 51   * Returns the first match, or undefined if not found.
 52   */
 53  export function extractJsonStringField(
 54    text: string,
 55    key: string,
 56  ): string | undefined {
 57    const patterns = [`"${key}":"`, `"${key}": "`]
 58    for (const pattern of patterns) {
 59      const idx = text.indexOf(pattern)
 60      if (idx < 0) continue
 61  
 62      const valueStart = idx + pattern.length
 63      let i = valueStart
 64      while (i < text.length) {
 65        if (text[i] === '\\') {
 66          i += 2
 67          continue
 68        }
 69        if (text[i] === '"') {
 70          return unescapeJsonString(text.slice(valueStart, i))
 71        }
 72        i++
 73      }
 74    }
 75    return undefined
 76  }
 77  
 78  /**
 79   * Like extractJsonStringField but finds the LAST occurrence.
 80   * Useful for fields that are appended (customTitle, tag, etc.).
 81   */
 82  export function extractLastJsonStringField(
 83    text: string,
 84    key: string,
 85  ): string | undefined {
 86    const patterns = [`"${key}":"`, `"${key}": "`]
 87    let lastValue: string | undefined
 88    for (const pattern of patterns) {
 89      let searchFrom = 0
 90      while (true) {
 91        const idx = text.indexOf(pattern, searchFrom)
 92        if (idx < 0) break
 93  
 94        const valueStart = idx + pattern.length
 95        let i = valueStart
 96        while (i < text.length) {
 97          if (text[i] === '\\') {
 98            i += 2
 99            continue
100          }
101          if (text[i] === '"') {
102            lastValue = unescapeJsonString(text.slice(valueStart, i))
103            break
104          }
105          i++
106        }
107        searchFrom = i + 1
108      }
109    }
110    return lastValue
111  }
112  
113  // ---------------------------------------------------------------------------
114  // First prompt extraction from head chunk
115  // ---------------------------------------------------------------------------
116  
117  /**
118   * Pattern matching auto-generated or system messages that should be skipped
119   * when looking for the first meaningful user prompt. Matches anything that
120   * starts with a lowercase XML-like tag (IDE context, hook output, task
121   * notifications, channel messages, etc.) or a synthetic interrupt marker.
122   */
123  const SKIP_FIRST_PROMPT_PATTERN =
124    /^(?:\s*<[a-z][\w-]*[\s>]|\[Request interrupted by user[^\]]*\])/
125  
126  const COMMAND_NAME_RE = /<command-name>(.*?)<\/command-name>/
127  
128  /**
129   * Extracts the first meaningful user prompt from a JSONL head chunk.
130   *
131   * Skips tool_result messages, isMeta, isCompactSummary, command-name messages,
132   * and auto-generated patterns (session hooks, tick, IDE metadata, etc.).
133   * Truncates to 200 chars.
134   */
135  export function extractFirstPromptFromHead(head: string): string {
136    let start = 0
137    let commandFallback = ''
138    while (start < head.length) {
139      const newlineIdx = head.indexOf('\n', start)
140      const line =
141        newlineIdx >= 0 ? head.slice(start, newlineIdx) : head.slice(start)
142      start = newlineIdx >= 0 ? newlineIdx + 1 : head.length
143  
144      if (!line.includes('"type":"user"') && !line.includes('"type": "user"'))
145        continue
146      if (line.includes('"tool_result"')) continue
147      if (line.includes('"isMeta":true') || line.includes('"isMeta": true'))
148        continue
149      if (
150        line.includes('"isCompactSummary":true') ||
151        line.includes('"isCompactSummary": true')
152      )
153        continue
154  
155      try {
156        const entry = JSON.parse(line) as Record<string, unknown>
157        if (entry.type !== 'user') continue
158  
159        const message = entry.message as Record<string, unknown> | undefined
160        if (!message) continue
161  
162        const content = message.content
163        const texts: string[] = []
164        if (typeof content === 'string') {
165          texts.push(content)
166        } else if (Array.isArray(content)) {
167          for (const block of content as Record<string, unknown>[]) {
168            if (block.type === 'text' && typeof block.text === 'string') {
169              texts.push(block.text as string)
170            }
171          }
172        }
173  
174        for (const raw of texts) {
175          let result = raw.replace(/\n/g, ' ').trim()
176          if (!result) continue
177  
178          // Skip slash-command messages but remember first as fallback
179          const cmdMatch = COMMAND_NAME_RE.exec(result)
180          if (cmdMatch) {
181            if (!commandFallback) commandFallback = cmdMatch[1]!
182            continue
183          }
184  
185          // Format bash input with ! prefix before the generic XML skip
186          const bashMatch = /<bash-input>([\s\S]*?)<\/bash-input>/.exec(result)
187          if (bashMatch) return `! ${bashMatch[1]!.trim()}`
188  
189          if (SKIP_FIRST_PROMPT_PATTERN.test(result)) continue
190  
191          if (result.length > 200) {
192            result = result.slice(0, 200).trim() + '\u2026'
193          }
194          return result
195        }
196      } catch {
197        continue
198      }
199    }
200    if (commandFallback) return commandFallback
201    return ''
202  }
203  
204  // ---------------------------------------------------------------------------
205  // File I/O — read head and tail of a file
206  // ---------------------------------------------------------------------------
207  
208  /**
209   * Reads the first and last LITE_READ_BUF_SIZE bytes of a file.
210   *
211   * For small files where head covers tail, `tail === head`.
212   * Accepts a shared Buffer to avoid per-file allocation overhead.
213   * Returns `{ head: '', tail: '' }` on any error.
214   */
215  export async function readHeadAndTail(
216    filePath: string,
217    fileSize: number,
218    buf: Buffer,
219  ): Promise<{ head: string; tail: string }> {
220    try {
221      const fh = await fsOpen(filePath, 'r')
222      try {
223        const headResult = await fh.read(buf, 0, LITE_READ_BUF_SIZE, 0)
224        if (headResult.bytesRead === 0) return { head: '', tail: '' }
225  
226        const head = buf.toString('utf8', 0, headResult.bytesRead)
227  
228        const tailOffset = Math.max(0, fileSize - LITE_READ_BUF_SIZE)
229        let tail = head
230        if (tailOffset > 0) {
231          const tailResult = await fh.read(buf, 0, LITE_READ_BUF_SIZE, tailOffset)
232          tail = buf.toString('utf8', 0, tailResult.bytesRead)
233        }
234  
235        return { head, tail }
236      } finally {
237        await fh.close()
238      }
239    } catch {
240      return { head: '', tail: '' }
241    }
242  }
243  
/** Lightweight snapshot of a session file produced by readSessionLite. */
export type LiteSessionFile = {
  mtime: number // file mtime, milliseconds since epoch
  size: number // file size in bytes
  head: string // first LITE_READ_BUF_SIZE bytes, utf8-decoded
  tail: string // last LITE_READ_BUF_SIZE bytes (=== head for small files)
}
250  
251  /**
252   * Opens a single session file, stats it, and reads head + tail in one fd.
253   * Allocates its own buffer — safe for concurrent use with Promise.all.
254   * Returns null on any error.
255   */
256  export async function readSessionLite(
257    filePath: string,
258  ): Promise<LiteSessionFile | null> {
259    try {
260      const fh = await fsOpen(filePath, 'r')
261      try {
262        const stat = await fh.stat()
263        const buf = Buffer.allocUnsafe(LITE_READ_BUF_SIZE)
264        const headResult = await fh.read(buf, 0, LITE_READ_BUF_SIZE, 0)
265        if (headResult.bytesRead === 0) return null
266  
267        const head = buf.toString('utf8', 0, headResult.bytesRead)
268        const tailOffset = Math.max(0, stat.size - LITE_READ_BUF_SIZE)
269        let tail = head
270        if (tailOffset > 0) {
271          const tailResult = await fh.read(buf, 0, LITE_READ_BUF_SIZE, tailOffset)
272          tail = buf.toString('utf8', 0, tailResult.bytesRead)
273        }
274  
275        return { mtime: stat.mtime.getTime(), size: stat.size, head, tail }
276      } finally {
277        await fh.close()
278      }
279    } catch {
280      return null
281    }
282  }
283  
284  // ---------------------------------------------------------------------------
285  // Path sanitization
286  // ---------------------------------------------------------------------------
287  
288  /**
289   * Maximum length for a single filesystem path component (directory or file name).
290   * Most filesystems (ext4, APFS, NTFS) limit individual components to 255 bytes.
291   * We use 200 to leave room for the hash suffix and separator.
292   */
293  export const MAX_SANITIZED_LENGTH = 200
294  
295  function simpleHash(str: string): string {
296    return Math.abs(djb2Hash(str)).toString(36)
297  }
298  
299  /**
300   * Makes a string safe for use as a directory or file name.
301   * Replaces all non-alphanumeric characters with hyphens.
302   * This ensures compatibility across all platforms, including Windows
303   * where characters like colons are reserved.
304   *
305   * For deeply nested paths that would exceed filesystem limits (255 bytes),
306   * truncates and appends a hash suffix for uniqueness.
307   *
308   * @param name - The string to make safe (e.g., '/Users/foo/my-project' or 'plugin:name:server')
309   * @returns A safe name (e.g., '-Users-foo-my-project' or 'plugin-name-server')
310   */
311  export function sanitizePath(name: string): string {
312    const sanitized = name.replace(/[^a-zA-Z0-9]/g, '-')
313    if (sanitized.length <= MAX_SANITIZED_LENGTH) {
314      return sanitized
315    }
316    const hash =
317      typeof Bun !== 'undefined' ? Bun.hash(name).toString(36) : simpleHash(name)
318    return `${sanitized.slice(0, MAX_SANITIZED_LENGTH)}-${hash}`
319  }
320  
321  // ---------------------------------------------------------------------------
322  // Project directory discovery (shared by listSessions & getSessionMessages)
323  // ---------------------------------------------------------------------------
324  
325  export function getProjectsDir(): string {
326    return join(getClaudeConfigHomeDir(), 'projects')
327  }
328  
329  export function getProjectDir(projectDir: string): string {
330    return join(getProjectsDir(), sanitizePath(projectDir))
331  }
332  
333  /**
334   * Resolves a directory path to its canonical form using realpath + NFC
335   * normalization. Falls back to NFC-only if realpath fails (e.g., the
336   * directory doesn't exist yet). Ensures symlinked paths (e.g.,
337   * /tmp → /private/tmp on macOS) resolve to the same project directory.
338   */
339  export async function canonicalizePath(dir: string): Promise<string> {
340    try {
341      return (await realpath(dir)).normalize('NFC')
342    } catch {
343      return dir.normalize('NFC')
344    }
345  }
346  
347  /**
348   * Finds the project directory for a given path, tolerating hash mismatches
349   * for long paths (>200 chars). The CLI uses Bun.hash while the SDK under
350   * Node.js uses simpleHash — for paths that exceed MAX_SANITIZED_LENGTH,
351   * these produce different directory suffixes. This function falls back to
352   * prefix-based scanning when the exact match doesn't exist.
353   */
354  export async function findProjectDir(
355    projectPath: string,
356  ): Promise<string | undefined> {
357    const exact = getProjectDir(projectPath)
358    try {
359      await readdir(exact)
360      return exact
361    } catch {
362      // Exact match failed — for short paths this means no sessions exist.
363      // For long paths, try prefix matching to handle hash mismatches.
364      const sanitized = sanitizePath(projectPath)
365      if (sanitized.length <= MAX_SANITIZED_LENGTH) {
366        return undefined
367      }
368      const prefix = sanitized.slice(0, MAX_SANITIZED_LENGTH)
369      const projectsDir = getProjectsDir()
370      try {
371        const dirents = await readdir(projectsDir, { withFileTypes: true })
372        const match = dirents.find(
373          d => d.isDirectory() && d.name.startsWith(prefix + '-'),
374        )
375        return match ? join(projectsDir, match.name) : undefined
376      } catch {
377        return undefined
378      }
379    }
380  }
381  
382  /**
383   * Resolve a sessionId to its on-disk JSONL file path.
384   *
385   * When `dir` is provided: canonicalize it, look in that project's directory
386   * (with findProjectDir fallback for Bun/Node hash mismatches), then fall back
387   * to sibling git worktrees. `projectPath` in the result is the canonical
388   * user-facing directory the file was found under.
389   *
390   * When `dir` is omitted: scan all project directories under ~/.claude/projects/.
391   * `projectPath` is undefined in this case (no meaningful project path to report).
392   *
393   * Existence is checked by stat (operate-then-catch-ENOENT, no existsSync).
394   * Zero-byte files are treated as not-found so callers continue searching past
395   * a truncated copy to find a valid one in a sibling directory.
396   *
397   * `fileSize` is returned so callers (loadSessionBuffer) don't need to re-stat.
398   *
399   * Shared by getSessionInfoImpl and getSessionMessagesImpl — the caller
400   * invokes its own reader (readSessionLite / loadSessionBuffer) on the
401   * resolved path.
402   */
403  export async function resolveSessionFilePath(
404    sessionId: string,
405    dir?: string,
406  ): Promise<
407    | { filePath: string; projectPath: string | undefined; fileSize: number }
408    | undefined
409  > {
410    const fileName = `${sessionId}.jsonl`
411  
412    if (dir) {
413      const canonical = await canonicalizePath(dir)
414      const projectDir = await findProjectDir(canonical)
415      if (projectDir) {
416        const filePath = join(projectDir, fileName)
417        try {
418          const s = await stat(filePath)
419          if (s.size > 0)
420            return { filePath, projectPath: canonical, fileSize: s.size }
421        } catch {
422          // ENOENT/EACCES — keep searching
423        }
424      }
425      // Worktree fallback — sessions may live under a different worktree root
426      let worktreePaths: string[]
427      try {
428        worktreePaths = await getWorktreePathsPortable(canonical)
429      } catch {
430        worktreePaths = []
431      }
432      for (const wt of worktreePaths) {
433        if (wt === canonical) continue
434        const wtProjectDir = await findProjectDir(wt)
435        if (!wtProjectDir) continue
436        const filePath = join(wtProjectDir, fileName)
437        try {
438          const s = await stat(filePath)
439          if (s.size > 0) return { filePath, projectPath: wt, fileSize: s.size }
440        } catch {
441          // ENOENT/EACCES — keep searching
442        }
443      }
444      return undefined
445    }
446  
447    // No dir — scan all project directories
448    const projectsDir = getProjectsDir()
449    let dirents: string[]
450    try {
451      dirents = await readdir(projectsDir)
452    } catch {
453      return undefined
454    }
455    for (const name of dirents) {
456      const filePath = join(projectsDir, name, fileName)
457      try {
458        const s = await stat(filePath)
459        if (s.size > 0)
460          return { filePath, projectPath: undefined, fileSize: s.size }
461      } catch {
462        // ENOENT/ENOTDIR — not in this project, keep scanning
463      }
464    }
465    return undefined
466  }
467  
// ---------------------------------------------------------------------------
// Compact-boundary chunked read (shared by loadTranscriptFile & SDK getSessionMessages)
// ---------------------------------------------------------------------------

/** Chunk size (bytes) for the forward transcript reader. 1 MB balances I/O
 * call count against buffer-growth churn. */
const TRANSCRIPT_READ_CHUNK_SIZE = 1024 * 1024

/**
 * File size (bytes) below which precompact filtering is skipped.
 * Large sessions (>5 MB) almost always have compact boundaries — they got big
 * because of many turns triggering auto-compact.
 */
export const SKIP_PRECOMPACT_THRESHOLD = 5 * 1024 * 1024
481  
482  /** Marker bytes searched for when locating the boundary. Lazy: allocated on
483   * first use, not at module load. Most sessions never resume. */
484  let _compactBoundaryMarker: Buffer | undefined
485  function compactBoundaryMarker(): Buffer {
486    return (_compactBoundaryMarker ??= Buffer.from('"compact_boundary"'))
487  }
488  
489  /**
490   * Confirm a byte-matched line is a real compact_boundary (marker can appear
491   * inside user content) and check for preservedSegment.
492   */
493  function parseBoundaryLine(
494    line: string,
495  ): { hasPreservedSegment: boolean } | null {
496    try {
497      const parsed = JSON.parse(line) as {
498        type?: string
499        subtype?: string
500        compactMetadata?: { preservedSegment?: unknown }
501      }
502      if (parsed.type !== 'system' || parsed.subtype !== 'compact_boundary') {
503        return null
504      }
505      return {
506        hasPreservedSegment: Boolean(parsed.compactMetadata?.preservedSegment),
507      }
508    } catch {
509      return null
510    }
511  }
512  
513  /**
514   * Single forward chunked read for the --resume load path. Attr-snap lines
515   * are skipped at the fd level; compact boundaries truncate in-stream. Peak
516   * is the output size, not the file size.
517   *
518   * The surviving (last) attr-snap is appended at EOF instead of in-place;
519   * restoreAttributionStateFromSnapshots only reads [length-1] so position
520   * doesn't matter.
521   */
522  
523  type Sink = { buf: Buffer; len: number; cap: number }
524  
525  function sinkWrite(s: Sink, src: Buffer, start: number, end: number): void {
526    const n = end - start
527    if (n <= 0) return
528    if (s.len + n > s.buf.length) {
529      const grown = Buffer.allocUnsafe(
530        Math.min(Math.max(s.buf.length * 2, s.len + n), s.cap),
531      )
532      s.buf.copy(grown, 0, 0, s.len)
533      s.buf = grown
534    }
535    src.copy(s.buf, s.len, start, end)
536    s.len += n
537  }
538  
539  function hasPrefix(
540    src: Buffer,
541    prefix: Buffer,
542    at: number,
543    end: number,
544  ): boolean {
545    return (
546      end - at >= prefix.length &&
547      src.compare(prefix, 0, prefix.length, at, at + prefix.length) === 0
548    )
549  }
550  
551  const ATTR_SNAP_PREFIX = Buffer.from('{"type":"attribution-snapshot"')
552  const SYSTEM_PREFIX = Buffer.from('{"type":"system"')
553  const LF = 0x0a
554  const LF_BYTE = Buffer.from([LF])
555  const BOUNDARY_SEARCH_BOUND = 256 // marker sits ~28 bytes in; 256 is slack
556  
/** Mutable state threaded through one readTranscriptForLoad pass. */
type LoadState = {
  out: Sink // filtered output accumulator
  boundaryStartOffset: number // file offset of the last truncating boundary line
  hasPreservedSegment: boolean // true once a preservedSegment boundary is seen
  lastSnapSrc: Buffer | null // most-recent attr-snap, appended at EOF
  lastSnapLen: number // valid byte length within lastSnapSrc
  lastSnapBuf: Buffer | undefined // reusable copy target backing lastSnapSrc
  bufFileOff: number // file offset of buf[0]
  carryLen: number // bytes of trailing partial line carried into next chunk
  carryBuf: Buffer | undefined // reusable storage for the carried bytes
  straddleSnapCarryLen: number // per-chunk; reset by processStraddle
  straddleSnapTailEnd: number // end offset (in chunk) of a seam-straddling snap
}
570  
// Handle the line spanning the chunk seam: the previous chunk's carried
// partial line plus this chunk's bytes up to its first LF. Returns how many
// chunk bytes were consumed; 0 = nothing consumed, fall through to concat.
function processStraddle(
  s: LoadState,
  chunk: Buffer,
  bytesRead: number,
): number {
  // Reset per-chunk straddle-snap bookkeeping before any early return.
  s.straddleSnapCarryLen = 0
  s.straddleSnapTailEnd = 0
  if (s.carryLen === 0) return 0
  const cb = s.carryBuf!
  const firstNl = chunk.indexOf(LF)
  // chunk is a reused buffer — an LF at/after bytesRead is stale data.
  if (firstNl === -1 || firstNl >= bytesRead) return 0
  const tailEnd = firstNl + 1
  if (hasPrefix(cb, ATTR_SNAP_PREFIX, 0, s.carryLen)) {
    // Straddling attribution-snapshot: record its two halves for captureSnap
    // instead of writing it to the output.
    s.straddleSnapCarryLen = s.carryLen
    s.straddleSnapTailEnd = tailEnd
    s.lastSnapSrc = null
  } else if (s.carryLen < ATTR_SNAP_PREFIX.length) {
    return 0 // too short to rule out attr-snap
  } else {
    if (hasPrefix(cb, SYSTEM_PREFIX, 0, s.carryLen)) {
      // Possible compact boundary split across the seam — reassemble the
      // full line as text and confirm with a real JSON parse.
      const hit = parseBoundaryLine(
        cb.toString('utf-8', 0, s.carryLen) +
          chunk.toString('utf-8', 0, firstNl),
      )
      if (hit?.hasPreservedSegment) {
        s.hasPreservedSegment = true
      } else if (hit) {
        // Non-preserving boundary: discard everything emitted so far and
        // forget any pending snapshot; the boundary line itself is kept.
        s.out.len = 0
        s.boundaryStartOffset = s.bufFileOff
        s.hasPreservedSegment = false
        s.lastSnapSrc = null
      }
    }
    sinkWrite(s.out, cb, 0, s.carryLen)
    sinkWrite(s.out, chunk, 0, tailEnd)
  }
  s.bufFileOff += s.carryLen + tailEnd
  s.carryLen = 0
  return tailEnd
}
612  
// Scan complete lines in buf: attribution-snapshot lines are stripped (the
// last one's range is returned so captureSnap can keep it), and confirmed
// non-preserving compact boundaries reset the output sink. Kept lines are
// flushed as contiguous runs; trailStart marks the trailing partial line.
function scanChunkLines(
  s: LoadState,
  buf: Buffer,
  boundaryMarker: Buffer,
): { lastSnapStart: number; lastSnapEnd: number; trailStart: number } {
  let boundaryAt = buf.indexOf(boundaryMarker)
  let runStart = 0
  let lineStart = 0
  let lastSnapStart = -1
  let lastSnapEnd = -1
  let nl = buf.indexOf(LF)
  while (nl !== -1) {
    const lineEnd = nl + 1
    // Re-seek the marker when the cached hit fell before the current line.
    if (boundaryAt !== -1 && boundaryAt < lineStart) {
      boundaryAt = buf.indexOf(boundaryMarker, lineStart)
    }
    if (hasPrefix(buf, ATTR_SNAP_PREFIX, lineStart, lineEnd)) {
      // Flush the run preceding this snap line, then skip the snap itself.
      sinkWrite(s.out, buf, runStart, lineStart)
      lastSnapStart = lineStart
      lastSnapEnd = lineEnd
      runStart = lineEnd
    } else if (
      boundaryAt >= lineStart &&
      boundaryAt < Math.min(lineStart + BOUNDARY_SEARCH_BOUND, lineEnd)
    ) {
      // Byte marker near the line start — confirm with a real JSON parse.
      const hit = parseBoundaryLine(buf.toString('utf-8', lineStart, nl))
      if (hit?.hasPreservedSegment) {
        s.hasPreservedSegment = true // don't truncate; preserved msgs already in output
      } else if (hit) {
        // Non-preserving boundary: drop prior output and pending snapshots.
        s.out.len = 0
        s.boundaryStartOffset = s.bufFileOff + lineStart
        s.hasPreservedSegment = false
        s.lastSnapSrc = null
        lastSnapStart = -1
        s.straddleSnapCarryLen = 0
        runStart = lineStart
      }
      boundaryAt = buf.indexOf(
        boundaryMarker,
        boundaryAt + boundaryMarker.length,
      )
    }
    lineStart = lineEnd
    nl = buf.indexOf(LF, lineStart)
  }
  // Flush the final run; bytes from lineStart on are an incomplete line.
  sinkWrite(s.out, buf, runStart, lineStart)
  return { lastSnapStart, lastSnapEnd, trailStart: lineStart }
}
662  
663  // In-buf snap wins over straddle (later in file). carryBuf still valid here.
664  function captureSnap(
665    s: LoadState,
666    buf: Buffer,
667    chunk: Buffer,
668    lastSnapStart: number,
669    lastSnapEnd: number,
670  ): void {
671    if (lastSnapStart !== -1) {
672      s.lastSnapLen = lastSnapEnd - lastSnapStart
673      if (s.lastSnapBuf === undefined || s.lastSnapLen > s.lastSnapBuf.length) {
674        s.lastSnapBuf = Buffer.allocUnsafe(s.lastSnapLen)
675      }
676      buf.copy(s.lastSnapBuf, 0, lastSnapStart, lastSnapEnd)
677      s.lastSnapSrc = s.lastSnapBuf
678    } else if (s.straddleSnapCarryLen > 0) {
679      s.lastSnapLen = s.straddleSnapCarryLen + s.straddleSnapTailEnd
680      if (s.lastSnapBuf === undefined || s.lastSnapLen > s.lastSnapBuf.length) {
681        s.lastSnapBuf = Buffer.allocUnsafe(s.lastSnapLen)
682      }
683      s.carryBuf!.copy(s.lastSnapBuf, 0, 0, s.straddleSnapCarryLen)
684      chunk.copy(s.lastSnapBuf, s.straddleSnapCarryLen, 0, s.straddleSnapTailEnd)
685      s.lastSnapSrc = s.lastSnapBuf
686    }
687  }
688  
689  function captureCarry(s: LoadState, buf: Buffer, trailStart: number): void {
690    s.carryLen = buf.length - trailStart
691    if (s.carryLen > 0) {
692      if (s.carryBuf === undefined || s.carryLen > s.carryBuf.length) {
693        s.carryBuf = Buffer.allocUnsafe(s.carryLen)
694      }
695      buf.copy(s.carryBuf, 0, trailStart, buf.length)
696    }
697  }
698  
699  function finalizeOutput(s: LoadState): void {
700    if (s.carryLen > 0) {
701      const cb = s.carryBuf!
702      if (hasPrefix(cb, ATTR_SNAP_PREFIX, 0, s.carryLen)) {
703        s.lastSnapSrc = cb
704        s.lastSnapLen = s.carryLen
705      } else {
706        sinkWrite(s.out, cb, 0, s.carryLen)
707      }
708    }
709    if (s.lastSnapSrc) {
710      if (s.out.len > 0 && s.out.buf[s.out.len - 1] !== LF) {
711        sinkWrite(s.out, LF_BYTE, 0, 1)
712      }
713      sinkWrite(s.out, s.lastSnapSrc, 0, s.lastSnapLen)
714    }
715  }
716  
/**
 * Single forward chunked read of a transcript for the --resume load path.
 *
 * Streams `filePath` in 1 MB chunks, stripping attribution-snapshot lines
 * and resetting accumulated output whenever a confirmed non-preserving
 * compact_boundary line is seen; the last surviving attr-snap is re-appended
 * at EOF by finalizeOutput.
 *
 * @param filePath - transcript JSONL file to read
 * @param fileSize - size of the file in bytes (caller already stat'd it)
 * @returns filtered post-boundary bytes, the file offset where the last
 *   truncating boundary line started (0 if none), and whether a boundary
 *   carrying a preservedSegment was seen.
 */
export async function readTranscriptForLoad(
  filePath: string,
  fileSize: number,
): Promise<{
  boundaryStartOffset: number
  postBoundaryBuf: Buffer
  hasPreservedSegment: boolean
}> {
  const boundaryMarker = compactBoundaryMarker()
  const CHUNK_SIZE = TRANSCRIPT_READ_CHUNK_SIZE

  const s: LoadState = {
    out: {
      // Gated callers enter with fileSize > 5MB, so min(fileSize, 8MB) lands
      // in [5, 8]MB; large boundaryless sessions (24-31MB output) take 2
      // grows. Ungated callers (attribution.ts) pass small files too — the
      // min just right-sizes the initial buf, no grows.
      buf: Buffer.allocUnsafe(Math.min(fileSize, 8 * 1024 * 1024)),
      len: 0,
      // +1: finalizeOutput may insert one LF between a non-LF-terminated
      // carry and the reordered last attr-snap (crash-truncated file).
      cap: fileSize + 1,
    },
    boundaryStartOffset: 0,
    hasPreservedSegment: false,
    lastSnapSrc: null,
    lastSnapLen: 0,
    lastSnapBuf: undefined,
    bufFileOff: 0,
    carryLen: 0,
    carryBuf: undefined,
    straddleSnapCarryLen: 0,
    straddleSnapTailEnd: 0,
  }

  const chunk = Buffer.allocUnsafe(CHUNK_SIZE)
  const fd = await fsOpen(filePath, 'r')
  try {
    let filePos = 0
    while (filePos < fileSize) {
      const { bytesRead } = await fd.read(
        chunk,
        0,
        Math.min(CHUNK_SIZE, fileSize - filePos),
        filePos,
      )
      if (bytesRead === 0) break
      filePos += bytesRead

      // First complete any line left dangling at the previous chunk's seam.
      const chunkOff = processStraddle(s, chunk, bytesRead)

      // Concatenate surviving carry + fresh bytes only when needed;
      // otherwise scan the chunk in place via a zero-copy subarray.
      let buf: Buffer
      if (s.carryLen > 0) {
        const bufLen = s.carryLen + (bytesRead - chunkOff)
        buf = Buffer.allocUnsafe(bufLen)
        s.carryBuf!.copy(buf, 0, 0, s.carryLen)
        chunk.copy(buf, s.carryLen, chunkOff, bytesRead)
      } else {
        buf = chunk.subarray(chunkOff, bytesRead)
      }

      const r = scanChunkLines(s, buf, boundaryMarker)
      captureSnap(s, buf, chunk, r.lastSnapStart, r.lastSnapEnd)
      captureCarry(s, buf, r.trailStart)
      s.bufFileOff += r.trailStart
    }
    finalizeOutput(s)
  } finally {
    await fd.close()
  }

  return {
    boundaryStartOffset: s.boundaryStartOffset,
    postBoundaryBuf: s.out.buf.subarray(0, s.out.len),
    hasPreservedSegment: s.hasPreservedSegment,
  }
}