// src/utils/json.ts
  1  import { open, readFile, stat } from 'fs/promises'
  2  import {
  3    applyEdits,
  4    modify,
  5    parse as parseJsonc,
  6  } from 'jsonc-parser/lib/esm/main.js'
  7  import { stripBOM } from './jsonRead.js'
  8  import { logError } from './log.js'
  9  import { memoizeWithLRU } from './memoize.js'
 10  import { jsonStringify } from './slowOperations.js'
 11  
// Result wrapper for the memoized parse: success carries the parsed value,
// failure carries nothing (callers map { ok: false } to null).
type CachedParse = { ok: true; value: unknown } | { ok: false }

// Memoized inner parse. Uses a discriminated-union wrapper because:
// 1. memoizeWithLRU requires NonNullable<unknown>, but JSON.parse can return
//    null (e.g. JSON.parse("null")).
// 2. Invalid JSON must also be cached — otherwise repeated calls with the same
//    bad string re-parse and re-log every time (behavioral regression vs the
//    old lodash memoize which wrapped the entire try/catch).
// Bounded to 50 entries to prevent unbounded memory growth — previously this
// used lodash memoize which cached every unique JSON string forever (settings,
// .mcp.json, notebooks, tool results), causing a significant memory leak.
// Note: shouldLogError is intentionally excluded from the cache key (matching
// lodash memoize default resolver = first arg only).

// Skip caching above this size — the LRU stores the full string as the key,
// so a 200KB config file would pin ~10MB in #keyList across 50 slots. Large
// inputs like ~/.claude.json also change between reads (numStartups bumps on
// every CC startup), so the cache never hits anyway.
// NOTE(review): the gate below compares string length (UTF-16 code units),
// not bytes, so non-ASCII input may exceed this byte cap slightly — confirm
// this is acceptable.
const PARSE_CACHE_MAX_KEY_BYTES = 8 * 1024
 30  
 31  function parseJSONUncached(json: string, shouldLogError: boolean): CachedParse {
 32    try {
 33      return { ok: true, value: JSON.parse(stripBOM(json)) }
 34    } catch (e) {
 35      if (shouldLogError) {
 36        logError(e)
 37      }
 38      return { ok: false }
 39    }
 40  }
 41  
 42  const parseJSONCached = memoizeWithLRU(parseJSONUncached, json => json, 50)
 43  
 44  // Important: memoized for performance (LRU-bounded to 50 entries, small inputs only).
 45  export const safeParseJSON = Object.assign(
 46    function safeParseJSON(
 47      json: string | null | undefined,
 48      shouldLogError: boolean = true,
 49    ): unknown {
 50      if (!json) return null
 51      const result =
 52        json.length > PARSE_CACHE_MAX_KEY_BYTES
 53          ? parseJSONUncached(json, shouldLogError)
 54          : parseJSONCached(json, shouldLogError)
 55      return result.ok ? result.value : null
 56    },
 57    { cache: parseJSONCached.cache },
 58  )
 59  
 60  /**
 61   * Safely parse JSON with comments (jsonc).
 62   * This is useful for VS Code configuration files like keybindings.json
 63   * which support comments and other jsonc features.
 64   */
 65  export function safeParseJSONC(json: string | null | undefined): unknown {
 66    if (!json) {
 67      return null
 68    }
 69    try {
 70      // Strip BOM before parsing - PowerShell 5.x adds BOM to UTF-8 files
 71      return parseJsonc(stripBOM(json))
 72    } catch (e) {
 73      logError(e)
 74      return null
 75    }
 76  }
 77  
/*
 * (Documentation for addItemToJSONCArray, declared at the bottom of this
 * file — kept as a plain comment so it is not attached to the type below.)
 * Modify a jsonc string by adding a new item to an array, preserving comments and formatting.
 * content: the jsonc string to modify; newItem: the new item to add.
 * Returns the modified jsonc string.
 */
 84  /**
 85   * Bun.JSONL.parseChunk if available, false otherwise.
 86   * Supports both strings and Buffers, minimizing memory usage and copies.
 87   * Also handles BOM stripping internally.
 88   */
 89  type BunJSONLParseChunk = (
 90    data: string | Buffer,
 91    offset?: number,
 92  ) => { values: unknown[]; error: null | Error; read: number; done: boolean }
 93  
 94  const bunJSONLParse: BunJSONLParseChunk | false = (() => {
 95    if (typeof Bun === 'undefined') return false
 96    const b = Bun as Record<string, unknown>
 97    const jsonl = b.JSONL as Record<string, unknown> | undefined
 98    if (!jsonl?.parseChunk) return false
 99    return jsonl.parseChunk as BunJSONLParseChunk
100  })()
101  
/**
 * Parses JSONL using Bun's native parseChunk, tolerating malformed lines.
 *
 * parseChunk stops at the first parse error; when that happens, skip to the
 * next newline after the reported read position and resume parsing there,
 * accumulating values from every successful run.
 *
 * NOTE(review): assumes `result.read` is the offset consumed up to the point
 * of the error — confirm against Bun's JSONL API.
 */
function parseJSONLBun<T>(data: string | Buffer): T[] {
  const parse = bunJSONLParse as BunJSONLParseChunk
  const len = data.length
  const result = parse(data)
  // Fast path: clean parse, parser reported done, or input fully consumed.
  if (!result.error || result.done || result.read >= len) {
    return result.values as T[]
  }
  // Had an error mid-stream — collect what we got and keep going
  let values = result.values as T[]
  let offset = result.read
  while (offset < len) {
    // Resynchronize at the next line boundary after the failure point.
    const newlineIndex =
      typeof data === 'string'
        ? data.indexOf('\n', offset)
        : data.indexOf(0x0a, offset)
    if (newlineIndex === -1) break
    offset = newlineIndex + 1
    const next = parse(data, offset)
    if (next.values.length > 0) {
      values = values.concat(next.values as T[])
    }
    // Stop when this run ended cleanly, was marked done, or reached the end.
    if (!next.error || next.done || next.read >= len) break
    offset = next.read
  }
  return values
}
128  
129  function parseJSONLBuffer<T>(buf: Buffer): T[] {
130    const bufLen = buf.length
131    let start = 0
132  
133    // Strip UTF-8 BOM (EF BB BF)
134    if (buf[0] === 0xef && buf[1] === 0xbb && buf[2] === 0xbf) {
135      start = 3
136    }
137  
138    const results: T[] = []
139    while (start < bufLen) {
140      let end = buf.indexOf(0x0a, start)
141      if (end === -1) end = bufLen
142  
143      const line = buf.toString('utf8', start, end).trim()
144      start = end + 1
145      if (!line) continue
146      try {
147        results.push(JSON.parse(line) as T)
148      } catch {
149        // Skip malformed lines
150      }
151    }
152    return results
153  }
154  
155  function parseJSONLString<T>(data: string): T[] {
156    const stripped = stripBOM(data)
157    const len = stripped.length
158    let start = 0
159  
160    const results: T[] = []
161    while (start < len) {
162      let end = stripped.indexOf('\n', start)
163      if (end === -1) end = len
164  
165      const line = stripped.substring(start, end).trim()
166      start = end + 1
167      if (!line) continue
168      try {
169        results.push(JSON.parse(line) as T)
170      } catch {
171        // Skip malformed lines
172      }
173    }
174    return results
175  }
176  
177  /**
178   * Parses JSONL data from a string or Buffer, skipping malformed lines.
179   * Uses Bun.JSONL.parseChunk when available for better performance,
180   * falls back to indexOf-based scanning otherwise.
181   */
182  export function parseJSONL<T>(data: string | Buffer): T[] {
183    if (bunJSONLParse) {
184      return parseJSONLBun<T>(data)
185    }
186    if (typeof data === 'string') {
187      return parseJSONLString<T>(data)
188    }
189    return parseJSONLBuffer<T>(data)
190  }
191  
// Upper bound on how much of a JSONL file we will read (100 MB).
const MAX_JSONL_READ_BYTES = 100 * 1024 * 1024

/**
 * Reads and parses a JSONL file, reading at most the last 100 MB.
 * For files larger than 100 MB, reads the tail and skips the first partial line.
 *
 * 100 MB is more than sufficient since the longest context window we support
 * is ~2M tokens, which is well under 100 MB of JSONL.
 */
export async function readJSONLFile<T>(filePath: string): Promise<T[]> {
  const { size } = await stat(filePath)
  if (size <= MAX_JSONL_READ_BYTES) {
    return parseJSONL<T>(await readFile(filePath))
  }
  // Oversized file: read only the trailing MAX_JSONL_READ_BYTES bytes.
  // `await using` closes the fd automatically (explicit resource management).
  await using fd = await open(filePath, 'r')
  const buf = Buffer.allocUnsafe(MAX_JSONL_READ_BYTES)
  let totalRead = 0
  const fileOffset = size - MAX_JSONL_READ_BYTES
  // read() may return fewer bytes than requested; loop until the window is
  // full or a read returns 0 (e.g. the file shrank after stat()).
  while (totalRead < MAX_JSONL_READ_BYTES) {
    const { bytesRead } = await fd.read(
      buf,
      totalRead,
      MAX_JSONL_READ_BYTES - totalRead,
      fileOffset + totalRead,
    )
    if (bytesRead === 0) break
    totalRead += bytesRead
  }
  // Skip the first partial line
  // (the tail almost certainly starts mid-line; drop everything up to and
  // including the first newline so parsing starts on a line boundary).
  const newlineIndex = buf.indexOf(0x0a)
  if (newlineIndex !== -1 && newlineIndex < totalRead - 1) {
    return parseJSONL<T>(buf.subarray(newlineIndex + 1, totalRead))
  }
  // No usable newline in the window: parse it all; a leading partial line
  // will be dropped by the JSONL parser as malformed.
  return parseJSONL<T>(buf.subarray(0, totalRead))
}
227  
228  export function addItemToJSONCArray(content: string, newItem: unknown): string {
229    try {
230      // If the content is empty or whitespace, create a new JSON file
231      if (!content || content.trim() === '') {
232        return jsonStringify([newItem], null, 4)
233      }
234  
235      // Strip BOM before parsing - PowerShell 5.x adds BOM to UTF-8 files
236      const cleanContent = stripBOM(content)
237  
238      // Parse the content to check if it's valid JSON
239      const parsedContent = parseJsonc(cleanContent)
240  
241      // If the parsed content is a valid array, modify it
242      if (Array.isArray(parsedContent)) {
243        // Get the length of the array
244        const arrayLength = parsedContent.length
245  
246        // Determine if we are dealing with an empty array
247        const isEmpty = arrayLength === 0
248  
249        // If it's an empty array we want to add at index 0, otherwise append to the end
250        const insertPath = isEmpty ? [0] : [arrayLength]
251  
252        // Generate edits - we're using isArrayInsertion to add a new item without overwriting existing ones
253        const edits = modify(cleanContent, insertPath, newItem, {
254          formattingOptions: { insertSpaces: true, tabSize: 4 },
255          isArrayInsertion: true,
256        })
257  
258        // If edits could not be generated, fall back to manual JSON string manipulation
259        if (!edits || edits.length === 0) {
260          const copy = [...parsedContent, newItem]
261          return jsonStringify(copy, null, 4)
262        }
263  
264        // Apply the edits to preserve comments (use cleanContent without BOM)
265        return applyEdits(cleanContent, edits)
266      }
267      // If it's not an array at all, create a new array with the item
268      else {
269        // If the content exists but is not an array, we'll replace it completely
270        return jsonStringify([newItem], null, 4)
271      }
272    } catch (e) {
273      // If parsing fails for any reason, log the error and fallback to creating a new JSON array
274      logError(e)
275      return jsonStringify([newItem], null, 4)
276    }
277  }