// src/utils/statsCache.ts
  1  import { feature } from 'bun:bundle'
  2  import { randomBytes } from 'crypto'
  3  import { open } from 'fs/promises'
  4  import { join } from 'path'
  5  import type { ModelUsage } from '../entrypoints/agentSdkTypes.js'
  6  import { logForDebugging } from './debug.js'
  7  import { getClaudeConfigHomeDir } from './envUtils.js'
  8  import { errorMessage } from './errors.js'
  9  import { getFsImplementation } from './fsOperations.js'
 10  import { logError } from './log.js'
 11  import { jsonParse, jsonStringify } from './slowOperations.js'
 12  import type { DailyActivity, DailyModelTokens, SessionStats } from './stats.js'
 13  
 // Schema version of the on-disk cache. It is stamped into every cache object
 // this module builds and compared against the stored `version` on load.
 14  export const STATS_CACHE_VERSION = 3
 // Oldest stored version migrateStatsCache() will upgrade; lower versions make
 // it return null, which loadStatsCache() treats as an empty cache.
 15  const MIN_MIGRATABLE_VERSION = 1
 // File name of the cache; getStatsCachePath() joins it onto the config home dir.
 16  const STATS_CACHE_FILENAME = 'stats-cache.json'
 17  
 18  /**
 19   * Simple in-memory lock to prevent concurrent cache operations.
 20   */
 21  let statsCacheLockPromise: Promise<void> | null = null
 22  
 23  /**
 24   * Execute a function while holding the stats cache lock.
 25   * Only one operation can hold the lock at a time.
 26   */
 27  export async function withStatsCacheLock<T>(fn: () => Promise<T>): Promise<T> {
 28    // Wait for any existing lock to be released
 29    while (statsCacheLockPromise) {
 30      await statsCacheLockPromise
 31    }
 32  
 33    // Create our lock
 34    let releaseLock: (() => void) | undefined
 35    statsCacheLockPromise = new Promise<void>(resolve => {
 36      releaseLock = resolve
 37    })
 38  
 39    try {
 40      return await fn()
 41    } finally {
 42      // Release the lock
 43      statsCacheLockPromise = null
 44      releaseLock?.()
 45    }
 46  }
 47  
 48  /**
 49   * Persisted stats cache stored on disk.
 50   * Contains aggregated historical stats that won't change.
 51   * All fields are bounded to prevent unbounded file growth.
 52   */
 53  export type PersistedStatsCache = {
 54    version: number
 55    // Last date that was fully computed (YYYY-MM-DD format)
 56    // Stats up to and including this date are considered complete
 57    lastComputedDate: string | null
 58    // Daily aggregates needed for heatmap, streaks, trends (bounded by days)
 59    dailyActivity: DailyActivity[]
 60    dailyModelTokens: DailyModelTokens[]
 61    // Model usage aggregated (bounded by number of models)
 62    modelUsage: { [modelName: string]: ModelUsage }
 63    // Session aggregates (replaces unbounded sessionStats array)
 64    totalSessions: number
 65    totalMessages: number
 66    longestSession: SessionStats | null
 67    // First session date ever recorded
 68    firstSessionDate: string | null
 69    // Hour counts for peak hour calculation (bounded to 24 entries)
 70    hourCounts: { [hour: number]: number }
 71    // Speculation time saved across all sessions
 72    totalSpeculationTimeSavedMs: number
 73    // Shot distribution: map of shot count → number of sessions (ant-only)
 74    shotDistribution?: { [shotCount: number]: number }
 75  }
 76  
 77  export function getStatsCachePath(): string {
 78    return join(getClaudeConfigHomeDir(), STATS_CACHE_FILENAME)
 79  }
 80  
 81  function getEmptyCache(): PersistedStatsCache {
 82    return {
 83      version: STATS_CACHE_VERSION,
 84      lastComputedDate: null,
 85      dailyActivity: [],
 86      dailyModelTokens: [],
 87      modelUsage: {},
 88      totalSessions: 0,
 89      totalMessages: 0,
 90      longestSession: null,
 91      firstSessionDate: null,
 92      hourCounts: {},
 93      totalSpeculationTimeSavedMs: 0,
 94      shotDistribution: {},
 95    }
 96  }
 97  
 98  /**
 99   * Migrate an older cache to the current schema.
100   * Returns null if the version is unknown or too old to migrate.
101   *
102   * Preserves historical aggregates that would otherwise be lost when
103   * transcript files have already aged out past cleanupPeriodDays.
104   * Pre-migration days may undercount (e.g. v2 lacked subagent tokens);
105   * we accept that rather than drop the history.
106   */
107  function migrateStatsCache(
108    parsed: Partial<PersistedStatsCache> & { version: number },
109  ): PersistedStatsCache | null {
110    if (
111      typeof parsed.version !== 'number' ||
112      parsed.version < MIN_MIGRATABLE_VERSION ||
113      parsed.version > STATS_CACHE_VERSION
114    ) {
115      return null
116    }
117    if (
118      !Array.isArray(parsed.dailyActivity) ||
119      !Array.isArray(parsed.dailyModelTokens) ||
120      typeof parsed.totalSessions !== 'number' ||
121      typeof parsed.totalMessages !== 'number'
122    ) {
123      return null
124    }
125    return {
126      version: STATS_CACHE_VERSION,
127      lastComputedDate: parsed.lastComputedDate ?? null,
128      dailyActivity: parsed.dailyActivity,
129      dailyModelTokens: parsed.dailyModelTokens,
130      modelUsage: parsed.modelUsage ?? {},
131      totalSessions: parsed.totalSessions,
132      totalMessages: parsed.totalMessages,
133      longestSession: parsed.longestSession ?? null,
134      firstSessionDate: parsed.firstSessionDate ?? null,
135      hourCounts: parsed.hourCounts ?? {},
136      totalSpeculationTimeSavedMs: parsed.totalSpeculationTimeSavedMs ?? 0,
137      // Preserve undefined (don't default to {}) so the SHOT_STATS recompute
138      // check in loadStatsCache fires for v1/v2 caches that lacked this field.
139      shotDistribution: parsed.shotDistribution,
140    }
141  }
142  
/**
 * Read the stats cache from disk.
 *
 * An empty cache is returned when the file cannot be read or parsed, when its
 * version cannot be migrated, when required fields have the wrong shape, or
 * when SHOT_STATS is enabled and the cache carries no shotDistribution
 * (which forces a full recomputation). A cache at an older, migratable
 * version is upgraded and written back to disk before being returned.
 *
 * @returns The cache to use; this function never rejects for read or parse
 *   failures.
 */
export async function loadStatsCache(): Promise<PersistedStatsCache> {
  const fs = getFsImplementation()
  const cachePath = getStatsCachePath()

  try {
    const raw = await fs.readFile(cachePath, { encoding: 'utf-8' })
    const onDisk = jsonParse(raw) as PersistedStatsCache

    if (onDisk.version === STATS_CACHE_VERSION) {
      // Current schema: sanity-check the fields everything else relies on.
      const hasExpectedShape =
        Array.isArray(onDisk.dailyActivity) &&
        Array.isArray(onDisk.dailyModelTokens) &&
        typeof onDisk.totalSessions === 'number' &&
        typeof onDisk.totalMessages === 'number'
      if (!hasExpectedShape) {
        logForDebugging(
          'Stats cache has invalid structure, returning empty cache',
        )
        return getEmptyCache()
      }

      // With SHOT_STATS on, a cache lacking shotDistribution cannot supply
      // historical shot data, so start over and recompute everything.
      if (feature('SHOT_STATS') && !onDisk.shotDistribution) {
        logForDebugging(
          'Stats cache missing shotDistribution, forcing recomputation',
        )
        return getEmptyCache()
      }

      return onDisk
    }

    // Any other version: attempt an upgrade to the current schema.
    const upgraded = migrateStatsCache(onDisk)
    if (!upgraded) {
      logForDebugging(
        `Stats cache version ${onDisk.version} not migratable (expected ${STATS_CACHE_VERSION}), returning empty cache`,
      )
      return getEmptyCache()
    }
    logForDebugging(
      `Migrated stats cache from v${onDisk.version} to v${STATS_CACHE_VERSION}`,
    )
    // Write the upgraded cache back so the migration is not repeated on every
    // load. aggregateClaudeCodeStats() skips its own save when
    // lastComputedDate is already current, so without this the file on disk
    // would stay at the old version indefinitely.
    await saveStatsCache(upgraded)
    if (feature('SHOT_STATS') && !upgraded.shotDistribution) {
      logForDebugging(
        'Migrated stats cache missing shotDistribution, forcing recomputation',
      )
      return getEmptyCache()
    }
    return upgraded
  } catch (error) {
    // Covers a missing file, unreadable content and malformed JSON alike.
    logForDebugging(`Failed to load stats cache: ${errorMessage(error)}`)
    return getEmptyCache()
  }
}
209  
/**
 * Persist the stats cache to disk.
 *
 * The JSON is written to a uniquely named temp file next to the target,
 * synced, and then renamed over the cache file, so readers never observe a
 * partially written cache. Failures are logged and not rethrown; the temp
 * file is removed on a best-effort basis.
 *
 * @param cache - Cache contents to serialize and store.
 */
export async function saveStatsCache(
  cache: PersistedStatsCache,
): Promise<void> {
  const fs = getFsImplementation()
  const cachePath = getStatsCachePath()
  const uniqueSuffix = randomBytes(8).toString('hex')
  const tempPath = `${cachePath}.${uniqueSuffix}.tmp`

  // Best-effort removal of the temp file after a failed save.
  const discardTempFile = async (): Promise<void> => {
    try {
      await fs.unlink(tempPath)
    } catch {
      // Nothing useful to do if cleanup fails (the file may not exist).
    }
  }

  try {
    // Make sure the config directory is there. Any mkdir error (including
    // "already exists") is ignored; a real problem surfaces at open() below.
    const configDir = getClaudeConfigHomeDir()
    try {
      await fs.mkdir(configDir)
    } catch {
      // Directory already exists or other error - proceed
    }

    // Write and sync the temp file (mode 0o600) before it replaces the cache.
    const serialized = jsonStringify(cache, null, 2)
    const tempFile = await open(tempPath, 'w', 0o600)
    try {
      await tempFile.writeFile(serialized, { encoding: 'utf-8' })
      await tempFile.sync()
    } finally {
      await tempFile.close()
    }

    // Swap the finished temp file into place.
    await fs.rename(tempPath, cachePath)
    logForDebugging(
      `Stats cache saved successfully (lastComputedDate: ${cache.lastComputedDate})`,
    )
  } catch (error) {
    logError(error)
    await discardTempFile()
  }
}
255  
/**
 * Fold newly computed stats into an existing cache and return the combined
 * cache. Used when adding new days to the cache incrementally.
 *
 * Neither input is mutated: per-day entries and per-model maps are copied
 * before being updated.
 *
 * @param existingCache - Cache holding everything computed so far.
 * @param newStats - Stats for the newly processed period.
 * @param newLastComputedDate - Value stored as `lastComputedDate` in the result.
 * @returns A new cache at the current schema version. `shotDistribution` is
 *   only set when the SHOT_STATS feature is enabled.
 */
export function mergeCacheWithNewStats(
  existingCache: PersistedStatsCache,
  newStats: {
    dailyActivity: DailyActivity[]
    dailyModelTokens: DailyModelTokens[]
    modelUsage: { [modelName: string]: ModelUsage }
    sessionStats: SessionStats[]
    hourCounts: { [hour: number]: number }
    totalSpeculationTimeSavedMs: number
    shotDistribution?: { [shotCount: number]: number }
  },
  newLastComputedDate: string,
): PersistedStatsCache {
  // Daily activity: one entry per date, counters summed when a date repeats.
  const activityByDate = new Map<string, DailyActivity>(
    existingCache.dailyActivity.map(
      (entry): [string, DailyActivity] => [entry.date, { ...entry }],
    ),
  )
  for (const incoming of newStats.dailyActivity) {
    const known = activityByDate.get(incoming.date)
    if (!known) {
      activityByDate.set(incoming.date, { ...incoming })
      continue
    }
    known.messageCount += incoming.messageCount
    known.sessionCount += incoming.sessionCount
    known.toolCallCount += incoming.toolCallCount
  }

  // Daily model tokens: one token map per date, summed per model.
  const tokensByDate = new Map<string, { [model: string]: number }>()
  for (const { date, tokensByModel } of existingCache.dailyModelTokens) {
    tokensByDate.set(date, { ...tokensByModel })
  }
  for (const { date, tokensByModel } of newStats.dailyModelTokens) {
    const known = tokensByDate.get(date)
    if (!known) {
      tokensByDate.set(date, { ...tokensByModel })
      continue
    }
    for (const [model, tokens] of Object.entries(tokensByModel)) {
      known[model] = (known[model] || 0) + tokens
    }
  }

  // Model usage: counters are added; the two limit fields keep the larger value.
  const modelUsage = { ...existingCache.modelUsage }
  for (const [model, added] of Object.entries(newStats.modelUsage)) {
    const prior = modelUsage[model]
    if (!prior) {
      modelUsage[model] = { ...added }
      continue
    }
    modelUsage[model] = {
      inputTokens: prior.inputTokens + added.inputTokens,
      outputTokens: prior.outputTokens + added.outputTokens,
      cacheReadInputTokens:
        prior.cacheReadInputTokens + added.cacheReadInputTokens,
      cacheCreationInputTokens:
        prior.cacheCreationInputTokens + added.cacheCreationInputTokens,
      webSearchRequests: prior.webSearchRequests + added.webSearchRequests,
      costUSD: prior.costUSD + added.costUSD,
      contextWindow: Math.max(prior.contextWindow, added.contextWindow),
      maxOutputTokens: Math.max(prior.maxOutputTokens, added.maxOutputTokens),
    }
  }

  // Hour counts: Object.entries yields string keys, so convert back to numbers.
  const hourCounts = { ...existingCache.hourCounts }
  for (const [hourKey, count] of Object.entries(newStats.hourCounts)) {
    const hour = parseInt(hourKey, 10)
    hourCounts[hour] = (hourCounts[hour] || 0) + count
  }

  // Session aggregates, gathered in a single pass over the new sessions.
  let newMessageTotal = 0
  let longestSession = existingCache.longestSession
  let firstSessionDate = existingCache.firstSessionDate
  for (const session of newStats.sessionStats) {
    newMessageTotal += session.messageCount
    if (!longestSession || session.duration > longestSession.duration) {
      longestSession = session
    }
    if (!firstSessionDate || session.timestamp < firstSessionDate) {
      firstSessionDate = session.timestamp
    }
  }

  const merged: PersistedStatsCache = {
    version: STATS_CACHE_VERSION,
    lastComputedDate: newLastComputedDate,
    dailyActivity: [...activityByDate.values()].sort((a, b) =>
      a.date.localeCompare(b.date),
    ),
    dailyModelTokens: Array.from(tokensByDate, ([date, tokensByModel]) => ({
      date,
      tokensByModel,
    })).sort((a, b) => a.date.localeCompare(b.date)),
    modelUsage,
    totalSessions: existingCache.totalSessions + newStats.sessionStats.length,
    totalMessages: existingCache.totalMessages + newMessageTotal,
    longestSession,
    firstSessionDate,
    hourCounts,
    totalSpeculationTimeSavedMs:
      existingCache.totalSpeculationTimeSavedMs +
      newStats.totalSpeculationTimeSavedMs,
  }

  if (feature('SHOT_STATS')) {
    // Sum session counts per shot count across the old and new distributions.
    const shotDistribution: { [shotCount: number]: number } = {
      ...(existingCache.shotDistribution || {}),
    }
    for (const [shotKey, sessions] of Object.entries(
      newStats.shotDistribution || {},
    )) {
      const shots = parseInt(shotKey, 10)
      shotDistribution[shots] = (shotDistribution[shots] || 0) + sessions
    }
    merged.shotDistribution = shotDistribution
  }

  return merged
}
399  
/**
 * Format a Date as YYYY-MM-DD.
 *
 * The value is the date portion of `toISOString()`, i.e. the calendar date
 * in UTC rather than in the local time zone.
 *
 * @param date - Date to format.
 * @returns The text before the `T` of the ISO timestamp.
 * @throws Error if the ISO string unexpectedly has no date portion.
 */
export function toDateString(date: Date): string {
  const [dateStr] = date.toISOString().split('T')
  if (dateStr) {
    return dateStr
  }
  throw new Error('Invalid ISO date string')
}
411  
/**
 * Current date as YYYY-MM-DD, as formatted by toDateString().
 */
export function getTodayDateString(): string {
  const now = new Date()
  return toDateString(now)
}
418  
/**
 * Get yesterday's date in YYYY-MM-DD format.
 *
 * The day is stepped back with the UTC accessors because toDateString()
 * formats via toISOString(), which is always UTC. Stepping back with the
 * local-time setDate()/getDate() moves by one local calendar day, which is
 * 23 or 25 hours long on DST transition days; the formatted result could
 * then equal today's UTC date or land two days back.
 *
 * @returns The UTC calendar date one day before the current instant.
 */
export function getYesterdayDateString(): string {
  const yesterday = new Date()
  // setUTCDate handles month and year rollover (e.g. the 1st minus one day).
  yesterday.setUTCDate(yesterday.getUTCDate() - 1)
  return toDateString(yesterday)
}
427  
/**
 * Tell whether `date1` comes strictly before `date2`.
 *
 * The strings are compared as text, which matches chronological order only
 * when both are in zero-padded YYYY-MM-DD form.
 *
 * @param date1 - Candidate earlier date (YYYY-MM-DD).
 * @param date2 - Date to compare against (YYYY-MM-DD).
 * @returns true when `date1` sorts before `date2`; false when equal or later.
 */
export function isDateBefore(date1: string, date2: string): boolean {
  const precedes = date2 > date1
  return precedes
}