/ utils / stats.ts
stats.ts
   1  import { feature } from 'bun:bundle'
   2  import { open } from 'fs/promises'
   3  import { basename, dirname, join, sep } from 'path'
   4  import type { ModelUsage } from 'src/entrypoints/agentSdkTypes.js'
   5  import type { Entry, TranscriptMessage } from '../types/logs.js'
   6  import { logForDebugging } from './debug.js'
   7  import { errorMessage, isENOENT } from './errors.js'
   8  import { getFsImplementation } from './fsOperations.js'
   9  import { readJSONLFile } from './json.js'
  10  import { SYNTHETIC_MODEL } from './messages.js'
  11  import { getProjectsDir, isTranscriptMessage } from './sessionStorage.js'
  12  import { SHELL_TOOL_NAMES } from './shell/shellToolUtils.js'
  13  import { jsonParse } from './slowOperations.js'
  14  import {
  15    getTodayDateString,
  16    getYesterdayDateString,
  17    isDateBefore,
  18    loadStatsCache,
  19    mergeCacheWithNewStats,
  20    type PersistedStatsCache,
  21    saveStatsCache,
  22    toDateString,
  23    withStatsCacheLock,
  24  } from './statsCache.js'
  25  
  26  export type DailyActivity = {
  27    date: string // YYYY-MM-DD format
  28    messageCount: number
  29    sessionCount: number
  30    toolCallCount: number
  31  }
  32  
  33  export type DailyModelTokens = {
  34    date: string // YYYY-MM-DD format
  35    tokensByModel: { [modelName: string]: number } // total tokens (input + output) per model
  36  }
  37  
  38  export type StreakInfo = {
  39    currentStreak: number
  40    longestStreak: number
  41    currentStreakStart: string | null
  42    longestStreakStart: string | null
  43    longestStreakEnd: string | null
  44  }
  45  
  46  export type SessionStats = {
  47    sessionId: string
  48    duration: number // in milliseconds
  49    messageCount: number
  50    timestamp: string
  51  }
  52  
  53  export type ClaudeCodeStats = {
  54    // Activity overview
  55    totalSessions: number
  56    totalMessages: number
  57    totalDays: number
  58    activeDays: number
  59  
  60    // Streaks
  61    streaks: StreakInfo
  62  
  63    // Daily activity for heatmap
  64    dailyActivity: DailyActivity[]
  65  
  66    // Daily token usage per model for charts
  67    dailyModelTokens: DailyModelTokens[]
  68  
  69    // Session info
  70    longestSession: SessionStats | null
  71  
  72    // Model usage aggregated
  73    modelUsage: { [modelName: string]: ModelUsage }
  74  
  75    // Time stats
  76    firstSessionDate: string | null
  77    lastSessionDate: string | null
  78    peakActivityDay: string | null
  79    peakActivityHour: number | null
  80  
  81    // Speculation time saved
  82    totalSpeculationTimeSavedMs: number
  83  
  84    // Shot stats (ant-only, gated by SHOT_STATS feature flag)
  85    shotDistribution?: { [shotCount: number]: number }
  86    oneShotRate?: number
  87  }
  88  
  89  /**
  90   * Result of processing session files - intermediate stats that can be merged.
  91   */
  92  type ProcessedStats = {
  93    dailyActivity: DailyActivity[]
  94    dailyModelTokens: DailyModelTokens[]
  95    modelUsage: { [modelName: string]: ModelUsage }
  96    sessionStats: SessionStats[]
  97    hourCounts: { [hour: number]: number }
  98    totalMessages: number
  99    totalSpeculationTimeSavedMs: number
 100    shotDistribution?: { [shotCount: number]: number }
 101  }
 102  
 103  /**
 104   * Options for processing session files.
 105   */
 106  type ProcessOptions = {
 107    // Only include data from dates >= this date (YYYY-MM-DD format)
 108    fromDate?: string
 109    // Only include data from dates <= this date (YYYY-MM-DD format)
 110    toDate?: string
 111  }
 112  
 113  /**
 114   * Process session files and extract stats.
 115   * Can filter by date range.
 116   */
 117  async function processSessionFiles(
 118    sessionFiles: string[],
 119    options: ProcessOptions = {},
 120  ): Promise<ProcessedStats> {
 121    const { fromDate, toDate } = options
 122    const fs = getFsImplementation()
 123  
 124    const dailyActivityMap = new Map<string, DailyActivity>()
 125    const dailyModelTokensMap = new Map<string, { [modelName: string]: number }>()
 126    const sessions: SessionStats[] = []
 127    const hourCounts = new Map<number, number>()
 128    let totalMessages = 0
 129    let totalSpeculationTimeSavedMs = 0
 130    const modelUsageAgg: { [modelName: string]: ModelUsage } = {}
 131    const shotDistributionMap = feature('SHOT_STATS')
 132      ? new Map<number, number>()
 133      : undefined
 134    // Track parent sessions that already recorded a shot count (dedup across subagents)
 135    const sessionsWithShotCount = new Set<string>()
 136  
 137    // Process session files in parallel batches for better performance
 138    const BATCH_SIZE = 20
 139    for (let i = 0; i < sessionFiles.length; i += BATCH_SIZE) {
 140      const batch = sessionFiles.slice(i, i + BATCH_SIZE)
 141      const results = await Promise.all(
 142        batch.map(async sessionFile => {
 143          try {
 144            // If we have a fromDate filter, skip files that haven't been modified since then
 145            if (fromDate) {
 146              let fileSize = 0
 147              try {
 148                const fileStat = await fs.stat(sessionFile)
 149                const fileModifiedDate = toDateString(fileStat.mtime)
 150                if (isDateBefore(fileModifiedDate, fromDate)) {
 151                  return {
 152                    sessionFile,
 153                    entries: null,
 154                    error: null,
 155                    skipped: true,
 156                  }
 157                }
 158                fileSize = fileStat.size
 159              } catch {
 160                // If we can't stat the file, try to read it anyway
 161              }
 162              // For large files, peek at the session start date before reading everything.
 163              // Sessions that pass the mtime filter but started before fromDate are skipped
 164              // (e.g. a month-old session resumed today gets a new mtime write but old start date).
 165              if (fileSize > 65536) {
 166                const startDate = await readSessionStartDate(sessionFile)
 167                if (startDate && isDateBefore(startDate, fromDate)) {
 168                  return {
 169                    sessionFile,
 170                    entries: null,
 171                    error: null,
 172                    skipped: true,
 173                  }
 174                }
 175              }
 176            }
 177            const entries = await readJSONLFile<Entry>(sessionFile)
 178            return { sessionFile, entries, error: null, skipped: false }
 179          } catch (error) {
 180            return { sessionFile, entries: null, error, skipped: false }
 181          }
 182        }),
 183      )
 184  
 185      for (const { sessionFile, entries, error, skipped } of results) {
 186        if (skipped) continue
 187        if (error || !entries) {
 188          logForDebugging(
 189            `Failed to read session file ${sessionFile}: ${errorMessage(error)}`,
 190          )
 191          continue
 192        }
 193  
 194        const sessionId = basename(sessionFile, '.jsonl')
 195        const messages: TranscriptMessage[] = []
 196  
 197        for (const entry of entries) {
 198          if (isTranscriptMessage(entry)) {
 199            messages.push(entry)
 200          } else if (entry.type === 'speculation-accept') {
 201            totalSpeculationTimeSavedMs += entry.timeSavedMs
 202          }
 203        }
 204  
 205        if (messages.length === 0) continue
 206  
 207        // Subagent transcripts mark all messages as sidechain. We still want
 208        // their token usage counted, but not as separate sessions.
 209        const isSubagentFile = sessionFile.includes(`${sep}subagents${sep}`)
 210  
 211        // Extract shot count from PR attribution in gh pr create calls (ant-only)
 212        // This must run before the sidechain filter since subagent transcripts
 213        // mark all messages as sidechain
 214        if (feature('SHOT_STATS') && shotDistributionMap) {
 215          const parentSessionId = isSubagentFile
 216            ? basename(dirname(dirname(sessionFile)))
 217            : sessionId
 218  
 219          if (!sessionsWithShotCount.has(parentSessionId)) {
 220            const shotCount = extractShotCountFromMessages(messages)
 221            if (shotCount !== null) {
 222              sessionsWithShotCount.add(parentSessionId)
 223              shotDistributionMap.set(
 224                shotCount,
 225                (shotDistributionMap.get(shotCount) || 0) + 1,
 226              )
 227            }
 228          }
 229        }
 230  
 231        // Filter out sidechain messages for session metadata (duration, counts).
 232        // For subagent files, use all messages since they're all sidechain.
 233        const mainMessages = isSubagentFile
 234          ? messages
 235          : messages.filter(m => !m.isSidechain)
 236        if (mainMessages.length === 0) continue
 237  
 238        const firstMessage = mainMessages[0]!
 239        const lastMessage = mainMessages.at(-1)!
 240  
 241        const firstTimestamp = new Date(firstMessage.timestamp)
 242        const lastTimestamp = new Date(lastMessage.timestamp)
 243  
 244        // Skip sessions with malformed timestamps — some transcripts on disk
 245        // have entries missing the timestamp field (e.g. partial/remote writes).
 246        // new Date(undefined) produces an Invalid Date, and toDateString() would
 247        // throw RangeError: Invalid Date on .toISOString().
 248        if (isNaN(firstTimestamp.getTime()) || isNaN(lastTimestamp.getTime())) {
 249          logForDebugging(
 250            `Skipping session with invalid timestamp: ${sessionFile}`,
 251          )
 252          continue
 253        }
 254  
 255        const dateKey = toDateString(firstTimestamp)
 256  
 257        // Apply date filters
 258        if (fromDate && isDateBefore(dateKey, fromDate)) continue
 259        if (toDate && isDateBefore(toDate, dateKey)) continue
 260  
 261        // Track daily activity (use first message date as session date)
 262        const existing = dailyActivityMap.get(dateKey) || {
 263          date: dateKey,
 264          messageCount: 0,
 265          sessionCount: 0,
 266          toolCallCount: 0,
 267        }
 268  
 269        // Subagent files contribute tokens and tool calls, but aren't sessions.
 270        if (!isSubagentFile) {
 271          const duration = lastTimestamp.getTime() - firstTimestamp.getTime()
 272  
 273          sessions.push({
 274            sessionId,
 275            duration,
 276            messageCount: mainMessages.length,
 277            timestamp: firstMessage.timestamp,
 278          })
 279  
 280          totalMessages += mainMessages.length
 281  
 282          existing.sessionCount++
 283          existing.messageCount += mainMessages.length
 284  
 285          const hour = firstTimestamp.getHours()
 286          hourCounts.set(hour, (hourCounts.get(hour) || 0) + 1)
 287        }
 288  
 289        if (!isSubagentFile || dailyActivityMap.has(dateKey)) {
 290          dailyActivityMap.set(dateKey, existing)
 291        }
 292  
 293        // Process messages for tool usage and model stats
 294        for (const message of mainMessages) {
 295          if (message.type === 'assistant') {
 296            const content = message.message?.content
 297            if (Array.isArray(content)) {
 298              for (const block of content) {
 299                if (block.type === 'tool_use') {
 300                  const activity = dailyActivityMap.get(dateKey)
 301                  if (activity) {
 302                    activity.toolCallCount++
 303                  }
 304                }
 305              }
 306            }
 307  
 308            // Track model usage if available (skip synthetic messages)
 309            if (message.message?.usage) {
 310              const usage = message.message.usage
 311              const model = message.message.model || 'unknown'
 312  
 313              // Skip synthetic messages - they are internal and shouldn't appear in stats
 314              if (model === SYNTHETIC_MODEL) {
 315                continue
 316              }
 317  
 318              if (!modelUsageAgg[model]) {
 319                modelUsageAgg[model] = {
 320                  inputTokens: 0,
 321                  outputTokens: 0,
 322                  cacheReadInputTokens: 0,
 323                  cacheCreationInputTokens: 0,
 324                  webSearchRequests: 0,
 325                  costUSD: 0,
 326                  contextWindow: 0,
 327                  maxOutputTokens: 0,
 328                }
 329              }
 330  
 331              modelUsageAgg[model]!.inputTokens += usage.input_tokens || 0
 332              modelUsageAgg[model]!.outputTokens += usage.output_tokens || 0
 333              modelUsageAgg[model]!.cacheReadInputTokens +=
 334                usage.cache_read_input_tokens || 0
 335              modelUsageAgg[model]!.cacheCreationInputTokens +=
 336                usage.cache_creation_input_tokens || 0
 337  
 338              // Track daily tokens per model
 339              const totalTokens =
 340                (usage.input_tokens || 0) + (usage.output_tokens || 0)
 341              if (totalTokens > 0) {
 342                const dayTokens = dailyModelTokensMap.get(dateKey) || {}
 343                dayTokens[model] = (dayTokens[model] || 0) + totalTokens
 344                dailyModelTokensMap.set(dateKey, dayTokens)
 345              }
 346            }
 347          }
 348        }
 349      }
 350    }
 351  
 352    return {
 353      dailyActivity: Array.from(dailyActivityMap.values()).sort((a, b) =>
 354        a.date.localeCompare(b.date),
 355      ),
 356      dailyModelTokens: Array.from(dailyModelTokensMap.entries())
 357        .map(([date, tokensByModel]) => ({ date, tokensByModel }))
 358        .sort((a, b) => a.date.localeCompare(b.date)),
 359      modelUsage: modelUsageAgg,
 360      sessionStats: sessions,
 361      hourCounts: Object.fromEntries(hourCounts),
 362      totalMessages,
 363      totalSpeculationTimeSavedMs,
 364      ...(feature('SHOT_STATS') && shotDistributionMap
 365        ? { shotDistribution: Object.fromEntries(shotDistributionMap) }
 366        : {}),
 367    }
 368  }
 369  
 370  /**
 371   * Get all session files from all project directories.
 372   * Includes both main session files and subagent transcript files.
 373   */
 374  async function getAllSessionFiles(): Promise<string[]> {
 375    const projectsDir = getProjectsDir()
 376    const fs = getFsImplementation()
 377  
 378    // Get all project directories
 379    let allEntries
 380    try {
 381      allEntries = await fs.readdir(projectsDir)
 382    } catch (e) {
 383      if (isENOENT(e)) return []
 384      throw e
 385    }
 386    const projectDirs = allEntries
 387      .filter(dirent => dirent.isDirectory())
 388      .map(dirent => join(projectsDir, dirent.name))
 389  
 390    // Collect all session files from all projects in parallel
 391    const projectResults = await Promise.all(
 392      projectDirs.map(async projectDir => {
 393        try {
 394          const entries = await fs.readdir(projectDir)
 395  
 396          // Collect main session files (*.jsonl directly in project dir)
 397          const mainFiles = entries
 398            .filter(dirent => dirent.isFile() && dirent.name.endsWith('.jsonl'))
 399            .map(dirent => join(projectDir, dirent.name))
 400  
 401          // Collect subagent files from session subdirectories in parallel
 402          // Structure: {projectDir}/{sessionId}/subagents/agent-{agentId}.jsonl
 403          const sessionDirs = entries.filter(dirent => dirent.isDirectory())
 404          const subagentResults = await Promise.all(
 405            sessionDirs.map(async sessionDir => {
 406              const subagentsDir = join(projectDir, sessionDir.name, 'subagents')
 407              try {
 408                const subagentEntries = await fs.readdir(subagentsDir)
 409                return subagentEntries
 410                  .filter(
 411                    dirent =>
 412                      dirent.isFile() &&
 413                      dirent.name.endsWith('.jsonl') &&
 414                      dirent.name.startsWith('agent-'),
 415                  )
 416                  .map(dirent => join(subagentsDir, dirent.name))
 417              } catch {
 418                // subagents directory doesn't exist for this session, skip
 419                return []
 420              }
 421            }),
 422          )
 423  
 424          return [...mainFiles, ...subagentResults.flat()]
 425        } catch (error) {
 426          logForDebugging(
 427            `Failed to read project directory ${projectDir}: ${errorMessage(error)}`,
 428          )
 429          return []
 430        }
 431      }),
 432    )
 433  
 434    return projectResults.flat()
 435  }
 436  
 437  /**
 438   * Convert a PersistedStatsCache to ClaudeCodeStats by computing derived fields.
 439   */
 440  function cacheToStats(
 441    cache: PersistedStatsCache,
 442    todayStats: ProcessedStats | null,
 443  ): ClaudeCodeStats {
 444    // Merge cache with today's stats
 445    const dailyActivityMap = new Map<string, DailyActivity>()
 446    for (const day of cache.dailyActivity) {
 447      dailyActivityMap.set(day.date, { ...day })
 448    }
 449    if (todayStats) {
 450      for (const day of todayStats.dailyActivity) {
 451        const existing = dailyActivityMap.get(day.date)
 452        if (existing) {
 453          existing.messageCount += day.messageCount
 454          existing.sessionCount += day.sessionCount
 455          existing.toolCallCount += day.toolCallCount
 456        } else {
 457          dailyActivityMap.set(day.date, { ...day })
 458        }
 459      }
 460    }
 461  
 462    const dailyModelTokensMap = new Map<string, { [model: string]: number }>()
 463    for (const day of cache.dailyModelTokens) {
 464      dailyModelTokensMap.set(day.date, { ...day.tokensByModel })
 465    }
 466    if (todayStats) {
 467      for (const day of todayStats.dailyModelTokens) {
 468        const existing = dailyModelTokensMap.get(day.date)
 469        if (existing) {
 470          for (const [model, tokens] of Object.entries(day.tokensByModel)) {
 471            existing[model] = (existing[model] || 0) + tokens
 472          }
 473        } else {
 474          dailyModelTokensMap.set(day.date, { ...day.tokensByModel })
 475        }
 476      }
 477    }
 478  
 479    // Merge model usage
 480    const modelUsage = { ...cache.modelUsage }
 481    if (todayStats) {
 482      for (const [model, usage] of Object.entries(todayStats.modelUsage)) {
 483        if (modelUsage[model]) {
 484          modelUsage[model] = {
 485            inputTokens: modelUsage[model]!.inputTokens + usage.inputTokens,
 486            outputTokens: modelUsage[model]!.outputTokens + usage.outputTokens,
 487            cacheReadInputTokens:
 488              modelUsage[model]!.cacheReadInputTokens +
 489              usage.cacheReadInputTokens,
 490            cacheCreationInputTokens:
 491              modelUsage[model]!.cacheCreationInputTokens +
 492              usage.cacheCreationInputTokens,
 493            webSearchRequests:
 494              modelUsage[model]!.webSearchRequests + usage.webSearchRequests,
 495            costUSD: modelUsage[model]!.costUSD + usage.costUSD,
 496            contextWindow: Math.max(
 497              modelUsage[model]!.contextWindow,
 498              usage.contextWindow,
 499            ),
 500            maxOutputTokens: Math.max(
 501              modelUsage[model]!.maxOutputTokens,
 502              usage.maxOutputTokens,
 503            ),
 504          }
 505        } else {
 506          modelUsage[model] = { ...usage }
 507        }
 508      }
 509    }
 510  
 511    // Merge hour counts
 512    const hourCountsMap = new Map<number, number>()
 513    for (const [hour, count] of Object.entries(cache.hourCounts)) {
 514      hourCountsMap.set(parseInt(hour, 10), count)
 515    }
 516    if (todayStats) {
 517      for (const [hour, count] of Object.entries(todayStats.hourCounts)) {
 518        const hourNum = parseInt(hour, 10)
 519        hourCountsMap.set(hourNum, (hourCountsMap.get(hourNum) || 0) + count)
 520      }
 521    }
 522  
 523    // Calculate derived stats
 524    const dailyActivityArray = Array.from(dailyActivityMap.values()).sort(
 525      (a, b) => a.date.localeCompare(b.date),
 526    )
 527    const streaks = calculateStreaks(dailyActivityArray)
 528  
 529    const dailyModelTokens = Array.from(dailyModelTokensMap.entries())
 530      .map(([date, tokensByModel]) => ({ date, tokensByModel }))
 531      .sort((a, b) => a.date.localeCompare(b.date))
 532  
 533    // Compute session aggregates: combine cache aggregates with today's stats
 534    const totalSessions =
 535      cache.totalSessions + (todayStats?.sessionStats.length || 0)
 536    const totalMessages = cache.totalMessages + (todayStats?.totalMessages || 0)
 537  
 538    // Find longest session (compare cache's longest with today's sessions)
 539    let longestSession = cache.longestSession
 540    if (todayStats) {
 541      for (const session of todayStats.sessionStats) {
 542        if (!longestSession || session.duration > longestSession.duration) {
 543          longestSession = session
 544        }
 545      }
 546    }
 547  
 548    // Find first/last session dates
 549    let firstSessionDate = cache.firstSessionDate
 550    let lastSessionDate: string | null = null
 551    if (todayStats) {
 552      for (const session of todayStats.sessionStats) {
 553        if (!firstSessionDate || session.timestamp < firstSessionDate) {
 554          firstSessionDate = session.timestamp
 555        }
 556        if (!lastSessionDate || session.timestamp > lastSessionDate) {
 557          lastSessionDate = session.timestamp
 558        }
 559      }
 560    }
 561    // If no today sessions, derive lastSessionDate from dailyActivity
 562    if (!lastSessionDate && dailyActivityArray.length > 0) {
 563      lastSessionDate = dailyActivityArray.at(-1)!.date
 564    }
 565  
 566    const peakActivityDay =
 567      dailyActivityArray.length > 0
 568        ? dailyActivityArray.reduce((max, d) =>
 569            d.messageCount > max.messageCount ? d : max,
 570          ).date
 571        : null
 572  
 573    const peakActivityHour =
 574      hourCountsMap.size > 0
 575        ? Array.from(hourCountsMap.entries()).reduce((max, [hour, count]) =>
 576            count > max[1] ? [hour, count] : max,
 577          )[0]
 578        : null
 579  
 580    const totalDays =
 581      firstSessionDate && lastSessionDate
 582        ? Math.ceil(
 583            (new Date(lastSessionDate).getTime() -
 584              new Date(firstSessionDate).getTime()) /
 585              (1000 * 60 * 60 * 24),
 586          ) + 1
 587        : 0
 588  
 589    const totalSpeculationTimeSavedMs =
 590      cache.totalSpeculationTimeSavedMs +
 591      (todayStats?.totalSpeculationTimeSavedMs || 0)
 592  
 593    const result: ClaudeCodeStats = {
 594      totalSessions,
 595      totalMessages,
 596      totalDays,
 597      activeDays: dailyActivityMap.size,
 598      streaks,
 599      dailyActivity: dailyActivityArray,
 600      dailyModelTokens,
 601      longestSession,
 602      modelUsage,
 603      firstSessionDate,
 604      lastSessionDate,
 605      peakActivityDay,
 606      peakActivityHour,
 607      totalSpeculationTimeSavedMs,
 608    }
 609  
 610    if (feature('SHOT_STATS')) {
 611      const shotDistribution: { [shotCount: number]: number } = {
 612        ...(cache.shotDistribution || {}),
 613      }
 614      if (todayStats?.shotDistribution) {
 615        for (const [count, sessions] of Object.entries(
 616          todayStats.shotDistribution,
 617        )) {
 618          const key = parseInt(count, 10)
 619          shotDistribution[key] = (shotDistribution[key] || 0) + sessions
 620        }
 621      }
 622      result.shotDistribution = shotDistribution
 623      const totalWithShots = Object.values(shotDistribution).reduce(
 624        (sum, n) => sum + n,
 625        0,
 626      )
 627      result.oneShotRate =
 628        totalWithShots > 0
 629          ? Math.round(((shotDistribution[1] || 0) / totalWithShots) * 100)
 630          : 0
 631    }
 632  
 633    return result
 634  }
 635  
 636  /**
 637   * Aggregates stats from all Claude Code sessions across all projects.
 638   * Uses a disk cache to avoid reprocessing historical data.
 639   */
 640  export async function aggregateClaudeCodeStats(): Promise<ClaudeCodeStats> {
 641    const allSessionFiles = await getAllSessionFiles()
 642  
 643    if (allSessionFiles.length === 0) {
 644      return getEmptyStats()
 645    }
 646  
 647    // Use lock to prevent race conditions with background cache updates
 648    const updatedCache = await withStatsCacheLock(async () => {
 649      // Load the cache
 650      const cache = await loadStatsCache()
 651      const yesterday = getYesterdayDateString()
 652  
 653      // Determine what needs to be processed
 654      // - If no cache: process everything up to yesterday, then today separately
 655      // - If cache exists: process from day after lastComputedDate to yesterday, then today
 656      let result = cache
 657  
 658      if (!cache.lastComputedDate) {
 659        // No cache - process all historical data (everything before today)
 660        logForDebugging('Stats cache empty, processing all historical data')
 661        const historicalStats = await processSessionFiles(allSessionFiles, {
 662          toDate: yesterday,
 663        })
 664  
 665        if (
 666          historicalStats.sessionStats.length > 0 ||
 667          historicalStats.dailyActivity.length > 0
 668        ) {
 669          result = mergeCacheWithNewStats(cache, historicalStats, yesterday)
 670          await saveStatsCache(result)
 671        }
 672      } else if (isDateBefore(cache.lastComputedDate, yesterday)) {
 673        // Cache is stale - process new days
 674        // Process from day after lastComputedDate to yesterday
 675        const nextDay = getNextDay(cache.lastComputedDate)
 676        logForDebugging(
 677          `Stats cache stale (${cache.lastComputedDate}), processing ${nextDay} to ${yesterday}`,
 678        )
 679        const newStats = await processSessionFiles(allSessionFiles, {
 680          fromDate: nextDay,
 681          toDate: yesterday,
 682        })
 683  
 684        if (
 685          newStats.sessionStats.length > 0 ||
 686          newStats.dailyActivity.length > 0
 687        ) {
 688          result = mergeCacheWithNewStats(cache, newStats, yesterday)
 689          await saveStatsCache(result)
 690        } else {
 691          // No new data, but update lastComputedDate
 692          result = { ...cache, lastComputedDate: yesterday }
 693          await saveStatsCache(result)
 694        }
 695      }
 696  
 697      return result
 698    })
 699  
 700    // Always process today's data live (it's incomplete)
 701    // This doesn't need to be in the lock since it doesn't modify the cache
 702    const today = getTodayDateString()
 703    const todayStats = await processSessionFiles(allSessionFiles, {
 704      fromDate: today,
 705      toDate: today,
 706    })
 707  
 708    // Combine cache with today's stats
 709    return cacheToStats(updatedCache, todayStats)
 710  }
 711  
 712  export type StatsDateRange = '7d' | '30d' | 'all'
 713  
 714  /**
 715   * Aggregates stats for a specific date range.
 716   * For 'all', uses the cached aggregation. For other ranges, processes files directly.
 717   */
 718  export async function aggregateClaudeCodeStatsForRange(
 719    range: StatsDateRange,
 720  ): Promise<ClaudeCodeStats> {
 721    if (range === 'all') {
 722      return aggregateClaudeCodeStats()
 723    }
 724  
 725    const allSessionFiles = await getAllSessionFiles()
 726    if (allSessionFiles.length === 0) {
 727      return getEmptyStats()
 728    }
 729  
 730    // Calculate fromDate based on range
 731    const today = new Date()
 732    const daysBack = range === '7d' ? 7 : 30
 733    const fromDate = new Date(today)
 734    fromDate.setDate(today.getDate() - daysBack + 1) // +1 to include today
 735    const fromDateStr = toDateString(fromDate)
 736  
 737    // Process session files for the date range
 738    const stats = await processSessionFiles(allSessionFiles, {
 739      fromDate: fromDateStr,
 740    })
 741  
 742    return processedStatsToClaudeCodeStats(stats)
 743  }
 744  
 745  /**
 746   * Convert ProcessedStats to ClaudeCodeStats.
 747   * Used for filtered date ranges that bypass the cache.
 748   */
 749  function processedStatsToClaudeCodeStats(
 750    stats: ProcessedStats,
 751  ): ClaudeCodeStats {
 752    const dailyActivitySorted = stats.dailyActivity
 753      .slice()
 754      .sort((a, b) => a.date.localeCompare(b.date))
 755    const dailyModelTokensSorted = stats.dailyModelTokens
 756      .slice()
 757      .sort((a, b) => a.date.localeCompare(b.date))
 758  
 759    // Calculate streaks from daily activity
 760    const streaks = calculateStreaks(dailyActivitySorted)
 761  
 762    // Find longest session
 763    let longestSession: SessionStats | null = null
 764    for (const session of stats.sessionStats) {
 765      if (!longestSession || session.duration > longestSession.duration) {
 766        longestSession = session
 767      }
 768    }
 769  
 770    // Find first/last session dates
 771    let firstSessionDate: string | null = null
 772    let lastSessionDate: string | null = null
 773    for (const session of stats.sessionStats) {
 774      if (!firstSessionDate || session.timestamp < firstSessionDate) {
 775        firstSessionDate = session.timestamp
 776      }
 777      if (!lastSessionDate || session.timestamp > lastSessionDate) {
 778        lastSessionDate = session.timestamp
 779      }
 780    }
 781  
 782    // Peak activity day
 783    const peakActivityDay =
 784      dailyActivitySorted.length > 0
 785        ? dailyActivitySorted.reduce((max, d) =>
 786            d.messageCount > max.messageCount ? d : max,
 787          ).date
 788        : null
 789  
 790    // Peak activity hour
 791    const hourEntries = Object.entries(stats.hourCounts)
 792    const peakActivityHour =
 793      hourEntries.length > 0
 794        ? parseInt(
 795            hourEntries.reduce((max, [hour, count]) =>
 796              count > parseInt(max[1].toString()) ? [hour, count] : max,
 797            )[0],
 798            10,
 799          )
 800        : null
 801  
 802    // Total days in range
 803    const totalDays =
 804      firstSessionDate && lastSessionDate
 805        ? Math.ceil(
 806            (new Date(lastSessionDate).getTime() -
 807              new Date(firstSessionDate).getTime()) /
 808              (1000 * 60 * 60 * 24),
 809          ) + 1
 810        : 0
 811  
 812    const result: ClaudeCodeStats = {
 813      totalSessions: stats.sessionStats.length,
 814      totalMessages: stats.totalMessages,
 815      totalDays,
 816      activeDays: stats.dailyActivity.length,
 817      streaks,
 818      dailyActivity: dailyActivitySorted,
 819      dailyModelTokens: dailyModelTokensSorted,
 820      longestSession,
 821      modelUsage: stats.modelUsage,
 822      firstSessionDate,
 823      lastSessionDate,
 824      peakActivityDay,
 825      peakActivityHour,
 826      totalSpeculationTimeSavedMs: stats.totalSpeculationTimeSavedMs,
 827    }
 828  
 829    if (feature('SHOT_STATS') && stats.shotDistribution) {
 830      result.shotDistribution = stats.shotDistribution
 831      const totalWithShots = Object.values(stats.shotDistribution).reduce(
 832        (sum, n) => sum + n,
 833        0,
 834      )
 835      result.oneShotRate =
 836        totalWithShots > 0
 837          ? Math.round(((stats.shotDistribution[1] || 0) / totalWithShots) * 100)
 838          : 0
 839    }
 840  
 841    return result
 842  }
 843  
 844  /**
 845   * Get the next day after a given date string (YYYY-MM-DD format).
 846   */
 847  function getNextDay(dateStr: string): string {
 848    const date = new Date(dateStr)
 849    date.setDate(date.getDate() + 1)
 850    return toDateString(date)
 851  }
 852  
 853  function calculateStreaks(dailyActivity: DailyActivity[]): StreakInfo {
 854    if (dailyActivity.length === 0) {
 855      return {
 856        currentStreak: 0,
 857        longestStreak: 0,
 858        currentStreakStart: null,
 859        longestStreakStart: null,
 860        longestStreakEnd: null,
 861      }
 862    }
 863  
 864    const today = new Date()
 865    today.setHours(0, 0, 0, 0)
 866  
 867    // Calculate current streak (working backwards from today)
 868    let currentStreak = 0
 869    let currentStreakStart: string | null = null
 870    const checkDate = new Date(today)
 871  
 872    // Build a set of active dates for quick lookup
 873    const activeDates = new Set(dailyActivity.map(d => d.date))
 874  
 875    while (true) {
 876      const dateStr = toDateString(checkDate)
 877      if (!activeDates.has(dateStr)) {
 878        break
 879      }
 880      currentStreak++
 881      currentStreakStart = dateStr
 882      checkDate.setDate(checkDate.getDate() - 1)
 883    }
 884  
 885    // Calculate longest streak
 886    let longestStreak = 0
 887    let longestStreakStart: string | null = null
 888    let longestStreakEnd: string | null = null
 889  
 890    if (dailyActivity.length > 0) {
 891      const sortedDates = Array.from(activeDates).sort()
 892      let tempStreak = 1
 893      let tempStart = sortedDates[0]!
 894  
 895      for (let i = 1; i < sortedDates.length; i++) {
 896        const prevDate = new Date(sortedDates[i - 1]!)
 897        const currDate = new Date(sortedDates[i]!)
 898  
 899        const dayDiff = Math.round(
 900          (currDate.getTime() - prevDate.getTime()) / (1000 * 60 * 60 * 24),
 901        )
 902  
 903        if (dayDiff === 1) {
 904          tempStreak++
 905        } else {
 906          if (tempStreak > longestStreak) {
 907            longestStreak = tempStreak
 908            longestStreakStart = tempStart
 909            longestStreakEnd = sortedDates[i - 1]!
 910          }
 911          tempStreak = 1
 912          tempStart = sortedDates[i]!
 913        }
 914      }
 915  
 916      // Check final streak
 917      if (tempStreak > longestStreak) {
 918        longestStreak = tempStreak
 919        longestStreakStart = tempStart
 920        longestStreakEnd = sortedDates.at(-1)!
 921      }
 922    }
 923  
 924    return {
 925      currentStreak,
 926      longestStreak,
 927      currentStreakStart,
 928      longestStreakStart,
 929      longestStreakEnd,
 930    }
 931  }
 932  
 933  const SHOT_COUNT_REGEX = /(\d+)-shotted by/
 934  
 935  /**
 936   * Extract the shot count from PR attribution text in a `gh pr create` Bash call.
 937   * The attribution format is: "N-shotted by model-name"
 938   * Returns the shot count, or null if not found.
 939   */
 940  function extractShotCountFromMessages(
 941    messages: TranscriptMessage[],
 942  ): number | null {
 943    for (const m of messages) {
 944      if (m.type !== 'assistant') continue
 945      const content = m.message?.content
 946      if (!Array.isArray(content)) continue
 947      for (const block of content) {
 948        if (
 949          block.type !== 'tool_use' ||
 950          !SHELL_TOOL_NAMES.includes(block.name) ||
 951          typeof block.input !== 'object' ||
 952          block.input === null ||
 953          !('command' in block.input) ||
 954          typeof block.input.command !== 'string'
 955        ) {
 956          continue
 957        }
 958        const match = SHOT_COUNT_REGEX.exec(block.input.command)
 959        if (match) {
 960          return parseInt(match[1]!, 10)
 961        }
 962      }
 963    }
 964    return null
 965  }
 966  
 967  // Transcript message types — must match isTranscriptMessage() in sessionStorage.ts.
 968  // The canonical dateKey (see processSessionFiles) reads mainMessages[0].timestamp,
 969  // where mainMessages = entries.filter(isTranscriptMessage).filter(!isSidechain).
 970  // This peek must extract the same value to be a safe skip optimization.
 971  const TRANSCRIPT_MESSAGE_TYPES = new Set([
 972    'user',
 973    'assistant',
 974    'attachment',
 975    'system',
 976    'progress',
 977  ])
 978  
 979  /**
 980   * Peeks at the head of a session file to get the session start date.
 981   * Uses a small 4 KB read to avoid loading the full file.
 982   *
 983   * Session files typically begin with non-transcript entries (`mode`,
 984   * `file-history-snapshot`, `attribution-snapshot`) before the first transcript
 985   * message, so we scan lines until we hit one. Each complete line is JSON-parsed
 986   * — naive string search is unsafe here because `file-history-snapshot` entries
 987   * embed a nested `snapshot.timestamp` carrying the *previous* session's date
 988   * (written by copyFileHistoryForResume), which would cause resumed sessions to
 989   * be miscategorised as old and silently dropped from stats.
 990   *
 991   * Returns a YYYY-MM-DD string, or null if no transcript message fits in the
 992   * head (caller falls through to the full read — safe default).
 993   */
 994  export async function readSessionStartDate(
 995    filePath: string,
 996  ): Promise<string | null> {
 997    try {
 998      const fd = await open(filePath, 'r')
 999      try {
1000        const buf = Buffer.allocUnsafe(4096)
1001        const { bytesRead } = await fd.read(buf, 0, buf.length, 0)
1002        if (bytesRead === 0) return null
1003        const head = buf.toString('utf8', 0, bytesRead)
1004  
1005        // Only trust complete lines — the 4KB boundary may bisect a JSON entry.
1006        const lastNewline = head.lastIndexOf('\n')
1007        if (lastNewline < 0) return null
1008  
1009        for (const line of head.slice(0, lastNewline).split('\n')) {
1010          if (!line) continue
1011          let entry: {
1012            type?: unknown
1013            timestamp?: unknown
1014            isSidechain?: unknown
1015          }
1016          try {
1017            entry = jsonParse(line)
1018          } catch {
1019            continue
1020          }
1021          if (typeof entry.type !== 'string') continue
1022          if (!TRANSCRIPT_MESSAGE_TYPES.has(entry.type)) continue
1023          if (entry.isSidechain === true) continue
1024          if (typeof entry.timestamp !== 'string') return null
1025          const date = new Date(entry.timestamp)
1026          if (Number.isNaN(date.getTime())) return null
1027          return toDateString(date)
1028        }
1029        return null
1030      } finally {
1031        await fd.close()
1032      }
1033    } catch {
1034      return null
1035    }
1036  }
1037  
/**
 * Build a zeroed-out stats object: every counter is 0, every collection is
 * empty and every optional date/peak field is null.
 *
 * @returns A freshly allocated ClaudeCodeStats on each call, so callers may
 *   mutate the result without affecting one another.
 */
function getEmptyStats(): ClaudeCodeStats {
  const streaks: ClaudeCodeStats['streaks'] = {
    currentStreak: 0,
    longestStreak: 0,
    currentStreakStart: null,
    longestStreakStart: null,
    longestStreakEnd: null,
  }

  const empty: ClaudeCodeStats = {
    totalSessions: 0,
    totalMessages: 0,
    totalDays: 0,
    activeDays: 0,
    streaks,
    dailyActivity: [],
    dailyModelTokens: [],
    longestSession: null,
    modelUsage: {},
    firstSessionDate: null,
    lastSessionDate: null,
    peakActivityDay: null,
    peakActivityHour: null,
    totalSpeculationTimeSavedMs: 0,
  }
  return empty
}