// utils/queryProfiler.ts
/**
 * Query profiling utility for measuring and reporting time spent in the query
 * pipeline from user input to first token arrival. Enable by setting CLAUDE_CODE_PROFILE_QUERY=1
 *
 * Uses Node.js built-in performance hooks API for standard timing measurement.
 * Tracks each query session with detailed checkpoints for identifying bottlenecks.
 *
 * Checkpoints tracked (in order):
 * - query_user_input_received: Start of profiling
 * - query_context_loading_start/end: Loading system prompts and contexts
 * - query_query_start: Entry to query call from REPL
 * - query_fn_entry: Entry to query() function
 * - query_microcompact_start/end: Microcompaction of messages
 * - query_autocompact_start/end: Autocompaction check
 * - query_setup_start/end: StreamingToolExecutor and model setup
 * - query_api_loop_start: Start of API retry loop
 * - query_api_streaming_start: Start of streaming API call
 * - query_tool_schema_build_start/end: Building tool schemas
 * - query_message_normalization_start/end: Normalizing messages
 * - query_client_creation_start/end: Creating Anthropic client
 * - query_api_request_sent: HTTP request dispatched (before await, inside retry body)
 * - query_response_headers_received: .withResponse() resolved (headers arrived)
 * - query_first_chunk_received: First streaming chunk received (TTFT)
 * - query_api_streaming_end: Streaming complete
 * - query_tool_execution_start/end: Tool execution
 * - query_recursive_call: Before recursive query call
 * - query_end: End of query
 */
 29  
 30  import { logForDebugging } from './debug.js'
 31  import { isEnvTruthy } from './envUtils.js'
 32  import { formatMs, formatTimelineLine, getPerformance } from './profilerBase.js'
 33  
// Module-level state - initialized once when the module loads
// eslint-disable-next-line custom-rules/no-process-env-top-level
const ENABLED = isEnvTruthy(process.env.CLAUDE_CODE_PROFILE_QUERY)

// Memory usage captured at each checkpoint, keyed by checkpoint name.
// Kept separately because perf_hooks marks carry no memory information.
const memorySnapshots = new Map<string, NodeJS.MemoryUsage>()

// Monotonically increasing count of profiled queries; shown in the report header.
let queryCount = 0

// Absolute timestamp of the first streaming chunk for the current session
// (null until one arrives); reset by startQueryProfile and used in the summary.
let firstTokenTime: number | null = null
 46  
 47  /**
 48   * Start profiling a new query session
 49   */
 50  export function startQueryProfile(): void {
 51    if (!ENABLED) return
 52  
 53    const perf = getPerformance()
 54  
 55    // Clear previous marks and memory snapshots
 56    perf.clearMarks()
 57    memorySnapshots.clear()
 58    firstTokenTime = null
 59  
 60    queryCount++
 61  
 62    // Record the start checkpoint
 63    queryCheckpoint('query_user_input_received')
 64  }
 65  
 66  /**
 67   * Record a checkpoint with the given name
 68   */
 69  export function queryCheckpoint(name: string): void {
 70    if (!ENABLED) return
 71  
 72    const perf = getPerformance()
 73    perf.mark(name)
 74    memorySnapshots.set(name, process.memoryUsage())
 75  
 76    // Track first token specially
 77    if (name === 'query_first_chunk_received' && firstTokenTime === null) {
 78      const marks = perf.getEntriesByType('mark')
 79      if (marks.length > 0) {
 80        const lastMark = marks[marks.length - 1]
 81        firstTokenTime = lastMark?.startTime ?? 0
 82      }
 83    }
 84  }
 85  
 86  /**
 87   * End the current query profiling session
 88   */
 89  export function endQueryProfile(): void {
 90    if (!ENABLED) return
 91  
 92    queryCheckpoint('query_profile_end')
 93  }
 94  
 95  /**
 96   * Identify slow operations (> 100ms delta)
 97   */
 98  function getSlowWarning(deltaMs: number, name: string): string {
 99    // Don't flag the first checkpoint as slow - it measures time from process start,
100    // not actual processing overhead
101    if (name === 'query_user_input_received') {
102      return ''
103    }
104  
105    if (deltaMs > 1000) {
106      return ` ⚠️  VERY SLOW`
107    }
108    if (deltaMs > 100) {
109      return ` ⚠️  SLOW`
110    }
111  
112    // Specific warnings for known bottlenecks
113    if (name.includes('git_status') && deltaMs > 50) {
114      return ' ⚠️  git status'
115    }
116    if (name.includes('tool_schema') && deltaMs > 50) {
117      return ' ⚠️  tool schemas'
118    }
119    if (name.includes('client_creation') && deltaMs > 50) {
120      return ' ⚠️  client creation'
121    }
122  
123    return ''
124  }
125  
126  /**
127   * Get a formatted report of all checkpoints for the current/last query
128   */
129  function getQueryProfileReport(): string {
130    if (!ENABLED) {
131      return 'Query profiling not enabled (set CLAUDE_CODE_PROFILE_QUERY=1)'
132    }
133  
134    const perf = getPerformance()
135    const marks = perf.getEntriesByType('mark')
136    if (marks.length === 0) {
137      return 'No query profiling checkpoints recorded'
138    }
139  
140    const lines: string[] = []
141    lines.push('='.repeat(80))
142    lines.push(`QUERY PROFILING REPORT - Query #${queryCount}`)
143    lines.push('='.repeat(80))
144    lines.push('')
145  
146    // Use first mark as baseline (query start time) to show relative times
147    const baselineTime = marks[0]?.startTime ?? 0
148    let prevTime = baselineTime
149    let apiRequestSentTime = 0
150    let firstChunkTime = 0
151  
152    for (const mark of marks) {
153      const relativeTime = mark.startTime - baselineTime
154      const deltaMs = mark.startTime - prevTime
155      lines.push(
156        formatTimelineLine(
157          relativeTime,
158          deltaMs,
159          mark.name,
160          memorySnapshots.get(mark.name),
161          10,
162          9,
163          getSlowWarning(deltaMs, mark.name),
164        ),
165      )
166  
167      // Track key milestones for summary (use relative times)
168      if (mark.name === 'query_api_request_sent') {
169        apiRequestSentTime = relativeTime
170      }
171      if (mark.name === 'query_first_chunk_received') {
172        firstChunkTime = relativeTime
173      }
174  
175      prevTime = mark.startTime
176    }
177  
178    // Calculate summary statistics (relative to baseline)
179    const lastMark = marks[marks.length - 1]
180    const totalTime = lastMark ? lastMark.startTime - baselineTime : 0
181  
182    lines.push('')
183    lines.push('-'.repeat(80))
184  
185    if (firstChunkTime > 0) {
186      const preRequestOverhead = apiRequestSentTime
187      const networkLatency = firstChunkTime - apiRequestSentTime
188      const preRequestPercent = (
189        (preRequestOverhead / firstChunkTime) *
190        100
191      ).toFixed(1)
192      const networkPercent = ((networkLatency / firstChunkTime) * 100).toFixed(1)
193  
194      lines.push(`Total TTFT: ${formatMs(firstChunkTime)}ms`)
195      lines.push(
196        `  - Pre-request overhead: ${formatMs(preRequestOverhead)}ms (${preRequestPercent}%)`,
197      )
198      lines.push(
199        `  - Network latency: ${formatMs(networkLatency)}ms (${networkPercent}%)`,
200      )
201    } else {
202      lines.push(`Total time: ${formatMs(totalTime)}ms`)
203    }
204  
205    // Add phase summary
206    lines.push(getPhaseSummary(marks, baselineTime))
207  
208    lines.push('='.repeat(80))
209  
210    return lines.join('\n')
211  }
212  
213  /**
214   * Get phase-based summary showing time spent in each major phase
215   */
216  function getPhaseSummary(
217    marks: Array<{ name: string; startTime: number }>,
218    baselineTime: number,
219  ): string {
220    const phases: Array<{ name: string; start: string; end: string }> = [
221      {
222        name: 'Context loading',
223        start: 'query_context_loading_start',
224        end: 'query_context_loading_end',
225      },
226      {
227        name: 'Microcompact',
228        start: 'query_microcompact_start',
229        end: 'query_microcompact_end',
230      },
231      {
232        name: 'Autocompact',
233        start: 'query_autocompact_start',
234        end: 'query_autocompact_end',
235      },
236      { name: 'Query setup', start: 'query_setup_start', end: 'query_setup_end' },
237      {
238        name: 'Tool schemas',
239        start: 'query_tool_schema_build_start',
240        end: 'query_tool_schema_build_end',
241      },
242      {
243        name: 'Message normalization',
244        start: 'query_message_normalization_start',
245        end: 'query_message_normalization_end',
246      },
247      {
248        name: 'Client creation',
249        start: 'query_client_creation_start',
250        end: 'query_client_creation_end',
251      },
252      {
253        name: 'Network TTFB',
254        start: 'query_api_request_sent',
255        end: 'query_first_chunk_received',
256      },
257      {
258        name: 'Tool execution',
259        start: 'query_tool_execution_start',
260        end: 'query_tool_execution_end',
261      },
262    ]
263  
264    const markMap = new Map(marks.map(m => [m.name, m.startTime - baselineTime]))
265  
266    const lines: string[] = []
267    lines.push('')
268    lines.push('PHASE BREAKDOWN:')
269  
270    for (const phase of phases) {
271      const startTime = markMap.get(phase.start)
272      const endTime = markMap.get(phase.end)
273  
274      if (startTime !== undefined && endTime !== undefined) {
275        const duration = endTime - startTime
276        const bar = '█'.repeat(Math.min(Math.ceil(duration / 10), 50)) // 1 block per 10ms, max 50
277        lines.push(
278          `  ${phase.name.padEnd(22)} ${formatMs(duration).padStart(10)}ms ${bar}`,
279        )
280      }
281    }
282  
283    // Calculate pre-API overhead (everything before api_request_sent)
284    const apiRequestSent = markMap.get('query_api_request_sent')
285    if (apiRequestSent !== undefined) {
286      lines.push('')
287      lines.push(
288        `  ${'Total pre-API overhead'.padEnd(22)} ${formatMs(apiRequestSent).padStart(10)}ms`,
289      )
290    }
291  
292    return lines.join('\n')
293  }
294  
295  /**
296   * Log the query profile report to debug output
297   */
298  export function logQueryProfileReport(): void {
299    if (!ENABLED) return
300    logForDebugging(getQueryProfileReport())
301  }