/ utils / headlessProfiler.ts
headlessProfiler.ts
  1  /**
  2   * Headless mode profiling utility for measuring per-turn latency in -p (print) mode.
  3   *
  4   * Tracks key timing phases per turn:
  5   * - Time to system message output (turn 0 only)
  6   * - Time to first query started
  7   * - Time to first API response (TTFT)
  8   *
  9   * Uses Node.js built-in performance hooks API for standard timing measurement.
 10   * Sampled logging: 100% of ant users, 5% of external users.
 11   *
 12   * Set CLAUDE_CODE_PROFILE_STARTUP=1 for detailed logging output.
 13   */
 14  
 15  import { getIsNonInteractiveSession } from '../bootstrap/state.js'
 16  import {
 17    type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 18    logEvent,
 19  } from '../services/analytics/index.js'
 20  import { logForDebugging } from './debug.js'
 21  import { isEnvTruthy } from './envUtils.js'
 22  import { getPerformance } from './profilerBase.js'
 23  import { jsonStringify } from './slowOperations.js'
 24  
 25  // Detailed profiling mode - same env var as startupProfiler
 26  // eslint-disable-next-line custom-rules/no-process-env-top-level
 27  const DETAILED_PROFILING = isEnvTruthy(process.env.CLAUDE_CODE_PROFILE_STARTUP)
 28  
 29  // Sampling for Statsig logging: 100% ant, 5% external
 30  // Decision made once at module load - non-sampled users pay no profiling cost
 31  const STATSIG_SAMPLE_RATE = 0.05
 32  // eslint-disable-next-line custom-rules/no-process-env-top-level
 33  const STATSIG_LOGGING_SAMPLED =
 34    process.env.USER_TYPE === 'ant' || Math.random() < STATSIG_SAMPLE_RATE
 35  
 36  // Enable profiling if either detailed mode OR sampled for Statsig
 37  const SHOULD_PROFILE = DETAILED_PROFILING || STATSIG_LOGGING_SAMPLED
 38  
 39  // Use a unique prefix to avoid conflicts with other profiler marks
 40  const MARK_PREFIX = 'headless_'
 41  
 42  // Track current turn number (auto-incremented by headlessProfilerStartTurn)
 43  let currentTurnNumber = -1
 44  
 45  /**
 46   * Clear all headless profiler marks from performance timeline
 47   */
 48  function clearHeadlessMarks(): void {
 49    const perf = getPerformance()
 50    const allMarks = perf.getEntriesByType('mark')
 51    for (const mark of allMarks) {
 52      if (mark.name.startsWith(MARK_PREFIX)) {
 53        perf.clearMarks(mark.name)
 54      }
 55    }
 56  }
 57  
 58  /**
 59   * Start a new turn for profiling. Clears previous marks, increments turn number,
 60   * and records turn_start. Call this at the beginning of each user message processing.
 61   */
 62  export function headlessProfilerStartTurn(): void {
 63    // Only profile in headless/non-interactive mode
 64    if (!getIsNonInteractiveSession()) return
 65    // Only profile if enabled
 66    if (!SHOULD_PROFILE) return
 67  
 68    currentTurnNumber++
 69    clearHeadlessMarks()
 70  
 71    const perf = getPerformance()
 72    perf.mark(`${MARK_PREFIX}turn_start`)
 73  
 74    if (DETAILED_PROFILING) {
 75      logForDebugging(`[headlessProfiler] Started turn ${currentTurnNumber}`)
 76    }
 77  }
 78  
 79  /**
 80   * Record a checkpoint with the given name.
 81   * Only records if in headless mode and profiling is enabled.
 82   */
 83  export function headlessProfilerCheckpoint(name: string): void {
 84    // Only profile in headless/non-interactive mode
 85    if (!getIsNonInteractiveSession()) return
 86    // Only profile if enabled
 87    if (!SHOULD_PROFILE) return
 88  
 89    const perf = getPerformance()
 90    perf.mark(`${MARK_PREFIX}${name}`)
 91  
 92    if (DETAILED_PROFILING) {
 93      logForDebugging(
 94        `[headlessProfiler] Checkpoint: ${name} at ${perf.now().toFixed(1)}ms`,
 95      )
 96    }
 97  }
 98  
 99  /**
100   * Log headless latency metrics for the current turn to Statsig.
101   * Call this at the end of each turn (before processing next user message).
102   */
103  export function logHeadlessProfilerTurn(): void {
104    // Only log in headless mode
105    if (!getIsNonInteractiveSession()) return
106    // Only log if enabled
107    if (!SHOULD_PROFILE) return
108  
109    const perf = getPerformance()
110    const allMarks = perf.getEntriesByType('mark')
111  
112    // Filter to only our headless marks
113    const marks = allMarks.filter(mark => mark.name.startsWith(MARK_PREFIX))
114    if (marks.length === 0) return
115  
116    // Build checkpoint lookup (strip prefix for easier access)
117    const checkpointTimes = new Map<string, number>()
118    for (const mark of marks) {
119      const name = mark.name.slice(MARK_PREFIX.length)
120      checkpointTimes.set(name, mark.startTime)
121    }
122  
123    const turnStart = checkpointTimes.get('turn_start')
124    if (turnStart === undefined) return
125  
126    // Compute phase durations relative to turn_start
127    const metadata: Record<string, number | string | undefined> = {
128      turn_number: currentTurnNumber,
129    }
130  
131    // Time to system message from process start (only meaningful for turn 0)
132    // Use absolute time since perf_hooks startTime is relative to process start
133    const systemMessageTime = checkpointTimes.get('system_message_yielded')
134    if (systemMessageTime !== undefined && currentTurnNumber === 0) {
135      metadata.time_to_system_message_ms = Math.round(systemMessageTime)
136    }
137  
138    // Time to query start
139    const queryStartTime = checkpointTimes.get('query_started')
140    if (queryStartTime !== undefined) {
141      metadata.time_to_query_start_ms = Math.round(queryStartTime - turnStart)
142    }
143  
144    // Time to first response (first chunk from API)
145    const firstChunkTime = checkpointTimes.get('first_chunk')
146    if (firstChunkTime !== undefined) {
147      metadata.time_to_first_response_ms = Math.round(firstChunkTime - turnStart)
148    }
149  
150    // Query overhead (time between query start and API request sent)
151    const apiRequestTime = checkpointTimes.get('api_request_sent')
152    if (queryStartTime !== undefined && apiRequestTime !== undefined) {
153      metadata.query_overhead_ms = Math.round(apiRequestTime - queryStartTime)
154    }
155  
156    // Add checkpoint count for debugging
157    metadata.checkpoint_count = marks.length
158  
159    // Add entrypoint for segmentation (sdk-ts, sdk-py, sdk-cli, or undefined)
160    if (process.env.CLAUDE_CODE_ENTRYPOINT) {
161      metadata.entrypoint = process.env.CLAUDE_CODE_ENTRYPOINT
162    }
163  
164    // Log to Statsig if sampled
165    if (STATSIG_LOGGING_SAMPLED) {
166      logEvent(
167        'tengu_headless_latency',
168        metadata as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
169      )
170    }
171  
172    // Log detailed output if CLAUDE_CODE_PROFILE_STARTUP=1
173    if (DETAILED_PROFILING) {
174      logForDebugging(
175        `[headlessProfiler] Turn ${currentTurnNumber} metrics: ${jsonStringify(metadata)}`,
176      )
177    }
178  }