Cradicle Explorer

/ utils / telemetry / betaSessionTracing.ts
betaSessionTracing.ts
  1  /**
  2   * Beta Session Tracing for Claude Code
  3   *
  4   * This module contains beta tracing features enabled when
  5   * ENABLE_BETA_TRACING_DETAILED=1 and BETA_TRACING_ENDPOINT are set.
  6   *
  7   * For external users, tracing is enabled in SDK/headless mode, or in
  8   * interactive mode when the org is allowlisted via the
  9   * tengu_trace_lantern GrowthBook gate.
 10   * For ant users, tracing is enabled in all modes.
 11   *
 12   * Visibility Rules:
 13   * | Content          | External | Ant  |
 14   * |------------------|----------|------|
 15   * | System prompts   | ✅                  | ✅   |
 16   * | Model output     | ✅                  | ✅   |
 17   * | Thinking output  | ❌                  | ✅   |
 18   * | Tools            | ✅                  | ✅   |
 19   * | new_context      | ✅                  | ✅   |
 20   *
 21   * Features:
 22   * - Per-agent message tracking with hash-based deduplication
 23   * - System prompt logging (once per unique hash)
 24   * - Hook execution spans
 25   * - Detailed new_context attributes for LLM requests
 26   */
 27  
 28  import type { Span } from '@opentelemetry/api'
 29  import { createHash } from 'crypto'
 30  import { getIsNonInteractiveSession } from '../../bootstrap/state.js'
 31  import { getFeatureValue_CACHED_MAY_BE_STALE } from '../../services/analytics/growthbook.js'
 32  import { sanitizeToolNameForAnalytics } from '../../services/analytics/metadata.js'
 33  import type { AssistantMessage, UserMessage } from '../../types/message.js'
 34  import { isEnvTruthy } from '../envUtils.js'
 35  import { jsonParse, jsonStringify } from '../slowOperations.js'
 36  import { logOTelEvent } from './events.js'
 37  
 38  // Message type for API calls (UserMessage or AssistantMessage)
 39  type APIMessage = UserMessage | AssistantMessage
 40  
 41  /**
 42   * Track hashes we've already logged this session (system prompts, tools, etc).
 43   *
 44   * WHY: System prompts and tool schemas are large and rarely change within a session.
 45   * Sending full content on every request would be wasteful. Instead, we hash and
 46   * only log the full content once per unique hash.
 47   */
 48  const seenHashes = new Set<string>()
 49  
 50  /**
 51   * Track the last reported message hash per querySource (agent) for incremental context.
 52   *
 53   * WHY: When debugging traces, we want to see what NEW information was added each turn,
 54   * not the entire conversation history (which can be huge). By tracking the last message
 55   * we reported per agent, we can compute and send only the delta (new messages since
 56   * the last request). This is tracked per-agent (querySource) because different agents
 57   * (main thread, subagents, warmup requests) have independent conversation contexts.
 58   */
 59  const lastReportedMessageHash = new Map<string, string>()
 60  
 61  /**
 62   * Clear tracking state after compaction.
 63   * Old hashes are irrelevant once messages have been replaced.
 64   */
 65  export function clearBetaTracingState(): void {
 66    seenHashes.clear()
 67    lastReportedMessageHash.clear()
 68  }
 69  
 70  const MAX_CONTENT_SIZE = 60 * 1024 // 60KB (Honeycomb limit is 64KB, staying safe)
 71  
 72  /**
 73   * Check if beta detailed tracing is enabled.
 74   * - Requires ENABLE_BETA_TRACING_DETAILED=1 and BETA_TRACING_ENDPOINT
 75   * - For external users, enabled in SDK/headless mode OR when org is
 76   *   allowlisted via the tengu_trace_lantern GrowthBook gate
 77   */
 78  export function isBetaTracingEnabled(): boolean {
 79    const baseEnabled =
 80      isEnvTruthy(process.env.ENABLE_BETA_TRACING_DETAILED) &&
 81      Boolean(process.env.BETA_TRACING_ENDPOINT)
 82  
 83    if (!baseEnabled) {
 84      return false
 85    }
 86  
 87    // For external users, enable in SDK/headless mode OR when org is allowlisted.
 88    // Gate reads from disk cache, so first run after allowlisting returns false;
 89    // works from second run onward (same behavior as enhanced_telemetry_beta).
 90    if (process.env.USER_TYPE !== 'ant') {
 91      return (
 92        getIsNonInteractiveSession() ||
 93        getFeatureValue_CACHED_MAY_BE_STALE('tengu_trace_lantern', false)
 94      )
 95    }
 96  
 97    return true
 98  }
 99  
/**
 * Truncate content to fit within Honeycomb limits.
 *
 * Size is measured in UTF-16 code units (`string.length`), not in bytes.
 * When the content is too long, the kept prefix is followed by a fixed
 * truncation notice, so the returned string is slightly longer than `maxSize`.
 * The notice text always mentions 60KB, whatever `maxSize` was passed.
 *
 * @param content - Text to cap.
 * @param maxSize - Maximum number of code units to keep; defaults to
 *   MAX_CONTENT_SIZE.
 * @returns The (possibly shortened) content and whether it was cut.
 */
export function truncateContent(
  content: string,
  maxSize: number = MAX_CONTENT_SIZE,
): { content: string; truncated: boolean } {
  if (content.length <= maxSize) {
    return { content, truncated: false }
  }

  // Never cut between the two halves of a surrogate pair: a lone high
  // surrogate is not valid text and is mangled when the attribute is encoded
  // for export. If the last kept unit is a high surrogate whose low surrogate
  // would be dropped, drop the high surrogate as well.
  let cutoff = maxSize
  const lastKept = content.charCodeAt(cutoff - 1)
  const firstDropped = content.charCodeAt(cutoff)
  const splitsPair =
    lastKept >= 0xd800 &&
    lastKept <= 0xdbff &&
    firstDropped >= 0xdc00 &&
    firstDropped <= 0xdfff
  if (splitsPair) {
    cutoff -= 1
  }

  return {
    content:
      content.slice(0, cutoff) +
      '\n\n[TRUNCATED - Content exceeds 60KB limit]',
    truncated: true,
  }
}
118  
/**
 * Produce a compact fingerprint of `content`: the leading 12 hex characters
 * of its SHA-256 digest.
 */
function shortHash(content: string): string {
  const digestHex = createHash('sha256').update(content).digest('hex')
  return digestHex.substring(0, 12)
}
125  
/**
 * Build the tracking key for a system prompt: `sp_` followed by the short
 * hash of the prompt text.
 */
function hashSystemPrompt(systemPrompt: string): string {
  const digest = shortHash(systemPrompt)
  return 'sp_' + digest
}
132  
/**
 * Build the tracking key for a message: `msg_` followed by the short hash of
 * the message's serialized content. Only `message.message.content` feeds the
 * hash, so two messages with identical content produce the same key.
 */
function hashMessage(message: APIMessage): string {
  return 'msg_' + shortHash(jsonStringify(message.message.content))
}
140  
// Matches text that consists solely of one <system-reminder>…</system-reminder>
// wrapper; the capture group is the inner text, minus one optional newline on
// each side.
const SYSTEM_REMINDER_REGEX =
  /^<system-reminder>\n?([\s\S]*?)\n?<\/system-reminder>$/

/**
 * Return the trimmed inner text when `text` (ignoring surrounding whitespace)
 * is entirely a <system-reminder> wrapper.
 *
 * Returns null when the text is not such a wrapper or when the wrapper is
 * empty. A wrapper holding only whitespace yields an empty string.
 */
function extractSystemReminderContent(text: string): string | null {
  const inner = SYSTEM_REMINDER_REGEX.exec(text.trim())?.[1]
  if (!inner) {
    return null
  }
  return inner.trim()
}
153  
/**
 * Output of formatMessagesForContext: ordinary content and system reminders
 * are collected in separate lists.
 */
interface FormattedMessages {
  /** Labelled user text and tool results, in the order encountered. */
  contextParts: string[]
  /** Inner text of each piece that was entirely a <system-reminder> wrapper. */
  systemReminders: string[]
}

/**
 * Turn user messages into display strings for new_context, pulling system
 * reminders out into their own list.
 *
 * String content and `text` blocks are labelled `[USER]`; `tool_result` blocks
 * are labelled with their tool_use_id. Other block types, and content that is
 * neither a string nor an array, are skipped. Callers are expected to have
 * filtered out assistant messages already.
 */
function formatMessagesForContext(messages: UserMessage[]): FormattedMessages {
  const contextParts: string[] = []
  const systemReminders: string[] = []

  // Stores `text` as a reminder when it is one; reports whether it did so.
  const captureReminder = (text: string): boolean => {
    const reminder = extractSystemReminderContent(text)
    if (!reminder) {
      return false
    }
    systemReminders.push(reminder)
    return true
  }

  for (const {
    message: { content },
  } of messages) {
    if (typeof content === 'string') {
      if (!captureReminder(content)) {
        contextParts.push(`[USER]\n${content}`)
      }
      continue
    }
    if (!Array.isArray(content)) {
      continue
    }

    for (const block of content) {
      switch (block.type) {
        case 'text':
          if (!captureReminder(block.text)) {
            contextParts.push(`[USER]\n${block.text}`)
          }
          break
        case 'tool_result': {
          // Non-string tool output is serialized so it can be shown as text.
          const resultContent =
            typeof block.content === 'string'
              ? block.content
              : jsonStringify(block.content)
          // Tool results can also contain system reminders (e.g., malware warning)
          if (!captureReminder(resultContent)) {
            contextParts.push(
              `[TOOL RESULT: ${block.tool_use_id}]\n${resultContent}`,
            )
          }
          break
        }
      }
    }
  }

  return { contextParts, systemReminders }
}
209  
/**
 * Optional request details consumed by addBetaLLMRequestAttributes.
 */
export interface LLMRequestNewContext {
  /**
   * System prompt text for the request. Its hash, a preview and its length go
   * on the span; the full text is logged once per unique hash.
   */
  systemPrompt?: string
  /**
   * Query source identifying the agent/purpose (e.g., 'repl_main_thread',
   * 'agent:builtin'). New-context tracking is kept separately per value.
   */
  querySource?: string
  /** Tool schemas sent with the request, as a JSON-encoded array of tool objects. */
  tools?: string
}
218  
/**
 * Record the user's prompt on an interaction span as `new_context`.
 *
 * Does nothing unless beta tracing is enabled. The prompt is prefixed with a
 * `[USER PROMPT]` label and capped via truncateContent; when it was cut, the
 * span also gets `new_context_truncated` and the length of the raw prompt
 * (without the label) as `new_context_original_length`.
 */
export function addBetaInteractionAttributes(
  span: Span,
  userPrompt: string,
): void {
  if (!isBetaTracingEnabled()) {
    return
  }

  const clipped = truncateContent(`[USER PROMPT]\n${userPrompt}`)

  const attributes: Record<string, string | number | boolean> = {
    new_context: clipped.content,
  }
  if (clipped.truncated) {
    attributes.new_context_truncated = true
    attributes.new_context_original_length = userPrompt.length
  }
  span.setAttributes(attributes)
}
242  
/**
 * Put system prompt identifiers on the span and log the full prompt once per
 * unique hash.
 */
function recordSystemPromptAttributes(span: Span, systemPrompt: string): void {
  const promptHash = hashSystemPrompt(systemPrompt)
  const preview = systemPrompt.slice(0, 500)

  // Always add hash, preview, and length to the span
  span.setAttribute('system_prompt_hash', promptHash)
  span.setAttribute('system_prompt_preview', preview)
  span.setAttribute('system_prompt_length', systemPrompt.length)

  // Log the full system prompt only once per unique hash this session
  if (seenHashes.has(promptHash)) {
    return
  }
  seenHashes.add(promptHash)

  // Truncate for the log if needed
  const { content: truncatedPrompt, truncated } = truncateContent(systemPrompt)

  void logOTelEvent('system_prompt', {
    system_prompt_hash: promptHash,
    system_prompt: truncatedPrompt,
    system_prompt_length: String(systemPrompt.length),
    ...(truncated && { system_prompt_truncated: 'true' }),
  })
}

/**
 * Put a name/hash summary of the request's tools on the span and log each
 * tool's full schema once per unique hash.
 *
 * `toolsJson` is expected to be a JSON array of tool objects. Any failure
 * while parsing or processing it is reported as `tools_parse_error` on the
 * span instead of being thrown.
 */
function recordToolAttributes(span: Span, toolsJson: string): void {
  try {
    // A non-array payload makes `.map` below throw, which lands in the catch.
    const toolsArray = jsonParse(toolsJson) as Record<string, unknown>[]

    // Build array of {name, hash} for each tool
    const toolsWithHashes = toolsArray.map(tool => {
      const toolJson = jsonStringify(tool)
      const toolHash = shortHash(toolJson)
      return {
        name: typeof tool.name === 'string' ? tool.name : 'unknown',
        hash: toolHash,
        json: toolJson,
      }
    })

    // Set span attribute with array of name/hash pairs
    span.setAttribute(
      'tools',
      jsonStringify(toolsWithHashes.map(({ name, hash }) => ({ name, hash }))),
    )
    span.setAttribute('tools_count', toolsWithHashes.length)

    // Log each tool's full description once per unique hash
    for (const { name, hash, json } of toolsWithHashes) {
      const seenKey = `tool_${hash}`
      if (seenHashes.has(seenKey)) {
        continue
      }
      seenHashes.add(seenKey)

      const { content: truncatedTool, truncated } = truncateContent(json)

      void logOTelEvent('tool', {
        tool_name: sanitizeToolNameForAnalytics(name),
        tool_hash: hash,
        tool: truncatedTool,
        ...(truncated && { tool_truncated: 'true' }),
      })
    }
  } catch {
    // Parsing or processing failed: only flag it on the span. The raw tools
    // string is deliberately not recorded here.
    span.setAttribute('tools_parse_error', true)
  }
}

/**
 * Put the user-side messages added since the last report for `querySource`
 * on the span, then remember the request's last message as the new baseline.
 *
 * Regular content goes to `new_context`; pieces that are entirely system
 * reminders go to `system_reminders`. The baseline is only advanced when at
 * least one new user message was found.
 */
function recordNewContextAttributes(
  span: Span,
  querySource: string,
  messagesForAPI: APIMessage[],
): void {
  const separator = '\n\n---\n\n'
  const lastHash = lastReportedMessageHash.get(querySource)

  // Find where the last reported message is in the array
  let startIndex = 0
  if (lastHash) {
    for (let i = 0; i < messagesForAPI.length; i++) {
      const msg = messagesForAPI[i]
      if (msg && hashMessage(msg) === lastHash) {
        startIndex = i + 1 // Start after the last reported message
        break
      }
    }
    // If lastHash not found, startIndex stays 0 (send everything)
  }

  // Get new messages (filter out assistant messages - we only want user input/tool results)
  const newMessages = messagesForAPI
    .slice(startIndex)
    .filter((m): m is UserMessage => m.type === 'user')

  if (newMessages.length === 0) {
    return
  }

  // Format new messages, separating system reminders from regular content
  const { contextParts, systemReminders } =
    formatMessagesForContext(newMessages)

  // Set new_context (regular user content and tool results)
  if (contextParts.length > 0) {
    const fullContext = contextParts.join(separator)
    const { content: truncatedContext, truncated } =
      truncateContent(fullContext)

    span.setAttributes({
      new_context: truncatedContext,
      new_context_message_count: newMessages.length,
      ...(truncated && {
        new_context_truncated: true,
        new_context_original_length: fullContext.length,
      }),
    })
  }

  // Set system_reminders as a separate attribute
  if (systemReminders.length > 0) {
    const fullReminders = systemReminders.join(separator)
    const { content: truncatedReminders, truncated: remindersTruncated } =
      truncateContent(fullReminders)

    span.setAttributes({
      system_reminders: truncatedReminders,
      system_reminders_count: systemReminders.length,
      ...(remindersTruncated && {
        system_reminders_truncated: true,
        system_reminders_original_length: fullReminders.length,
      }),
    })
  }

  // Update last reported hash to the last message in the array (of any type,
  // not just the last user message)
  const lastMessage = messagesForAPI[messagesForAPI.length - 1]
  if (lastMessage) {
    lastReportedMessageHash.set(querySource, hashMessage(lastMessage))
  }
}

/**
 * Add beta attributes to an LLM request span.
 *
 * Does nothing unless beta tracing is enabled. Otherwise, independently:
 * - when `newContext.systemPrompt` is non-empty, records its hash, preview and
 *   length, and logs the full prompt once per unique hash;
 * - when `newContext.tools` is non-empty, records a name/hash summary of the
 *   tools and logs each tool once per unique hash;
 * - when there are messages and `newContext.querySource` is non-empty, records
 *   the user messages added since the previous request from that source
 *   (visible to all users).
 *
 * @param span - The LLM request span to annotate.
 * @param newContext - Optional system prompt, tools JSON and query source.
 * @param messagesForAPI - The full message list being sent with the request.
 */
export function addBetaLLMRequestAttributes(
  span: Span,
  newContext?: LLMRequestNewContext,
  messagesForAPI?: APIMessage[],
): void {
  if (!isBetaTracingEnabled()) {
    return
  }

  if (newContext?.systemPrompt) {
    recordSystemPromptAttributes(span, newContext.systemPrompt)
  }

  if (newContext?.tools) {
    recordToolAttributes(span, newContext.tools)
  }

  if (messagesForAPI && messagesForAPI.length > 0 && newContext?.querySource) {
    recordNewContextAttributes(span, newContext.querySource, messagesForAPI)
  }
}
401  
/**
 * Add beta attributes to the attribute bag used when ending an LLM request span.
 *
 * Does nothing when beta tracing is disabled or no metadata is given.
 * - `modelOutput` is recorded for all users as `response.model_output`.
 * - `thinkingOutput` is recorded as `response.thinking_output` only when
 *   USER_TYPE is 'ant'.
 * Each value is capped via truncateContent; when cut, matching `_truncated`
 * and `_original_length` attributes are added.
 *
 * @param endAttributes - Attribute bag, mutated in place.
 * @param metadata - Optional model and thinking output text.
 */
export function addBetaLLMResponseAttributes(
  endAttributes: Record<string, string | number | boolean>,
  metadata?: {
    modelOutput?: string
    thinkingOutput?: string
  },
): void {
  if (!isBetaTracingEnabled() || !metadata) {
    return
  }

  // Model text output: visible to all users
  const modelText = metadata.modelOutput
  if (modelText !== undefined) {
    const clipped = truncateContent(modelText)
    endAttributes['response.model_output'] = clipped.content
    if (clipped.truncated) {
      endAttributes['response.model_output_truncated'] = true
      endAttributes['response.model_output_original_length'] = modelText.length
    }
  }

  // Thinking output: ant-only
  if (process.env.USER_TYPE !== 'ant') {
    return
  }
  const thinkingText = metadata.thinkingOutput
  if (thinkingText === undefined) {
    return
  }
  const clippedThinking = truncateContent(thinkingText)
  endAttributes['response.thinking_output'] = clippedThinking.content
  if (clippedThinking.truncated) {
    endAttributes['response.thinking_output_truncated'] = true
    endAttributes['response.thinking_output_original_length'] =
      thinkingText.length
  }
}
444  
/**
 * Record a tool's serialized input on its span as `tool_input`.
 *
 * Does nothing unless beta tracing is enabled. The input is prefixed with a
 * `[TOOL INPUT: <name>]` label and capped via truncateContent; when it was
 * cut, the span also gets `tool_input_truncated` and the length of the raw
 * input (without the label) as `tool_input_original_length`.
 */
export function addBetaToolInputAttributes(
  span: Span,
  toolName: string,
  toolInput: string,
): void {
  if (!isBetaTracingEnabled()) {
    return
  }

  const clipped = truncateContent(`[TOOL INPUT: ${toolName}]\n${toolInput}`)

  const attributes: Record<string, string | number | boolean> = {
    tool_input: clipped.content,
  }
  if (clipped.truncated) {
    attributes.tool_input_truncated = true
    attributes.tool_input_original_length = toolInput.length
  }
  span.setAttributes(attributes)
}
469  
/**
 * Record a tool's result in the attribute bag used when ending a tool span,
 * under `new_context`.
 *
 * Does nothing unless beta tracing is enabled. The result is prefixed with a
 * `[TOOL RESULT: <name>]` label and capped via truncateContent; when it was
 * cut, `new_context_truncated` and the length of the raw result (without the
 * label) as `new_context_original_length` are added too.
 *
 * @param endAttributes - Attribute bag, mutated in place.
 * @param toolName - Tool identifier, interpolated into the label.
 * @param toolResult - Tool result text.
 */
export function addBetaToolResultAttributes(
  endAttributes: Record<string, string | number | boolean>,
  toolName: string | number | boolean,
  toolResult: string,
): void {
  if (!isBetaTracingEnabled()) {
    return
  }

  const clipped = truncateContent(`[TOOL RESULT: ${toolName}]\n${toolResult}`)

  Object.assign(
    endAttributes,
    { new_context: clipped.content },
    clipped.truncated
      ? {
          new_context_truncated: true,
          new_context_original_length: toolResult.length,
        }
      : {},
  )
}