/ utils / sideQuery.ts
sideQuery.ts
  1  import type Anthropic from '@anthropic-ai/sdk'
  2  import type { BetaToolUnion } from '@anthropic-ai/sdk/resources/beta/messages.js'
  3  import {
  4    getLastApiCompletionTimestamp,
  5    setLastApiCompletionTimestamp,
  6  } from '../bootstrap/state.js'
  7  import { STRUCTURED_OUTPUTS_BETA_HEADER } from '../constants/betas.js'
  8  import type { QuerySource } from '../constants/querySource.js'
  9  import {
 10    getAttributionHeader,
 11    getCLISyspromptPrefix,
 12  } from '../constants/system.js'
 13  import { logEvent } from '../services/analytics/index.js'
 14  import type { AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } from '../services/analytics/metadata.js'
 15  import { getAPIMetadata } from '../services/api/claude.js'
 16  import { getAnthropicClient } from '../services/api/client.js'
 17  import { getModelBetas, modelSupportsStructuredOutputs } from './betas.js'
 18  import { computeFingerprint } from './fingerprint.js'
 19  import { normalizeModelStringForAPI } from './model/model.js'
 20  
 21  type MessageParam = Anthropic.MessageParam
 22  type TextBlockParam = Anthropic.TextBlockParam
 23  type Tool = Anthropic.Tool
 24  type ToolChoice = Anthropic.ToolChoice
 25  type BetaMessage = Anthropic.Beta.Messages.BetaMessage
 26  type BetaJSONOutputFormat = Anthropic.Beta.Messages.BetaJSONOutputFormat
 27  type BetaThinkingConfigParam = Anthropic.Beta.Messages.BetaThinkingConfigParam
 28  
 29  export type SideQueryOptions = {
 30    /** Model to use for the query */
 31    model: string
 32    /**
 33     * System prompt - string or array of text blocks (will be prefixed with CLI attribution).
 34     *
 35     * The attribution header is always placed in its own TextBlockParam block to ensure
 36     * server-side parsing correctly extracts the cc_entrypoint value without including
 37     * system prompt content.
 38     */
 39    system?: string | TextBlockParam[]
 40    /** Messages to send (supports cache_control on content blocks) */
 41    messages: MessageParam[]
 42    /** Optional tools (supports both standard Tool[] and BetaToolUnion[] for custom tool types) */
 43    tools?: Tool[] | BetaToolUnion[]
 44    /** Optional tool choice (use { type: 'tool', name: 'x' } for forced output) */
 45    tool_choice?: ToolChoice
 46    /** Optional JSON output format for structured responses */
 47    output_format?: BetaJSONOutputFormat
 48    /** Max tokens (default: 1024) */
 49    max_tokens?: number
 50    /** Max retries (default: 2) */
 51    maxRetries?: number
 52    /** Abort signal */
 53    signal?: AbortSignal
 54    /** Skip CLI system prompt prefix (keeps attribution header for OAuth). For internal classifiers that provide their own prompt. */
 55    skipSystemPromptPrefix?: boolean
 56    /** Temperature override */
 57    temperature?: number
 58    /** Thinking budget (enables thinking), or `false` to send `{ type: 'disabled' }`. */
 59    thinking?: number | false
 60    /** Stop sequences — generation stops when any of these strings is emitted */
 61    stop_sequences?: string[]
 62    /** Attributes this call in tengu_api_success for COGS joining against reporting.sampling_calls. */
 63    querySource: QuerySource
 64  }
 65  
 66  /**
 67   * Extract text from first user message for fingerprint computation.
 68   */
 69  function extractFirstUserMessageText(messages: MessageParam[]): string {
 70    const firstUserMessage = messages.find(m => m.role === 'user')
 71    if (!firstUserMessage) return ''
 72  
 73    const content = firstUserMessage.content
 74    if (typeof content === 'string') return content
 75  
 76    // Array of content blocks - find first text block
 77    const textBlock = content.find(block => block.type === 'text')
 78    return textBlock?.type === 'text' ? textBlock.text : ''
 79  }
 80  
 81  /**
 82   * Lightweight API wrapper for "side queries" outside the main conversation loop.
 83   *
 84   * Use this instead of direct client.beta.messages.create() calls to ensure
 85   * proper OAuth token validation with fingerprint attribution headers.
 86   *
 87   * This handles:
 88   * - Fingerprint computation for OAuth validation
 89   * - Attribution header injection
 90   * - CLI system prompt prefix
 91   * - Proper betas for the model
 92   * - API metadata
 93   * - Model string normalization (strips [1m] suffix for API)
 94   *
 95   * @example
 96   * // Permission explainer
 97   * await sideQuery({ querySource: 'permission_explainer', model, system: SYSTEM_PROMPT, messages, tools, tool_choice })
 98   *
 99   * @example
100   * // Session search
101   * await sideQuery({ querySource: 'session_search', model, system: SEARCH_PROMPT, messages })
102   *
103   * @example
104   * // Model validation
105   * await sideQuery({ querySource: 'model_validation', model, max_tokens: 1, messages: [{ role: 'user', content: 'Hi' }] })
106   */
107  export async function sideQuery(opts: SideQueryOptions): Promise<BetaMessage> {
108    const {
109      model,
110      system,
111      messages,
112      tools,
113      tool_choice,
114      output_format,
115      max_tokens = 1024,
116      maxRetries = 2,
117      signal,
118      skipSystemPromptPrefix,
119      temperature,
120      thinking,
121      stop_sequences,
122    } = opts
123  
124    const client = await getAnthropicClient({
125      maxRetries,
126      model,
127      source: 'side_query',
128    })
129    const betas = [...getModelBetas(model)]
130    // Add structured-outputs beta if using output_format and provider supports it
131    if (
132      output_format &&
133      modelSupportsStructuredOutputs(model) &&
134      !betas.includes(STRUCTURED_OUTPUTS_BETA_HEADER)
135    ) {
136      betas.push(STRUCTURED_OUTPUTS_BETA_HEADER)
137    }
138  
139    // Extract first user message text for fingerprint
140    const messageText = extractFirstUserMessageText(messages)
141  
142    // Compute fingerprint for OAuth attribution
143    const fingerprint = computeFingerprint(messageText, MACRO.VERSION)
144    const attributionHeader = getAttributionHeader(fingerprint)
145  
146    // Build system as array to keep attribution header in its own block
147    // (prevents server-side parsing from including system content in cc_entrypoint)
148    const systemBlocks: TextBlockParam[] = [
149      attributionHeader ? { type: 'text', text: attributionHeader } : null,
150      // Skip CLI system prompt prefix for internal classifiers that provide their own prompt
151      ...(skipSystemPromptPrefix
152        ? []
153        : [
154            {
155              type: 'text' as const,
156              text: getCLISyspromptPrefix({
157                isNonInteractive: false,
158                hasAppendSystemPrompt: false,
159              }),
160            },
161          ]),
162      ...(Array.isArray(system)
163        ? system
164        : system
165          ? [{ type: 'text' as const, text: system }]
166          : []),
167    ].filter((block): block is TextBlockParam => block !== null)
168  
169    let thinkingConfig: BetaThinkingConfigParam | undefined
170    if (thinking === false) {
171      thinkingConfig = { type: 'disabled' }
172    } else if (thinking !== undefined) {
173      thinkingConfig = {
174        type: 'enabled',
175        budget_tokens: Math.min(thinking, max_tokens - 1),
176      }
177    }
178  
179    const normalizedModel = normalizeModelStringForAPI(model)
180    const start = Date.now()
181    // biome-ignore lint/plugin: this IS the wrapper that handles OAuth attribution
182    const response = await client.beta.messages.create(
183      {
184        model: normalizedModel,
185        max_tokens,
186        system: systemBlocks,
187        messages,
188        ...(tools && { tools }),
189        ...(tool_choice && { tool_choice }),
190        ...(output_format && { output_config: { format: output_format } }),
191        ...(temperature !== undefined && { temperature }),
192        ...(stop_sequences && { stop_sequences }),
193        ...(thinkingConfig && { thinking: thinkingConfig }),
194        ...(betas.length > 0 && { betas }),
195        metadata: getAPIMetadata(),
196      },
197      { signal },
198    )
199  
200    const requestId =
201      (response as { _request_id?: string | null })._request_id ?? undefined
202    const now = Date.now()
203    const lastCompletion = getLastApiCompletionTimestamp()
204    logEvent('tengu_api_success', {
205      requestId:
206        requestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
207      querySource:
208        opts.querySource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
209      model:
210        normalizedModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
211      inputTokens: response.usage.input_tokens,
212      outputTokens: response.usage.output_tokens,
213      cachedInputTokens: response.usage.cache_read_input_tokens ?? 0,
214      uncachedInputTokens: response.usage.cache_creation_input_tokens ?? 0,
215      durationMsIncludingRetries: now - start,
216      timeSinceLastApiCallMs:
217        lastCompletion !== null ? now - lastCompletion : undefined,
218    })
219    setLastApiCompletionTimestamp(now)
220  
221    return response
222  }