/ utils / betas.ts
betas.ts
  1  import { feature } from 'bun:bundle'
  2  import memoize from 'lodash-es/memoize.js'
  3  import {
  4    checkStatsigFeatureGate_CACHED_MAY_BE_STALE,
  5    getFeatureValue_CACHED_MAY_BE_STALE,
  6  } from 'src/services/analytics/growthbook.js'
  7  import { getIsNonInteractiveSession, getSdkBetas } from '../bootstrap/state.js'
  8  import {
  9    BEDROCK_EXTRA_PARAMS_HEADERS,
 10    CLAUDE_CODE_20250219_BETA_HEADER,
 11    CLI_INTERNAL_BETA_HEADER,
 12    CONTEXT_1M_BETA_HEADER,
 13    CONTEXT_MANAGEMENT_BETA_HEADER,
 14    INTERLEAVED_THINKING_BETA_HEADER,
 15    PROMPT_CACHING_SCOPE_BETA_HEADER,
 16    REDACT_THINKING_BETA_HEADER,
 17    STRUCTURED_OUTPUTS_BETA_HEADER,
 18    SUMMARIZE_CONNECTOR_TEXT_BETA_HEADER,
 19    TOKEN_EFFICIENT_TOOLS_BETA_HEADER,
 20    TOOL_SEARCH_BETA_HEADER_1P,
 21    TOOL_SEARCH_BETA_HEADER_3P,
 22    WEB_SEARCH_BETA_HEADER,
 23  } from '../constants/betas.js'
 24  import { OAUTH_BETA_HEADER } from '../constants/oauth.js'
 25  import { isClaudeAISubscriber } from './auth.js'
 26  import { has1mContext } from './context.js'
 27  import { isEnvDefinedFalsy, isEnvTruthy } from './envUtils.js'
 28  import { getCanonicalName } from './model/model.js'
 29  import { get3PModelCapabilityOverride } from './model/modelSupportOverrides.js'
 30  import { getAPIProvider } from './model/providers.js'
 31  import { getInitialSettings } from './settings/settings.js'
 32  
 33  /**
 34   * SDK-provided betas that are allowed for API key users.
 35   * Only betas in this list can be passed via SDK options.
 36   */
 37  const ALLOWED_SDK_BETAS = [CONTEXT_1M_BETA_HEADER]
 38  
 39  /**
 40   * Filter betas to only include those in the allowlist.
 41   * Returns allowed and disallowed betas separately.
 42   */
 43  function partitionBetasByAllowlist(betas: string[]): {
 44    allowed: string[]
 45    disallowed: string[]
 46  } {
 47    const allowed: string[] = []
 48    const disallowed: string[] = []
 49    for (const beta of betas) {
 50      if (ALLOWED_SDK_BETAS.includes(beta)) {
 51        allowed.push(beta)
 52      } else {
 53        disallowed.push(beta)
 54      }
 55    }
 56    return { allowed, disallowed }
 57  }
 58  
 59  /**
 60   * Filter SDK betas to only include allowed ones.
 61   * Warns about disallowed betas and subscriber restrictions.
 62   * Returns undefined if no valid betas remain or if user is a subscriber.
 63   */
 64  export function filterAllowedSdkBetas(
 65    sdkBetas: string[] | undefined,
 66  ): string[] | undefined {
 67    if (!sdkBetas || sdkBetas.length === 0) {
 68      return undefined
 69    }
 70  
 71    if (isClaudeAISubscriber()) {
 72      // biome-ignore lint/suspicious/noConsole: intentional warning
 73      console.warn(
 74        'Warning: Custom betas are only available for API key users. Ignoring provided betas.',
 75      )
 76      return undefined
 77    }
 78  
 79    const { allowed, disallowed } = partitionBetasByAllowlist(sdkBetas)
 80    for (const beta of disallowed) {
 81      // biome-ignore lint/suspicious/noConsole: intentional warning
 82      console.warn(
 83        `Warning: Beta header '${beta}' is not allowed. Only the following betas are supported: ${ALLOWED_SDK_BETAS.join(', ')}`,
 84      )
 85    }
 86    return allowed.length > 0 ? allowed : undefined
 87  }
 88  
 89  // Generally, foundry supports all 1P features;
 90  // however out of an abundance of caution, we do not enable any which are behind an experiment
 91  
 92  export function modelSupportsISP(model: string): boolean {
 93    const supported3P = get3PModelCapabilityOverride(
 94      model,
 95      'interleaved_thinking',
 96    )
 97    if (supported3P !== undefined) {
 98      return supported3P
 99    }
100    const canonical = getCanonicalName(model)
101    const provider = getAPIProvider()
102    // Foundry supports interleaved thinking for all models
103    if (provider === 'foundry') {
104      return true
105    }
106    if (provider === 'firstParty') {
107      return !canonical.includes('claude-3-')
108    }
109    return (
110      canonical.includes('claude-opus-4') || canonical.includes('claude-sonnet-4')
111    )
112  }
113  
/**
 * Whether `model` can use web search on Vertex.
 * Web search is only supported on Claude 4.0+ models there, detected by
 * the canonical name containing an opus-4, sonnet-4 or haiku-4 family prefix.
 */
function vertexModelSupportsWebSearch(model: string): boolean {
  const canonicalName = getCanonicalName(model)
  const claude4Families = ['claude-opus-4', 'claude-sonnet-4', 'claude-haiku-4']
  return claude4Families.some(family => canonicalName.includes(family))
}
123  
/**
 * Whether `model` supports context management (Claude 4+ models).
 *
 * - foundry: always supported.
 * - firstParty: any model outside the claude-3 family.
 * - other providers: only the opus-4 / sonnet-4 / haiku-4 families.
 */
export function modelSupportsContextManagement(model: string): boolean {
  const canonicalName = getCanonicalName(model)
  switch (getAPIProvider()) {
    case 'foundry':
      return true
    case 'firstParty':
      return !canonicalName.includes('claude-3-')
    default:
      return ['claude-opus-4', 'claude-sonnet-4', 'claude-haiku-4'].some(
        family => canonicalName.includes(family),
      )
  }
}
140  
// @[MODEL LAUNCH]: Add the new model ID to this list if it supports structured outputs.
/**
 * Whether `model` supports structured outputs on the current API provider.
 * Structured outputs are only supported on firstParty and Foundry
 * (not Bedrock/Vertex yet), and only for the model families listed below.
 */
export function modelSupportsStructuredOutputs(model: string): boolean {
  const canonicalName = getCanonicalName(model)
  const provider = getAPIProvider()
  const providerEligible = provider === 'firstParty' || provider === 'foundry'
  if (!providerEligible) {
    return false
  }

  const supportedFamilies = [
    'claude-sonnet-4-6',
    'claude-sonnet-4-5',
    'claude-opus-4-1',
    'claude-opus-4-5',
    'claude-opus-4-6',
    'claude-haiku-4-5',
  ]
  return supportedFamilies.some(family => canonicalName.includes(family))
}
158  
// @[MODEL LAUNCH]: Add the new model if it supports auto mode (specifically PI probes) — ask in #proj-claude-code-safety-research.
/**
 * Whether auto mode may be used with `model`.
 *
 * Always false unless the TRANSCRIPT_CLASSIFIER feature is on. Otherwise,
 * in order:
 *   1. Non-ant users on a non-firstParty provider are rejected.
 *   2. A match in tengu_auto_mode_config.allowModels force-enables.
 *   3. Ant users: denylist (claude-3, and claude-*-4 up to -4-5).
 *   4. External users: allowlist (opus/sonnet 4-6 only).
 *
 * NOTE(review): feature() is deliberately left as the direct `if` condition
 * and the USER_TYPE comparisons are left inline — presumably the bundler
 * keys off these shapes for dead-code elimination; confirm before
 * restructuring.
 */
export function modelSupportsAutoMode(model: string): boolean {
  if (feature('TRANSCRIPT_CLASSIFIER')) {
    const canonicalName = getCanonicalName(model)

    // External: firstParty-only at launch (PI probes not wired for
    // Bedrock/Vertex/Foundry yet). Checked before allowModels so the GB
    // override can't enable auto mode on unsupported providers.
    if (process.env.USER_TYPE !== 'ant' && getAPIProvider() !== 'firstParty') {
      return false
    }

    // GrowthBook override: tengu_auto_mode_config.allowModels force-enables
    // auto mode for listed models, bypassing the denylist/allowlist below.
    // Exact model IDs (e.g. "claude-strudel-v6-p") match only that model;
    // canonical names (e.g. "claude-strudel") match the whole family.
    const autoModeConfig = getFeatureValue_CACHED_MAY_BE_STALE<{
      allowModels?: string[]
    }>('tengu_auto_mode_config', {})
    const exactId = model.toLowerCase()
    const forceEnabled = (autoModeConfig?.allowModels ?? []).some(entry => {
      const normalizedEntry = entry.toLowerCase()
      return normalizedEntry === exactId || normalizedEntry === canonicalName
    })
    if (forceEnabled) {
      return true
    }

    if (process.env.USER_TYPE === 'ant') {
      // Denylist: block known-unsupported claude models, allow everything
      // else (ant-internal models etc.).
      // Regex: claude-*-4 not followed by -[6-9] — blocks bare -4,
      // -4-YYYYMMDD, -4@, -4-0 thru -4-5.
      return (
        !canonicalName.includes('claude-3-') &&
        !/claude-(opus|sonnet|haiku)-4(?!-[6-9])/.test(canonicalName)
      )
    }

    // External allowlist (firstParty already checked above).
    return /^claude-(opus|sonnet)-4-6/.test(canonicalName)
  }
  return false
}
196  
/**
 * Pick the tool search beta header matching the current API provider.
 * - Vertex AI / Bedrock: TOOL_SEARCH_BETA_HEADER_3P (tool-search-tool-2025-10-19)
 * - Everything else (Claude API / Foundry): TOOL_SEARCH_BETA_HEADER_1P
 *   (advanced-tool-use-2025-11-20)
 */
export function getToolSearchBetaHeader(): string {
  const provider = getAPIProvider()
  const usesThirdPartyHeader = provider === 'vertex' || provider === 'bedrock'
  return usesThirdPartyHeader
    ? TOOL_SEARCH_BETA_HEADER_3P
    : TOOL_SEARCH_BETA_HEADER_1P
}
209  
/**
 * Whether experimental betas should be sent.
 * They are limited to the firstParty and foundry providers (proxies and
 * other providers may not support them) and can be switched off with
 * CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS.
 */
export function shouldIncludeFirstPartyOnlyBetas(): boolean {
  const provider = getAPIProvider()
  if (provider !== 'firstParty' && provider !== 'foundry') {
    return false
  }
  return !isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS)
}
221  
/**
 * Whether global-scope prompt caching should be used.
 * firstParty only: Foundry is excluded because GrowthBook never bucketed
 * Foundry users into the rollout experiment, so the treatment data is
 * firstParty-only. Also disabled by CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS.
 */
export function shouldUseGlobalCacheScope(): boolean {
  if (getAPIProvider() !== 'firstParty') {
    return false
  }
  return !isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS)
}
233  
/**
 * Compute every beta header that applies to `model` for the current
 * provider, user type, environment variables and feature flags.
 *
 * The result is memoized per `model` string. Because the inputs other than
 * `model` (env vars, provider, settings, feature flags) are not part of the
 * cache key, clearBetasCaches() must be used to force recomputation.
 * The returned array is the cached instance — callers should copy it
 * before mutating (getMergedBetas does).
 *
 * @param model - Model identifier; canonicalized internally where needed.
 * @returns Beta header strings, in the order they were added.
 */
export const getAllModelBetas = memoize((model: string): string[] => {
  const betaHeaders: string[] = []
  const isHaiku = getCanonicalName(model).includes('haiku')
  const provider = getAPIProvider()
  const includeFirstPartyOnlyBetas = shouldIncludeFirstPartyOnlyBetas()

  // Haiku is excluded here; getMergedBetas re-adds these headers for
  // agentic Haiku queries.
  if (!isHaiku) {
    betaHeaders.push(CLAUDE_CODE_20250219_BETA_HEADER)
    if (
      process.env.USER_TYPE === 'ant' &&
      process.env.CLAUDE_CODE_ENTRYPOINT === 'cli' &&
      CLI_INTERNAL_BETA_HEADER
    ) {
      betaHeaders.push(CLI_INTERNAL_BETA_HEADER)
    }
  }
  if (isClaudeAISubscriber()) {
    betaHeaders.push(OAUTH_BETA_HEADER)
  }
  if (has1mContext(model)) {
    betaHeaders.push(CONTEXT_1M_BETA_HEADER)
  }
  if (
    !isEnvTruthy(process.env.DISABLE_INTERLEAVED_THINKING) &&
    modelSupportsISP(model)
  ) {
    betaHeaders.push(INTERLEAVED_THINKING_BETA_HEADER)
  }

  // Skip the API-side Haiku thinking summarizer — the summary is only used
  // for ctrl+o display, which interactive users rarely open. The API returns
  // redacted_thinking blocks instead; AssistantRedactedThinkingMessage already
  // renders those as a stub. SDK / print-mode keep summaries because callers
  // may iterate over thinking content. Users can opt back in via settings.json
  // showThinkingSummaries.
  if (
    includeFirstPartyOnlyBetas &&
    modelSupportsISP(model) &&
    !getIsNonInteractiveSession() &&
    getInitialSettings().showThinkingSummaries !== true
  ) {
    betaHeaders.push(REDACT_THINKING_BETA_HEADER)
  }

  // POC: server-side connector-text summarization (anti-distillation). The
  // API buffers assistant text between tool calls, summarizes it, and returns
  // the summary with a signature so the original can be restored on subsequent
  // turns — same mechanism as thinking blocks. Ant-only while we measure
  // TTFT/TTLT/capacity; betas already flow to tengu_api_success for splitting.
  // Backend independently requires Capability.ANTHROPIC_INTERNAL_RESEARCH.
  //
  // USE_CONNECTOR_TEXT_SUMMARIZATION is tri-state: =1 forces on (opt-in even
  // if GB is off), =0 forces off (opt-out of a GB rollout you were bucketed
  // into), unset defers to GB.
  if (
    SUMMARIZE_CONNECTOR_TEXT_BETA_HEADER &&
    process.env.USER_TYPE === 'ant' &&
    includeFirstPartyOnlyBetas &&
    !isEnvDefinedFalsy(process.env.USE_CONNECTOR_TEXT_SUMMARIZATION) &&
    (isEnvTruthy(process.env.USE_CONNECTOR_TEXT_SUMMARIZATION) ||
      getFeatureValue_CACHED_MAY_BE_STALE('tengu_slate_prism', false))
  ) {
    betaHeaders.push(SUMMARIZE_CONNECTOR_TEXT_BETA_HEADER)
  }

  // Add context management beta for tool clearing (ant opt-in) or thinking preservation
  const antOptedIntoToolClearing =
    isEnvTruthy(process.env.USE_API_CONTEXT_MANAGEMENT) &&
    process.env.USER_TYPE === 'ant'

  const thinkingPreservationEnabled = modelSupportsContextManagement(model)

  // Reuse the flag computed at the top instead of re-evaluating
  // shouldIncludeFirstPartyOnlyBetas(), matching every other gate below.
  if (
    includeFirstPartyOnlyBetas &&
    (antOptedIntoToolClearing || thinkingPreservationEnabled)
  ) {
    betaHeaders.push(CONTEXT_MANAGEMENT_BETA_HEADER)
  }

  const strictToolsEnabled =
    checkStatsigFeatureGate_CACHED_MAY_BE_STALE('tengu_tool_pear')
  // 3P default: false. API rejects strict + token-efficient-tools together
  // (tool_use.py:139), so these are mutually exclusive — strict wins.
  const tokenEfficientToolsEnabled =
    !strictToolsEnabled &&
    getFeatureValue_CACHED_MAY_BE_STALE('tengu_amber_json_tools', false)

  // Add strict tool use beta if experiment is enabled.
  // Gate on includeFirstPartyOnlyBetas: CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS
  // already strips schema.strict from tool bodies at api.ts's choke point, but
  // this header was escaping that kill switch. Proxy gateways that look like
  // firstParty but forward to Vertex reject this header with 400.
  // github.com/deshaw/anthropic-issues/issues/5
  if (
    includeFirstPartyOnlyBetas &&
    modelSupportsStructuredOutputs(model) &&
    strictToolsEnabled
  ) {
    betaHeaders.push(STRUCTURED_OUTPUTS_BETA_HEADER)
  }
  // JSON tool_use format (FC v3) — ~4.5% output token reduction vs ANTML.
  // Sends the v2 header (2026-03-28) added in anthropics/anthropic#337072 to
  // isolate the CC A/B cohort from ~9.2M/week existing v1 senders. Ant-only
  // while the restored JsonToolUseOutputParser soaks.
  if (
    process.env.USER_TYPE === 'ant' &&
    includeFirstPartyOnlyBetas &&
    tokenEfficientToolsEnabled
  ) {
    betaHeaders.push(TOKEN_EFFICIENT_TOOLS_BETA_HEADER)
  }

  // Add web search beta for Vertex Claude 4.0+ models only
  if (provider === 'vertex' && vertexModelSupportsWebSearch(model)) {
    betaHeaders.push(WEB_SEARCH_BETA_HEADER)
  }
  // Foundry only ships models that already support Web Search
  if (provider === 'foundry') {
    betaHeaders.push(WEB_SEARCH_BETA_HEADER)
  }

  // Always send the beta header for 1P. The header is a no-op without a scope field.
  if (includeFirstPartyOnlyBetas) {
    betaHeaders.push(PROMPT_CACHING_SCOPE_BETA_HEADER)
  }

  // If ANTHROPIC_BETAS is set, split it by commas and add to betaHeaders.
  // This is an explicit user opt-in, so honor it regardless of model.
  // Entries are trimmed and empty segments (e.g. from "a,,b") are dropped.
  if (process.env.ANTHROPIC_BETAS) {
    betaHeaders.push(
      ...process.env.ANTHROPIC_BETAS.split(',')
        .map(entry => entry.trim())
        .filter(Boolean),
    )
  }
  return betaHeaders
})
370  
/**
 * Beta headers to send as regular betas for `model` (memoized per model).
 * On Bedrock, headers listed in BEDROCK_EXTRA_PARAMS_HEADERS are removed
 * here; getBedrockExtraBodyParamsBetas returns that complementary subset.
 */
export const getModelBetas = memoize((model: string): string[] => {
  const allBetas = getAllModelBetas(model)
  return getAPIProvider() === 'bedrock'
    ? allBetas.filter(header => !BEDROCK_EXTRA_PARAMS_HEADERS.has(header))
    : allBetas
})
378  
/**
 * The subset of `model`'s beta headers that appear in
 * BEDROCK_EXTRA_PARAMS_HEADERS (memoized per model).
 */
export const getBedrockExtraBodyParamsBetas = memoize(
  (model: string): string[] =>
    getAllModelBetas(model).filter(header =>
      BEDROCK_EXTRA_PARAMS_HEADERS.has(header),
    ),
)
385  
/**
 * Merge SDK-provided betas with auto-detected model betas.
 * SDK betas are read from global state (set via setSdkBetas in main.tsx).
 * The betas are pre-filtered by filterAllowedSdkBetas which handles
 * subscriber checks and allowlist validation with warnings.
 *
 * @param model - Model identifier whose betas are looked up via getModelBetas.
 * @param options.isAgenticQuery - When true, ensures the beta headers needed
 *   for agentic queries are present. For non-Haiku models these are already
 *   included by getAllModelBetas(); for Haiku they're excluded since
 *   non-agentic calls (compaction, classifiers, token estimation) don't need them.
 * @returns A fresh array (safe to mutate) of the model betas followed by any
 *   SDK betas not already present.
 */
export function getMergedBetas(
  model: string,
  options?: { isAgenticQuery?: boolean },
): string[] {
  // Copy: getModelBetas returns a memoized array that must not be mutated.
  const mergedBetas = [...getModelBetas(model)]

  // Agentic queries always need claude-code and cli-internal beta headers.
  // For non-Haiku models these are already present; for Haiku they're
  // excluded by getAllModelBetas() since non-agentic Haiku calls don't need them.
  if (options?.isAgenticQuery) {
    if (!mergedBetas.includes(CLAUDE_CODE_20250219_BETA_HEADER)) {
      mergedBetas.push(CLAUDE_CODE_20250219_BETA_HEADER)
    }
    if (
      process.env.USER_TYPE === 'ant' &&
      process.env.CLAUDE_CODE_ENTRYPOINT === 'cli' &&
      CLI_INTERNAL_BETA_HEADER &&
      !mergedBetas.includes(CLI_INTERNAL_BETA_HEADER)
    ) {
      mergedBetas.push(CLI_INTERNAL_BETA_HEADER)
    }
  }

  const sdkBetas = getSdkBetas()

  if (!sdkBetas || sdkBetas.length === 0) {
    return mergedBetas
  }

  // Merge SDK betas without duplicates (already filtered by
  // filterAllowedSdkBetas). Checking against the growing result — rather
  // than only the model betas — also collapses a beta that is repeated
  // within sdkBetas itself.
  for (const sdkBeta of sdkBetas) {
    if (!mergedBetas.includes(sdkBeta)) {
      mergedBetas.push(sdkBeta)
    }
  }
  return mergedBetas
}
429  
/**
 * Drop the memoized results of getAllModelBetas, getModelBetas and
 * getBedrockExtraBodyParamsBetas so the next call recomputes them.
 */
export function clearBetasCaches(): void {
  const memoizedLookups = [
    getAllModelBetas,
    getModelBetas,
    getBedrockExtraBodyParamsBetas,
  ]
  for (const lookup of memoizedLookups) {
    lookup.cache?.clear?.()
  }
}