Cradicle Explorer

/ src / services / claudeAiLimits.ts
claudeAiLimits.ts
  1  import { APIError } from '@anthropic-ai/sdk'
  2  import type { MessageParam } from '@anthropic-ai/sdk/resources/index.mjs'
  3  import isEqual from 'lodash-es/isEqual.js'
  4  import { getIsNonInteractiveSession } from '../bootstrap/state.js'
  5  import { isClaudeAISubscriber } from '../utils/auth.js'
  6  import { getModelBetas } from '../utils/betas.js'
  7  import { getGlobalConfig, saveGlobalConfig } from '../utils/config.js'
  8  import { logError } from '../utils/log.js'
  9  import { getSmallFastModel } from '../utils/model/model.js'
 10  import { isEssentialTrafficOnly } from '../utils/privacyLevel.js'
 11  import type { AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } from './analytics/index.js'
 12  import { logEvent } from './analytics/index.js'
 13  import { getAPIMetadata } from './api/claude.js'
 14  import { getAnthropicClient } from './api/client.js'
 15  import {
 16    processRateLimitHeaders,
 17    shouldProcessRateLimits,
 18  } from './rateLimitMocking.js'
 19  
 20  // Re-export message functions from centralized location
 21  export {
 22    getRateLimitErrorMessage,
 23    getRateLimitWarning,
 24    getUsingOverageText,
 25  } from './rateLimitMessages.js'
 26  
 27  type QuotaStatus = 'allowed' | 'allowed_warning' | 'rejected'
 28  
 29  type RateLimitType =
 30    | 'five_hour'
 31    | 'seven_day'
 32    | 'seven_day_opus'
 33    | 'seven_day_sonnet'
 34    | 'overage'
 35  
 36  export type { RateLimitType }
 37  
 38  type EarlyWarningThreshold = {
 39    utilization: number // 0-1 scale: trigger warning when usage >= this
 40    timePct: number // 0-1 scale: trigger warning when time elapsed <= this
 41  }
 42  
 43  type EarlyWarningConfig = {
 44    rateLimitType: RateLimitType
 45    claimAbbrev: '5h' | '7d'
 46    windowSeconds: number
 47    thresholds: EarlyWarningThreshold[]
 48  }
 49  
 50  // Early warning configurations in priority order (checked first to last)
 51  // Used as fallback when server doesn't send surpassed-threshold header
 52  // Warns users when they're consuming quota faster than the time window allows
 53  const EARLY_WARNING_CONFIGS: EarlyWarningConfig[] = [
 54    {
 55      rateLimitType: 'five_hour',
 56      claimAbbrev: '5h',
 57      windowSeconds: 5 * 60 * 60,
 58      thresholds: [{ utilization: 0.9, timePct: 0.72 }],
 59    },
 60    {
 61      rateLimitType: 'seven_day',
 62      claimAbbrev: '7d',
 63      windowSeconds: 7 * 24 * 60 * 60,
 64      thresholds: [
 65        { utilization: 0.75, timePct: 0.6 },
 66        { utilization: 0.5, timePct: 0.35 },
 67        { utilization: 0.25, timePct: 0.15 },
 68      ],
 69    },
 70  ]
 71  
 72  // Maps claim abbreviations to rate limit types for header-based detection
 73  const EARLY_WARNING_CLAIM_MAP: Record<string, RateLimitType> = {
 74    '5h': 'five_hour',
 75    '7d': 'seven_day',
 76    overage: 'overage',
 77  }
 78  
 79  const RATE_LIMIT_DISPLAY_NAMES: Record<RateLimitType, string> = {
 80    five_hour: 'session limit',
 81    seven_day: 'weekly limit',
 82    seven_day_opus: 'Opus limit',
 83    seven_day_sonnet: 'Sonnet limit',
 84    overage: 'extra usage limit',
 85  }
 86  
 87  export function getRateLimitDisplayName(type: RateLimitType): string {
 88    return RATE_LIMIT_DISPLAY_NAMES[type] || type
 89  }
 90  
 91  /**
 92   * Calculate what fraction of a time window has elapsed.
 93   * Used for time-relative early warning fallback.
 94   * @param resetsAt - Unix epoch timestamp in seconds when the limit resets
 95   * @param windowSeconds - Duration of the window in seconds
 96   * @returns fraction (0-1) of the window that has elapsed
 97   */
 98  function computeTimeProgress(resetsAt: number, windowSeconds: number): number {
 99    const nowSeconds = Date.now() / 1000
100    const windowStart = resetsAt - windowSeconds
101    const elapsed = nowSeconds - windowStart
102    return Math.max(0, Math.min(1, elapsed / windowSeconds))
103  }
104  
/**
 * Why overage is disabled or rejected. The values originate from the API's
 * unified limiter.
 */
export type OverageDisabledReason =
  // Overage is not provisioned for this org or seat tier
  | 'overage_not_provisioned'
  // Organization doesn't have overage enabled
  | 'org_level_disabled'
  // Organization overage temporarily disabled
  | 'org_level_disabled_until'
  // Organization has insufficient credits
  | 'out_of_credits'
  // Seat tier doesn't have overage enabled
  | 'seat_tier_level_disabled'
  // Account specifically has overage disabled
  | 'member_level_disabled'
  // Seat tier has a zero credit limit
  | 'seat_tier_zero_credit_limit'
  // Resolved group limit has a zero credit limit
  | 'group_zero_credit_limit'
  // Account has a zero credit limit
  | 'member_zero_credit_limit'
  // Org service specifically has overage disabled
  | 'org_service_level_disabled'
  // Org service has a zero credit limit
  | 'org_service_zero_credit_limit'
  // No overage limits configured for account
  | 'no_limits_configured'
  // Unknown reason, should not happen
  | 'unknown'
121  
/** Snapshot of the rate-limit state derived from API response headers. */
export type ClaudeAILimits = {
  /** Overall quota status. */
  status: QuotaStatus
  /**
   * Currently used to warn users who set their model to Opus whenever they
   * are about to run out of quota. It does not change the actual model that
   * is used.
   */
  unifiedRateLimitFallbackAvailable: boolean
  /** Unix epoch seconds at which the limit resets, when known. */
  resetsAt?: number
  /** Which limit the status refers to, when known. */
  rateLimitType?: RateLimitType
  /** Usage of the relevant window, populated when an early warning fires. */
  utilization?: number
  /** Quota status of overage, when the overage status header is present. */
  overageStatus?: QuotaStatus
  /** Value of the overage reset header, when present. */
  overageResetsAt?: number
  /** Why overage is disabled, when the corresponding header is present. */
  overageDisabledReason?: OverageDisabledReason
  /** True when the standard limit is rejected but overage is still allowed. */
  isUsingOverage?: boolean
  /** Numeric value of the surpassed-threshold header that raised the warning. */
  surpassedThreshold?: number
}

/**
 * Most recently emitted limits; starts out as "allowed" with no overage.
 * Exported for testing only.
 */
export let currentLimits: ClaudeAILimits = {
  status: 'allowed',
  unifiedRateLimitFallbackAvailable: false,
  isUsingOverage: false,
}
144  
/**
 * Raw per-window utilization from response headers, tracked on every API
 * response (unlike currentLimits.utilization which is only set when a warning
 * threshold fires). Exposed to statusline scripts via getRawUtilization().
 */
type RawWindowUtilization = {
  utilization: number // 0-1 fraction
  resets_at: number // unix epoch seconds
}
type RawUtilization = {
  five_hour?: RawWindowUtilization
  seven_day?: RawWindowUtilization
}
let rawUtilization: RawUtilization = {}

/** Returns the raw utilization captured from the most recently processed headers. */
export function getRawUtilization(): RawUtilization {
  return rawUtilization
}

/**
 * Parses a header value as a finite number.
 *
 * @returns The parsed number, or undefined when the header is missing, blank,
 *   or not a finite number. Blank is rejected explicitly because `Number('')`
 *   evaluates to 0.
 */
function parseFiniteHeaderNumber(value: string | null): number | undefined {
  if (value === null || value.trim() === '') {
    return undefined
  }
  const parsed = Number(value)
  return Number.isFinite(parsed) ? parsed : undefined
}

/**
 * Reads the five-hour and seven-day utilization/reset header pairs.
 *
 * @param headers - Response headers to inspect.
 * @returns An object containing an entry for each window whose utilization
 *   AND reset headers are both present and parse to finite numbers. Windows
 *   with missing or malformed values are omitted, so consumers never see NaN.
 */
function extractRawUtilization(headers: globalThis.Headers): RawUtilization {
  const result: RawUtilization = {}
  for (const [key, abbrev] of [
    ['five_hour', '5h'],
    ['seven_day', '7d'],
  ] as const) {
    const utilization = parseFiniteHeaderNumber(
      headers.get(`anthropic-ratelimit-unified-${abbrev}-utilization`),
    )
    const resetsAt = parseFiniteHeaderNumber(
      headers.get(`anthropic-ratelimit-unified-${abbrev}-reset`),
    )
    if (utilization !== undefined && resetsAt !== undefined) {
      result[key] = { utilization, resets_at: resetsAt }
    }
  }
  return result
}
180  
/** Callback invoked with the new limits whenever a status change is emitted. */
type StatusChangeListener = (limits: ClaudeAILimits) => void

/** Registered status-change listeners. */
export const statusListeners: Set<StatusChangeListener> = new Set()

/**
 * Stores `limits` as the current limits, notifies every registered listener,
 * and logs a `tengu_claudeai_limits_status_changed` analytics event.
 *
 * @param limits - The limits to publish.
 */
export function emitStatusChange(limits: ClaudeAILimits) {
  currentLimits = limits

  for (const notify of statusListeners) {
    notify(limits)
  }

  // Whole hours until the reset; 0 when no reset time is known.
  const secondsTillReset = limits.resetsAt
    ? limits.resetsAt - Date.now() / 1000
    : 0
  const hoursTillReset = Math.round(secondsTillReset / (60 * 60))

  logEvent('tengu_claudeai_limits_status_changed', {
    status:
      limits.status as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    unifiedRateLimitFallbackAvailable: limits.unifiedRateLimitFallbackAvailable,
    hoursTillReset,
  })
}
198  
/**
 * Sends a minimal request (one user message, `max_tokens: 1`) to the small
 * fast model with retries disabled, and resolves to the raw response so the
 * caller can read its headers.
 */
async function makeTestQuery() {
  const quotaModel = getSmallFastModel()
  const client = await getAnthropicClient({
    maxRetries: 0,
    model: quotaModel,
    source: 'quota_check',
  })
  const probeMessages: MessageParam[] = [{ role: 'user', content: 'quota' }]
  const modelBetas = getModelBetas(quotaModel)
  // biome-ignore lint/plugin: quota check needs raw response access via asResponse()
  return client.beta.messages
    .create({
      model: quotaModel,
      max_tokens: 1,
      messages: probeMessages,
      metadata: getAPIMetadata(),
      // Only send the betas field when there is something to send.
      ...(modelBetas.length > 0 ? { betas: modelBetas } : {}),
    })
    .asResponse()
}
219  
/**
 * Proactively refreshes the limits by issuing a minimal test query and
 * feeding its response headers (or its API error) into the limit extraction.
 *
 * Does nothing when:
 * - nonessential network traffic is disabled,
 * - rate limits should not be processed (neither a real subscriber nor mock
 *   testing), or
 * - the session is non-interactive (-p): the real query follows immediately
 *   and extractQuotaStatusFromHeaders() updates the limits from its response
 *   headers (claude.ts), so this pre-check call is skipped.
 *
 * Failures that are not an `APIError` are ignored.
 */
export async function checkQuotaStatus(): Promise<void> {
  const shouldSkip =
    isEssentialTrafficOnly() ||
    !shouldProcessRateLimits(isClaudeAISubscriber()) ||
    getIsNonInteractiveSession()
  if (shouldSkip) {
    return
  }

  try {
    // Minimal request whose only purpose is to obtain rate-limit headers.
    const response = await makeTestQuery()
    extractQuotaStatusFromHeaders(response.headers)
  } catch (error) {
    if (!(error instanceof APIError)) {
      return
    }
    extractQuotaStatusFromError(error)
  }
}
250  
/**
 * Looks for a per-claim surpassed-threshold header and, if one is present,
 * builds an `allowed_warning` result from that claim's headers.
 *
 * @param headers - Response headers to inspect.
 * @param unifiedRateLimitFallbackAvailable - Copied verbatim into the result.
 * @returns Limits for the first claim (in claim-map order) that carries a
 *   surpassed-threshold header, or null when no claim does.
 */
function getHeaderBasedEarlyWarning(
  headers: globalThis.Headers,
  unifiedRateLimitFallbackAvailable: boolean,
): ClaudeAILimits | null {
  const readClaimHeader = (abbrev: string, suffix: string): string | null =>
    headers.get(`anthropic-ratelimit-unified-${abbrev}-${suffix}`)

  for (const [abbrev, limitType] of Object.entries(EARLY_WARNING_CLAIM_MAP)) {
    const thresholdValue = readClaimHeader(abbrev, 'surpassed-threshold')
    if (thresholdValue === null) {
      // No threshold crossed for this claim; try the next one.
      continue
    }

    const utilizationValue = readClaimHeader(abbrev, 'utilization')
    const resetValue = readClaimHeader(abbrev, 'reset')

    return {
      status: 'allowed_warning',
      // Missing or empty headers leave the field undefined (key still present).
      resetsAt: resetValue ? Number(resetValue) : undefined,
      rateLimitType: limitType,
      utilization: utilizationValue ? Number(utilizationValue) : undefined,
      unifiedRateLimitFallbackAvailable,
      isUsingOverage: false,
      surpassedThreshold: Number(thresholdValue),
    }
  }

  return null
}
295  
/**
 * Client-side early-warning check for a single rate-limit window, used when
 * the server sends no surpassed-threshold header.
 *
 * @param headers - Response headers carrying the window's utilization and
 *   reset values.
 * @param config - Window definition and thresholds to evaluate.
 * @param unifiedRateLimitFallbackAvailable - Copied verbatim into the result.
 * @returns An `allowed_warning` result when any threshold is met (usage at or
 *   above the threshold while the elapsed share of the window is at or below
 *   its time fraction); null when either header is missing or no threshold
 *   is met.
 */
function getTimeRelativeEarlyWarning(
  headers: globalThis.Headers,
  config: EarlyWarningConfig,
  unifiedRateLimitFallbackAvailable: boolean,
): ClaudeAILimits | null {
  const headerPrefix = `anthropic-ratelimit-unified-${config.claimAbbrev}`
  const utilizationValue = headers.get(`${headerPrefix}-utilization`)
  const resetValue = headers.get(`${headerPrefix}-reset`)
  if (utilizationValue === null || resetValue === null) {
    return null
  }

  const utilization = Number(utilizationValue)
  const resetsAt = Number(resetValue)
  const timeProgress = computeTimeProgress(resetsAt, config.windowSeconds)

  for (const threshold of config.thresholds) {
    const heavyUsage = utilization >= threshold.utilization
    const earlyInWindow = timeProgress <= threshold.timePct
    if (heavyUsage && earlyInWindow) {
      return {
        status: 'allowed_warning',
        resetsAt,
        rateLimitType: config.rateLimitType,
        utilization,
        unifiedRateLimitFallbackAvailable,
        isUsingOverage: false,
      }
    }
  }

  return null
}
341  
/**
 * Resolves an early warning from response headers.
 *
 * The server-provided surpassed-threshold headers take precedence. Only when
 * none is present are the client-side time-relative configs evaluated, in
 * their configured order, stopping at the first one that yields a warning.
 *
 * @param headers - Response headers to inspect.
 * @param unifiedRateLimitFallbackAvailable - Forwarded to whichever check
 *   produces the result.
 * @returns The first warning found, or null when there is none.
 */
function getEarlyWarningFromHeaders(
  headers: globalThis.Headers,
  unifiedRateLimitFallbackAvailable: boolean,
): ClaudeAILimits | null {
  let warning = getHeaderBasedEarlyWarning(
    headers,
    unifiedRateLimitFallbackAvailable,
  )

  for (const config of EARLY_WARNING_CONFIGS) {
    if (warning) {
      // A warning has already been found; skip the remaining fallbacks.
      break
    }
    warning = getTimeRelativeEarlyWarning(
      headers,
      config,
      unifiedRateLimitFallbackAvailable,
    )
  }

  return warning
}
375  
/**
 * Derives a limits snapshot from the unified rate-limit response headers.
 *
 * When the reported status is `allowed` or `allowed_warning`, an early warning
 * (if one is found) is returned as-is; otherwise the status is reported as
 * `allowed`. Any other status is passed through unchanged together with the
 * overage details.
 *
 * @param headers - Response headers to read.
 * @returns The computed limits.
 */
function computeNewLimitsFromHeaders(
  headers: globalThis.Headers,
): ClaudeAILimits {
  const read = (suffix: string): string | null =>
    headers.get(`anthropic-ratelimit-unified-${suffix}`)
  // Missing or empty header -> undefined; otherwise the numeric value.
  const toOptionalNumber = (value: string | null): number | undefined =>
    value ? Number(value) : undefined

  // A missing or empty status header is treated as 'allowed'.
  const status = (read('status') as QuotaStatus) || 'allowed'
  const resetsAt = toOptionalNumber(read('reset'))
  const unifiedRateLimitFallbackAvailable = read('fallback') === 'available'

  // Rate limit type and overage details
  const rateLimitType = read('representative-claim') as RateLimitType | null
  const overageStatus = read('overage-status') as QuotaStatus | null
  const overageResetsAt = toOptionalNumber(read('overage-reset'))
  const overageDisabledReason = read(
    'overage-disabled-reason',
  ) as OverageDisabledReason | null

  // Using overage means: standard limits rejected, but overage still allowed.
  const overageAllowed =
    overageStatus === 'allowed' || overageStatus === 'allowed_warning'
  const isUsingOverage = status === 'rejected' && overageAllowed

  const withinQuota = status === 'allowed' || status === 'allowed_warning'
  if (withinQuota) {
    const earlyWarning = getEarlyWarningFromHeaders(
      headers,
      unifiedRateLimitFallbackAvailable,
    )
    if (earlyWarning) {
      return earlyWarning
    }
  }

  // With no early warning, a within-quota status is reported as plain
  // 'allowed'. The optional fields are only added when they hold a truthy
  // value, and isUsingOverage is assigned last to keep the key order stable.
  const limits: ClaudeAILimits = {
    status: withinQuota ? 'allowed' : status,
    resetsAt,
    unifiedRateLimitFallbackAvailable,
  }
  if (rateLimitType) {
    limits.rateLimitType = rateLimitType
  }
  if (overageStatus) {
    limits.overageStatus = overageStatus
  }
  if (overageResetsAt) {
    limits.overageResetsAt = overageResetsAt
  }
  if (overageDisabledReason) {
    limits.overageDisabledReason = overageDisabledReason
  }
  limits.isUsingOverage = isUsingOverage
  return limits
}
437  
/**
 * Persists the overage-disabled reason from the response headers into the
 * global config, writing only when the value differs from what is cached.
 *
 * @param headers - Response headers to read the reason from.
 */
function cacheExtraUsageDisabledReason(headers: globalThis.Headers): void {
  // No disabled-reason header (null) means extra usage is enabled.
  const latestReason =
    headers.get('anthropic-ratelimit-unified-overage-disabled-reason') ?? null

  if (getGlobalConfig().cachedExtraUsageDisabledReason === latestReason) {
    // Already up to date; avoid a needless config write.
    return
  }

  saveGlobalConfig(previous => ({
    ...previous,
    cachedExtraUsageDisabledReason: latestReason,
  }))
}
453  
/**
 * Updates the tracked limits from a successful response's headers.
 *
 * When rate limits should not be processed, the raw utilization is cleared
 * and, if any non-default limit state is held, the default limits are
 * emitted. Otherwise the headers are processed, the raw utilization and the
 * cached overage-disabled reason are refreshed, and a status change is
 * emitted only when the computed limits differ from the current ones.
 *
 * @param headers - Headers of the API response.
 */
export function extractQuotaStatusFromHeaders(
  headers: globalThis.Headers,
): void {
  if (!shouldProcessRateLimits(isClaudeAISubscriber())) {
    rawUtilization = {}
    const alreadyDefault =
      currentLimits.status === 'allowed' && !currentLimits.resetsAt
    if (!alreadyDefault) {
      // Drop whatever rate-limit state is currently held.
      emitStatusChange({
        status: 'allowed',
        unifiedRateLimitFallbackAvailable: false,
        isUsingOverage: false,
      })
    }
    return
  }

  // Applies mocks from the /mock-limits command if active.
  const effectiveHeaders = processRateLimitHeaders(headers)
  rawUtilization = extractRawUtilization(effectiveHeaders)
  const computedLimits = computeNewLimitsFromHeaders(effectiveHeaders)

  // Extra usage status is cached so it persists across sessions.
  cacheExtraUsageDisabledReason(effectiveHeaders)

  if (isEqual(currentLimits, computedLimits)) {
    return
  }
  emitStatusChange(computedLimits)
}
486  
/**
 * Updates the tracked limits from a 429 API error.
 *
 * Ignored unless rate limits should be processed and the error status is 429.
 * When the error carries headers, the limits are recomputed from them (also
 * refreshing the raw utilization and the cached overage-disabled reason);
 * otherwise a copy of the current limits is used. In both cases the status is
 * forced to `rejected`, and a change is emitted only if the result differs
 * from the current limits. Any exception raised while doing so is logged.
 *
 * @param error - The API error to inspect.
 */
export function extractQuotaStatusFromError(error: APIError): void {
  if (!shouldProcessRateLimits(isClaudeAISubscriber())) {
    return
  }
  if (error.status !== 429) {
    return
  }

  try {
    let rejectedLimits: ClaudeAILimits
    if (error.headers) {
      // Applies mocks from the /mock-limits command if active.
      const effectiveHeaders = processRateLimitHeaders(error.headers)
      rawUtilization = extractRawUtilization(effectiveHeaders)
      rejectedLimits = computeNewLimitsFromHeaders(effectiveHeaders)

      // Extra usage status is cached so it persists across sessions.
      cacheExtraUsageDisabledReason(effectiveHeaders)
    } else {
      rejectedLimits = { ...currentLimits }
    }

    // An error always means rejected, even when no headers were present.
    rejectedLimits.status = 'rejected'

    if (!isEqual(currentLimits, rejectedLimits)) {
      emitStatusChange(rejectedLimits)
    }
  } catch (e) {
    logError(e as Error)
  }
}