// src/bridge/envLessBridgeConfig.ts
  1  import { z } from 'zod/v4'
  2  import { getFeatureValue_DEPRECATED } from '../services/analytics/growthbook.js'
  3  import { lazySchema } from '../utils/lazySchema.js'
  4  import { lt } from '../utils/semver.js'
  5  import { isEnvLessBridgeEnabled } from './bridgeEnabled.js'
  6  
/**
 * Tunables for the env-less (v2) bridge path: init-phase retry backoff, HTTP
 * and connect timeouts, dedup buffer sizing, heartbeat cadence, token refresh
 * timing, a minimum-version floor, and an app-upgrade message toggle.
 *
 * Field names are snake_case because they mirror the keys of the remote
 * config payload validated in this file.
 */
export type EnvLessBridgeConfig = {
  // withRetry — init-phase backoff (createSession, POST /bridge, recovery /bridge)
  /** Maximum number of attempts for an init-phase call. */
  init_retry_max_attempts: number
  /** Base backoff delay between init-phase attempts, in milliseconds. */
  init_retry_base_delay_ms: number
  /** Jitter applied to the init-phase backoff delay, as a fraction. */
  init_retry_jitter_fraction: number
  /** Upper bound on the init-phase backoff delay, in milliseconds. */
  init_retry_max_delay_ms: number
  /** axios timeout for POST /sessions, POST /bridge, POST /archive. */
  http_timeout_ms: number
  /** BoundedUUIDSet ring size (echo + re-delivery dedup). */
  uuid_dedup_buffer_size: number
  /**
   * CCRClient worker heartbeat cadence, in milliseconds. Server TTL is 60s —
   * 20s gives 3× margin.
   */
  heartbeat_interval_ms: number
  /** ±fraction of the interval — per-beat jitter to spread fleet load. */
  heartbeat_jitter_fraction: number
  /**
   * Fire the proactive JWT refresh this long before expires_in. A larger
   * buffer means more frequent refresh (refresh cadence ≈ expires_in - buffer).
   */
  token_refresh_buffer_ms: number
  /**
   * Archive POST timeout in teardown(). Distinct from http_timeout_ms because
   * gracefulShutdown races runCleanupFunctions() against a 2s cap — a 10s
   * axios timeout on a slow/stalled archive burns the whole budget on a
   * request that forceExit will kill anyway.
   */
  teardown_archive_timeout_ms: number
  /**
   * Deadline for onConnect after transport.connect(). If neither onConnect
   * nor onClose fires before this, emit tengu_bridge_repl_connect_timeout
   * — the only telemetry for the ~1% of sessions that emit `started` then
   * go silent (no error, no event, just nothing).
   */
  connect_timeout_ms: number
  /**
   * Semver floor for the env-less bridge path. Separate from the v1
   * tengu_bridge_min_version config so a v2-specific bug can force upgrades
   * without blocking v1 (env-based) clients, and vice versa.
   */
  min_version: string
  /**
   * When true, tell users their claude.ai app may be too old to see v2
   * sessions — lets us roll the v2 bridge before the app ships the new
   * session-list query.
   */
  should_show_app_upgrade_message: boolean
}
 43  
 44  export const DEFAULT_ENV_LESS_BRIDGE_CONFIG: EnvLessBridgeConfig = {
 45    init_retry_max_attempts: 3,
 46    init_retry_base_delay_ms: 500,
 47    init_retry_jitter_fraction: 0.25,
 48    init_retry_max_delay_ms: 4000,
 49    http_timeout_ms: 10_000,
 50    uuid_dedup_buffer_size: 2000,
 51    heartbeat_interval_ms: 20_000,
 52    heartbeat_jitter_fraction: 0.1,
 53    token_refresh_buffer_ms: 300_000,
 54    teardown_archive_timeout_ms: 1500,
 55    connect_timeout_ms: 15_000,
 56    min_version: '0.0.0',
 57    should_show_app_upgrade_message: false,
 58  }
 59  
 60  // Floors reject the whole object on violation (fall back to DEFAULT) rather
 61  // than partially trusting — same defense-in-depth as pollConfig.ts.
 62  const envLessBridgeConfigSchema = lazySchema(() =>
 63    z.object({
 64      init_retry_max_attempts: z.number().int().min(1).max(10).default(3),
 65      init_retry_base_delay_ms: z.number().int().min(100).default(500),
 66      init_retry_jitter_fraction: z.number().min(0).max(1).default(0.25),
 67      init_retry_max_delay_ms: z.number().int().min(500).default(4000),
 68      http_timeout_ms: z.number().int().min(2000).default(10_000),
 69      uuid_dedup_buffer_size: z.number().int().min(100).max(50_000).default(2000),
 70      // Server TTL is 60s. Floor 5s prevents thrash; cap 30s keeps ≥2× margin.
 71      heartbeat_interval_ms: z
 72        .number()
 73        .int()
 74        .min(5000)
 75        .max(30_000)
 76        .default(20_000),
 77      // ±fraction per beat. Cap 0.5: at max interval (30s) × 1.5 = 45s worst case,
 78      // still under the 60s TTL.
 79      heartbeat_jitter_fraction: z.number().min(0).max(0.5).default(0.1),
 80      // Floor 30s prevents tight-looping. Cap 30min rejects buffer-vs-delay
 81      // semantic inversion: ops entering expires_in-5min (the *delay until
 82      // refresh*) instead of 5min (the *buffer before expiry*) yields
 83      // delayMs = expires_in - buffer ≈ 5min instead of ≈4h. Both are positive
 84      // durations so .min() alone can't distinguish; .max() catches the
 85      // inverted value since buffer ≥ 30min is nonsensical for a multi-hour JWT.
 86      token_refresh_buffer_ms: z
 87        .number()
 88        .int()
 89        .min(30_000)
 90        .max(1_800_000)
 91        .default(300_000),
 92      // Cap 2000 keeps this under gracefulShutdown's 2s cleanup race — a higher
 93      // timeout just lies to axios since forceExit kills the socket regardless.
 94      teardown_archive_timeout_ms: z
 95        .number()
 96        .int()
 97        .min(500)
 98        .max(2000)
 99        .default(1500),
100      // Observed p99 connect is ~2-3s; 15s is ~5× headroom. Floor 5s bounds
101      // false-positive rate under transient slowness; cap 60s bounds how long
102      // a truly-stalled session stays dark.
103      connect_timeout_ms: z.number().int().min(5_000).max(60_000).default(15_000),
104      min_version: z
105        .string()
106        .refine(v => {
107          try {
108            lt(v, '0.0.0')
109            return true
110          } catch {
111            return false
112          }
113        })
114        .default('0.0.0'),
115      should_show_app_upgrade_message: z.boolean().default(false),
116    }),
117  )
118  
/**
 * Load the env-less bridge config from the `tengu_bridge_repl_v2_config`
 * GrowthBook feature and validate it against the schema. Intended to be read
 * once per initEnvLessBridgeCore call — the config is fixed for the lifetime
 * of a bridge session.
 *
 * The blocking getter is used (not _CACHED_MAY_BE_STALE) because
 * /remote-control runs well after GrowthBook init: initializeGrowthBook()
 * resolves instantly, so there is no startup penalty, and the fresh in-memory
 * remoteEval value is read instead of the stale-on-first-read disk cache.
 * The _DEPRECATED suffix warns against startup-path usage, which this isn't.
 *
 * @returns The validated config, or DEFAULT_ENV_LESS_BRIDGE_CONFIG when the
 *   remote value fails schema validation.
 */
export async function getEnvLessBridgeConfig(): Promise<EnvLessBridgeConfig> {
  const remoteValue = await getFeatureValue_DEPRECATED<unknown>(
    'tengu_bridge_repl_v2_config',
    DEFAULT_ENV_LESS_BRIDGE_CONFIG,
  )
  const validation = envLessBridgeConfigSchema().safeParse(remoteValue)
  if (!validation.success) {
    // Any invalid field rejects the entire payload; nothing is merged.
    return DEFAULT_ENV_LESS_BRIDGE_CONFIG
  }
  return validation.data
}
138  
/**
 * Gate the env-less (v2) bridge path on a minimum CLI version.
 *
 * v2 analogue of checkBridgeMinVersion() — the floor comes from
 * tengu_bridge_repl_v2_config rather than tengu_bridge_min_version, so the
 * two implementations can enforce independent floors.
 *
 * @returns A user-facing error message when MACRO.VERSION is below the
 *   configured min_version, otherwise null.
 */
export async function checkEnvLessBridgeMinVersion(): Promise<string | null> {
  const { min_version: requiredVersion } = await getEnvLessBridgeConfig()
  // An empty floor, or a current version at or above it, means no complaint.
  if (!requiredVersion || !lt(MACRO.VERSION, requiredVersion)) {
    return null
  }
  return `Your version of Claude Code (${MACRO.VERSION}) is too old for Remote Control.\nVersion ${requiredVersion} or higher is required. Run \`claude update\` to update.`
}
154  
/**
 * Decide whether to nudge users to upgrade their claude.ai app when a Remote
 * Control session starts.
 *
 * The nudge requires both that the v2 bridge is active and that the
 * should_show_app_upgrade_message config bit is set — this lets the v2 bridge
 * roll out before the app ships the new session-list query.
 *
 * @returns false when the env-less bridge is disabled (the config is not
 *   fetched in that case); otherwise the configured flag.
 */
export async function shouldShowAppUpgradeMessage(): Promise<boolean> {
  if (isEnvLessBridgeEnabled()) {
    const { should_show_app_upgrade_message: showNudge } =
      await getEnvLessBridgeConfig()
    return showNudge
  }
  return false
}