/ bridge / bridgeMain.ts
bridgeMain.ts
   1  import { feature } from 'bun:bundle'
   2  import { randomUUID } from 'crypto'
   3  import { hostname, tmpdir } from 'os'
   4  import { basename, join, resolve } from 'path'
   5  import { getRemoteSessionUrl } from '../constants/product.js'
   6  import { shutdownDatadog } from '../services/analytics/datadog.js'
   7  import { shutdown1PEventLogging } from '../services/analytics/firstPartyEventLogger.js'
   8  import { checkGate_CACHED_OR_BLOCKING } from '../services/analytics/growthbook.js'
   9  import {
  10    type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  11    logEvent,
  12    logEventAsync,
  13  } from '../services/analytics/index.js'
  14  import { isInBundledMode } from '../utils/bundledMode.js'
  15  import { logForDebugging } from '../utils/debug.js'
  16  import { logForDiagnosticsNoPII } from '../utils/diagLogs.js'
  17  import { isEnvTruthy, isInProtectedNamespace } from '../utils/envUtils.js'
  18  import { errorMessage } from '../utils/errors.js'
  19  import { truncateToWidth } from '../utils/format.js'
  20  import { logError } from '../utils/log.js'
  21  import { sleep } from '../utils/sleep.js'
  22  import { createAgentWorktree, removeAgentWorktree } from '../utils/worktree.js'
  23  import {
  24    BridgeFatalError,
  25    createBridgeApiClient,
  26    isExpiredErrorType,
  27    isSuppressible403,
  28    validateBridgeId,
  29  } from './bridgeApi.js'
  30  import { formatDuration } from './bridgeStatusUtil.js'
  31  import { createBridgeLogger } from './bridgeUI.js'
  32  import { createCapacityWake } from './capacityWake.js'
  33  import { describeAxiosError } from './debugUtils.js'
  34  import { createTokenRefreshScheduler } from './jwtUtils.js'
  35  import { getPollIntervalConfig } from './pollConfig.js'
  36  import { toCompatSessionId, toInfraSessionId } from './sessionIdCompat.js'
  37  import { createSessionSpawner, safeFilenameId } from './sessionRunner.js'
  38  import { getTrustedDeviceToken } from './trustedDevice.js'
  39  import {
  40    BRIDGE_LOGIN_ERROR,
  41    type BridgeApiClient,
  42    type BridgeConfig,
  43    type BridgeLogger,
  44    DEFAULT_SESSION_TIMEOUT_MS,
  45    type SessionDoneStatus,
  46    type SessionHandle,
  47    type SessionSpawner,
  48    type SessionSpawnOpts,
  49    type SpawnMode,
  50  } from './types.js'
  51  import {
  52    buildCCRv2SdkUrl,
  53    buildSdkUrl,
  54    decodeWorkSecret,
  55    registerWorker,
  56    sameSessionId,
  57  } from './workSecret.js'
  58  
/**
 * Tunables for the bridge poll loop's retry/backoff timing and shutdown timing.
 *
 * NOTE(review): the code that consumes the conn* / general* fields lies outside
 * the part of the file reviewed here; the per-field notes marked "presumably"
 * are inferred from the field names and should be confirmed against the
 * poll loop's error handling.
 */
export type BackoffConfig = {
  /** Presumably the first retry delay (ms) after a connection error — TODO confirm. */
  connInitialMs: number
  /**
   * Connection backoff cap (ms). pollSleepDetectionThresholdMs doubles this
   * value to derive the sleep/wake detection threshold.
   */
  connCapMs: number
  /** Presumably how long (ms) connection errors may persist before giving up — TODO confirm. */
  connGiveUpMs: number
  /** Presumably the first retry delay (ms) after a non-connection error — TODO confirm. */
  generalInitialMs: number
  /** Presumably the backoff cap (ms) for non-connection errors — TODO confirm. */
  generalCapMs: number
  /** Presumably how long (ms) non-connection errors may persist before giving up — TODO confirm. */
  generalGiveUpMs: number
  /** SIGTERM→SIGKILL grace period on shutdown. Default 30s. */
  shutdownGraceMs?: number
  /** stopWorkWithRetry base delay (1s/2s/4s backoff). Default 1000ms. */
  stopWorkBaseDelayMs?: number
}
  71  
  72  const DEFAULT_BACKOFF: BackoffConfig = {
  73    connInitialMs: 2_000,
  74    connCapMs: 120_000, // 2 minutes
  75    connGiveUpMs: 600_000, // 10 minutes
  76    generalInitialMs: 500,
  77    generalCapMs: 30_000,
  78    generalGiveUpMs: 600_000, // 10 minutes
  79  }
  80  
/** Status update interval for the live display (ms). */
const STATUS_UPDATE_INTERVAL_MS = 1_000
/**
 * NOTE(review): not referenced in the part of the file reviewed here;
 * presumably the default session capacity for multi-session spawn modes —
 * confirm at the use site.
 */
const SPAWN_SESSIONS_DEFAULT = 32
  84  
  85  /**
  86   * GrowthBook gate for multi-session spawn modes (--spawn / --capacity / --create-session-in-dir).
  87   * Sibling of tengu_ccr_bridge_multi_environment (multiple envs per host:dir) —
  88   * this one enables multiple sessions per environment.
  89   * Rollout staged via targeting rules: ants first, then gradual external.
  90   *
  91   * Uses the blocking gate check so a stale disk-cache miss doesn't unfairly
  92   * deny access. The fast path (cache has true) is still instant; only the
  93   * cold-start path awaits the server fetch, and that fetch also seeds the
  94   * disk cache for next time.
  95   */
  96  async function isMultiSessionSpawnEnabled(): Promise<boolean> {
  97    return checkGate_CACHED_OR_BLOCKING('tengu_ccr_bridge_multi_session')
  98  }
  99  
 100  /**
 101   * Returns the threshold for detecting system sleep/wake in the poll loop.
 102   * Must exceed the max backoff cap — otherwise normal backoff delays trigger
 103   * false sleep detection (resetting the error budget indefinitely). Using
 104   * 2× the connection backoff cap, matching the pattern in WebSocketTransport
 105   * and replBridge.
 106   */
 107  function pollSleepDetectionThresholdMs(backoff: BackoffConfig): number {
 108    return backoff.connCapMs * 2
 109  }
 110  
 111  /**
 112   * Returns the args that must precede CLI flags when spawning a child claude
 113   * process. In compiled binaries, process.execPath is the claude binary itself
 114   * and args go directly to it. In npm installs (node running cli.js),
 115   * process.execPath is the node runtime — the child spawn must pass the script
 116   * path as the first arg, otherwise node interprets --sdk-url as a node option
 117   * and exits with "bad option: --sdk-url". See anthropics/claude-code#28334.
 118   */
 119  function spawnScriptArgs(): string[] {
 120    if (isInBundledMode() || !process.argv[1]) {
 121      return []
 122    }
 123    return [process.argv[1]]
 124  }
 125  
 126  /** Attempt to spawn a session; returns error string if spawn throws. */
 127  function safeSpawn(
 128    spawner: SessionSpawner,
 129    opts: SessionSpawnOpts,
 130    dir: string,
 131  ): SessionHandle | string {
 132    try {
 133      return spawner.spawn(opts, dir)
 134    } catch (err) {
 135      const errMsg = errorMessage(err)
 136      logError(new Error(`Session spawn failed: ${errMsg}`))
 137      return errMsg
 138    }
 139  }
 140  
 141  export async function runBridgeLoop(
 142    config: BridgeConfig,
 143    environmentId: string,
 144    environmentSecret: string,
 145    api: BridgeApiClient,
 146    spawner: SessionSpawner,
 147    logger: BridgeLogger,
 148    signal: AbortSignal,
 149    backoffConfig: BackoffConfig = DEFAULT_BACKOFF,
 150    initialSessionId?: string,
 151    getAccessToken?: () => string | undefined | Promise<string | undefined>,
 152  ): Promise<void> {
 153    // Local abort controller so that onSessionDone can stop the poll loop.
 154    // Linked to the incoming signal so external aborts also work.
 155    const controller = new AbortController()
 156    if (signal.aborted) {
 157      controller.abort()
 158    } else {
 159      signal.addEventListener('abort', () => controller.abort(), { once: true })
 160    }
 161    const loopSignal = controller.signal
 162  
 163    const activeSessions = new Map<string, SessionHandle>()
 164    const sessionStartTimes = new Map<string, number>()
 165    const sessionWorkIds = new Map<string, string>()
 166    // Compat-surface ID (session_*) computed once at spawn and cached so
 167    // cleanup and status-update ticks use the same key regardless of whether
 168    // the tengu_bridge_repl_v2_cse_shim_enabled gate flips mid-session.
 169    const sessionCompatIds = new Map<string, string>()
 170    // Session ingress JWTs for heartbeat auth, keyed by sessionId.
 171    // Stored separately from handle.accessToken because the token refresh
 172    // scheduler overwrites that field with the OAuth token (~3h55m in).
 173    const sessionIngressTokens = new Map<string, string>()
 174    const sessionTimers = new Map<string, ReturnType<typeof setTimeout>>()
 175    const completedWorkIds = new Set<string>()
 176    const sessionWorktrees = new Map<
 177      string,
 178      {
 179        worktreePath: string
 180        worktreeBranch?: string
 181        gitRoot?: string
 182        hookBased?: boolean
 183      }
 184    >()
 185    // Track sessions killed by the timeout watchdog so onSessionDone can
 186    // distinguish them from server-initiated or shutdown interrupts.
 187    const timedOutSessions = new Set<string>()
 188    // Sessions that already have a title (server-set or bridge-derived) so
 189    // onFirstUserMessage doesn't clobber a user-assigned --name / web rename.
 190    // Keyed by compatSessionId to match logger.setSessionTitle's key.
 191    const titledSessions = new Set<string>()
 192    // Signal to wake the at-capacity sleep early when a session completes,
 193    // so the bridge can immediately accept new work.
 194    const capacityWake = createCapacityWake(loopSignal)
 195  
 196    /**
 197     * Heartbeat all active work items.
 198     * Returns 'ok' if at least one heartbeat succeeded, 'auth_failed' if any
 199     * got a 401/403 (JWT expired — re-queued via reconnectSession so the next
 200     * poll delivers fresh work), or 'failed' if all failed for other reasons.
 201     */
 202    async function heartbeatActiveWorkItems(): Promise<
 203      'ok' | 'auth_failed' | 'fatal' | 'failed'
 204    > {
 205      let anySuccess = false
 206      let anyFatal = false
 207      const authFailedSessions: string[] = []
 208      for (const [sessionId] of activeSessions) {
 209        const workId = sessionWorkIds.get(sessionId)
 210        const ingressToken = sessionIngressTokens.get(sessionId)
 211        if (!workId || !ingressToken) {
 212          continue
 213        }
 214        try {
 215          await api.heartbeatWork(environmentId, workId, ingressToken)
 216          anySuccess = true
 217        } catch (err) {
 218          logForDebugging(
 219            `[bridge:heartbeat] Failed for sessionId=${sessionId} workId=${workId}: ${errorMessage(err)}`,
 220          )
 221          if (err instanceof BridgeFatalError) {
 222            logEvent('tengu_bridge_heartbeat_error', {
 223              status:
 224                err.status as unknown as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 225              error_type: (err.status === 401 || err.status === 403
 226                ? 'auth_failed'
 227                : 'fatal') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 228            })
 229            if (err.status === 401 || err.status === 403) {
 230              authFailedSessions.push(sessionId)
 231            } else {
 232              // 404/410 = environment expired or deleted — no point retrying
 233              anyFatal = true
 234            }
 235          }
 236        }
 237      }
 238      // JWT expired → trigger server-side re-dispatch. Without this, work stays
 239      // ACK'd out of the Redis PEL and poll returns empty forever (CC-1263).
 240      // The existingHandle path below delivers the fresh token to the child.
 241      // sessionId is already in the format /bridge/reconnect expects: it comes
 242      // from work.data.id, which matches the server's EnvironmentInstance store
 243      // (cse_* under the compat gate, session_* otherwise).
 244      for (const sessionId of authFailedSessions) {
 245        logger.logVerbose(
 246          `Session ${sessionId} token expired — re-queuing via bridge/reconnect`,
 247        )
 248        try {
 249          await api.reconnectSession(environmentId, sessionId)
 250          logForDebugging(
 251            `[bridge:heartbeat] Re-queued sessionId=${sessionId} via bridge/reconnect`,
 252          )
 253        } catch (err) {
 254          logger.logError(
 255            `Failed to refresh session ${sessionId} token: ${errorMessage(err)}`,
 256          )
 257          logForDebugging(
 258            `[bridge:heartbeat] reconnectSession(${sessionId}) failed: ${errorMessage(err)}`,
 259            { level: 'error' },
 260          )
 261        }
 262      }
 263      if (anyFatal) {
 264        return 'fatal'
 265      }
 266      if (authFailedSessions.length > 0) {
 267        return 'auth_failed'
 268      }
 269      return anySuccess ? 'ok' : 'failed'
 270    }
 271  
 272    // Sessions spawned with CCR v2 env vars. v2 children cannot use OAuth
 273    // tokens (CCR worker endpoints validate the JWT's session_id claim,
 274    // register_worker.go:32), so onRefresh triggers server re-dispatch
 275    // instead — the next poll delivers fresh work with a new JWT via the
 276    // existingHandle path below.
 277    const v2Sessions = new Set<string>()
 278  
 279    // Proactive token refresh: schedules a timer 5min before the session
 280    // ingress JWT expires. v1 delivers OAuth directly; v2 calls
 281    // reconnectSession to trigger server re-dispatch (CC-1263: without
 282    // this, v2 daemon sessions silently die at ~5h since the server does
 283    // not auto-re-dispatch ACK'd work on lease expiry).
 284    const tokenRefresh = getAccessToken
 285      ? createTokenRefreshScheduler({
 286          getAccessToken,
 287          onRefresh: (sessionId, oauthToken) => {
 288            const handle = activeSessions.get(sessionId)
 289            if (!handle) {
 290              return
 291            }
 292            if (v2Sessions.has(sessionId)) {
 293              logger.logVerbose(
 294                `Refreshing session ${sessionId} token via bridge/reconnect`,
 295              )
 296              void api
 297                .reconnectSession(environmentId, sessionId)
 298                .catch((err: unknown) => {
 299                  logger.logError(
 300                    `Failed to refresh session ${sessionId} token: ${errorMessage(err)}`,
 301                  )
 302                  logForDebugging(
 303                    `[bridge:token] reconnectSession(${sessionId}) failed: ${errorMessage(err)}`,
 304                    { level: 'error' },
 305                  )
 306                })
 307            } else {
 308              handle.updateAccessToken(oauthToken)
 309            }
 310          },
 311          label: 'bridge',
 312        })
 313      : null
 314    const loopStartTime = Date.now()
 315    // Track all in-flight cleanup promises (stopWork, worktree removal) so
 316    // the shutdown sequence can await them before process.exit().
 317    const pendingCleanups = new Set<Promise<unknown>>()
 318    function trackCleanup(p: Promise<unknown>): void {
 319      pendingCleanups.add(p)
 320      void p.finally(() => pendingCleanups.delete(p))
 321    }
 322    let connBackoff = 0
 323    let generalBackoff = 0
 324    let connErrorStart: number | null = null
 325    let generalErrorStart: number | null = null
 326    let lastPollErrorTime: number | null = null
 327    let statusUpdateTimer: ReturnType<typeof setInterval> | null = null
 328    // Set by BridgeFatalError and give-up paths so the shutdown block can
 329    // skip the resume message (resume is impossible after env expiry/auth
 330    // failure/sustained connection errors).
 331    let fatalExit = false
 332  
 333    logForDebugging(
 334      `[bridge:work] Starting poll loop spawnMode=${config.spawnMode} maxSessions=${config.maxSessions} environmentId=${environmentId}`,
 335    )
 336    logForDiagnosticsNoPII('info', 'bridge_loop_started', {
 337      max_sessions: config.maxSessions,
 338      spawn_mode: config.spawnMode,
 339    })
 340  
 341    // For ant users, show where session debug logs will land so they can tail them.
 342    // sessionRunner.ts uses the same base path. File appears once a session spawns.
 343    if (process.env.USER_TYPE === 'ant') {
 344      let debugGlob: string
 345      if (config.debugFile) {
 346        const ext = config.debugFile.lastIndexOf('.')
 347        debugGlob =
 348          ext > 0
 349            ? `${config.debugFile.slice(0, ext)}-*${config.debugFile.slice(ext)}`
 350            : `${config.debugFile}-*`
 351      } else {
 352        debugGlob = join(tmpdir(), 'claude', 'bridge-session-*.log')
 353      }
 354      logger.setDebugLogPath(debugGlob)
 355    }
 356  
 357    logger.printBanner(config, environmentId)
 358  
 359    // Seed the logger's session count + spawn mode before any render. Without
 360    // this, setAttached() below renders with the logger's default sessionMax=1,
 361    // showing "Capacity: 0/1" until the status ticker kicks in (which is gated
 362    // by !initialSessionId and only starts after the poll loop picks up work).
 363    logger.updateSessionCount(0, config.maxSessions, config.spawnMode)
 364  
 365    // If an initial session was pre-created, show its URL from the start so
 366    // the user can click through immediately (matching /remote-control behavior).
 367    if (initialSessionId) {
 368      logger.setAttached(initialSessionId)
 369    }
 370  
 371    /** Refresh the inline status display. Shows idle or active depending on state. */
 372    function updateStatusDisplay(): void {
 373      // Push the session count (no-op when maxSessions === 1) so the
 374      // next renderStatusLine tick shows the current count.
 375      logger.updateSessionCount(
 376        activeSessions.size,
 377        config.maxSessions,
 378        config.spawnMode,
 379      )
 380  
 381      // Push per-session activity into the multi-session display.
 382      for (const [sid, handle] of activeSessions) {
 383        const act = handle.currentActivity
 384        if (act) {
 385          logger.updateSessionActivity(sessionCompatIds.get(sid) ?? sid, act)
 386        }
 387      }
 388  
 389      if (activeSessions.size === 0) {
 390        logger.updateIdleStatus()
 391        return
 392      }
 393  
 394      // Show the most recently started session that is still actively working.
 395      // Sessions whose current activity is 'result' or 'error' are between
 396      // turns — the CLI emitted its result but the process stays alive waiting
 397      // for the next user message.  Skip updating so the status line keeps
 398      // whatever state it had (Attached / session title).
 399      const [sessionId, handle] = [...activeSessions.entries()].pop()!
 400      const startTime = sessionStartTimes.get(sessionId)
 401      if (!startTime) return
 402  
 403      const activity = handle.currentActivity
 404      if (!activity || activity.type === 'result' || activity.type === 'error') {
 405        // Session is between turns — keep current status (Attached/titled).
 406        // In multi-session mode, still refresh so bullet-list activities stay current.
 407        if (config.maxSessions > 1) logger.refreshDisplay()
 408        return
 409      }
 410  
 411      const elapsed = formatDuration(Date.now() - startTime)
 412  
 413      // Build trail from recent tool activities (last 5)
 414      const trail = handle.activities
 415        .filter(a => a.type === 'tool_start')
 416        .slice(-5)
 417        .map(a => a.summary)
 418  
 419      logger.updateSessionStatus(sessionId, elapsed, activity, trail)
 420    }
 421  
 422    /** Start the status display update ticker. */
 423    function startStatusUpdates(): void {
 424      stopStatusUpdates()
 425      // Call immediately so the first transition (e.g. Connecting → Ready)
 426      // happens without delay, avoiding concurrent timer races.
 427      updateStatusDisplay()
 428      statusUpdateTimer = setInterval(
 429        updateStatusDisplay,
 430        STATUS_UPDATE_INTERVAL_MS,
 431      )
 432    }
 433  
 434    /** Stop the status display update ticker. */
 435    function stopStatusUpdates(): void {
 436      if (statusUpdateTimer) {
 437        clearInterval(statusUpdateTimer)
 438        statusUpdateTimer = null
 439      }
 440    }
 441  
  /**
   * Builds the completion callback for one spawned session.
   *
   * When the returned callback is invoked with the session's final status it:
   *  1. removes the session from every per-session map/set and cancels its
   *     timeout timer and token refresh timer,
   *  2. wakes the at-capacity sleep,
   *  3. reclassifies an 'interrupted' status as 'failed' when the session was
   *     killed by the timeout watchdog,
   *  4. emits debug, analytics and diagnostic logs plus the outcome log line,
   *  5. queues stopWork (non-interrupted sessions with a known work ID) and
   *     worktree removal through trackCleanup,
   *  6. for a non-interrupted session while the loop is still running: in
   *     single-session mode aborts the poll loop and returns; otherwise
   *     queues an archive of the session,
   *  7. restarts the status ticker if the loop has not been aborted.
   *
   * @param sessionId Key of the session in activeSessions and sibling maps.
   * @param startTime Start timestamp on the Date.now() clock; used only to
   *   compute the session duration.
   * @param handle The session's handle; only handle.lastStderr is read here.
   * @returns A callback that takes the raw SessionDoneStatus and returns nothing.
   */
  function onSessionDone(
    sessionId: string,
    startTime: number,
    handle: SessionHandle,
  ): (status: SessionDoneStatus) => void {
    return (rawStatus: SessionDoneStatus): void => {
      // Read the work ID before its map entry is deleted below.
      const workId = sessionWorkIds.get(sessionId)
      activeSessions.delete(sessionId)
      sessionStartTimes.delete(sessionId)
      sessionWorkIds.delete(sessionId)
      sessionIngressTokens.delete(sessionId)
      // Resolve the compat ID before deleting it; the logger and
      // titledSessions are keyed by it.
      const compatId = sessionCompatIds.get(sessionId) ?? sessionId
      sessionCompatIds.delete(sessionId)
      logger.removeSession(compatId)
      titledSessions.delete(compatId)
      v2Sessions.delete(sessionId)
      // Clear per-session timeout timer
      const timer = sessionTimers.get(sessionId)
      if (timer) {
        clearTimeout(timer)
        sessionTimers.delete(sessionId)
      }
      // Clear token refresh timer
      tokenRefresh?.cancel(sessionId)
      // Wake the at-capacity sleep so the bridge can accept new work immediately
      capacityWake.wake()

      // If the session was killed by the timeout watchdog, treat it as a
      // failed session (not a server/shutdown interrupt) so we still call
      // stopWork and archiveSession below.
      // Set.delete returns true only if the ID was present, so this both
      // reads and clears the timed-out marker.
      const wasTimedOut = timedOutSessions.delete(sessionId)
      const status: SessionDoneStatus =
        wasTimedOut && rawStatus === 'interrupted' ? 'failed' : rawStatus
      const durationMs = Date.now() - startTime

      logForDebugging(
        `[bridge:session] sessionId=${sessionId} workId=${workId ?? 'unknown'} exited status=${status} duration=${formatDuration(durationMs)}`,
      )
      logEvent('tengu_bridge_session_done', {
        status:
          status as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        duration_ms: durationMs,
      })
      logForDiagnosticsNoPII('info', 'bridge_session_done', {
        status,
        duration_ms: durationMs,
      })

      // Clear the status display before printing final log
      logger.clearStatus()
      stopStatusUpdates()

      // Build error message from stderr if available
      const stderrSummary =
        handle.lastStderr.length > 0 ? handle.lastStderr.join('\n') : undefined
      let failureMessage: string | undefined

      switch (status) {
        case 'completed':
          logger.logSessionComplete(sessionId, durationMs)
          break
        case 'failed':
          // Skip failure log during shutdown — the child exits non-zero when
          // killed, which is expected and not a real failure.
          // Also skip for timeout-killed sessions — the timeout watchdog
          // already logged a clear timeout message.
          if (!wasTimedOut && !loopSignal.aborted) {
            failureMessage = stderrSummary ?? 'Process exited with error'
            logger.logSessionFailed(sessionId, failureMessage)
            logError(new Error(`Bridge session failed: ${failureMessage}`))
          }
          break
        case 'interrupted':
          logger.logVerbose(`Session ${sessionId} interrupted`)
          break
      }

      // Notify the server that this work item is done. Skip for interrupted
      // sessions — interrupts are either server-initiated (the server already
      // knows) or caused by bridge shutdown (which calls stopWork() separately).
      if (status !== 'interrupted' && workId) {
        trackCleanup(
          stopWorkWithRetry(
            api,
            environmentId,
            workId,
            logger,
            backoffConfig.stopWorkBaseDelayMs,
          ),
        )
        completedWorkIds.add(workId)
      }

      // Clean up worktree if one was created for this session
      const wt = sessionWorktrees.get(sessionId)
      if (wt) {
        sessionWorktrees.delete(sessionId)
        // Best-effort: a removal failure is only logged verbosely.
        trackCleanup(
          removeAgentWorktree(
            wt.worktreePath,
            wt.worktreeBranch,
            wt.gitRoot,
            wt.hookBased,
          ).catch((err: unknown) =>
            logger.logVerbose(
              `Failed to remove worktree ${wt.worktreePath}: ${errorMessage(err)}`,
            ),
          ),
        )
      }

      // Lifecycle decision: in multi-session mode, keep the bridge running
      // after a session completes. In single-session mode, abort the poll
      // loop so the bridge exits cleanly.
      if (status !== 'interrupted' && !loopSignal.aborted) {
        if (config.spawnMode !== 'single-session') {
          // Multi-session: archive the completed session so it doesn't linger
          // as stale in the web UI. archiveSession is idempotent (409 if already
          // archived), so double-archiving at shutdown is safe.
          // sessionId arrived as cse_* from the work poll (infrastructure-layer
          // tag). archiveSession hits /v1/sessions/{id}/archive which is the
          // compat surface and validates TagSession (session_*). Re-tag — same
          // UUID underneath.
          trackCleanup(
            api
              .archiveSession(compatId)
              .catch((err: unknown) =>
                logger.logVerbose(
                  `Failed to archive session ${sessionId}: ${errorMessage(err)}`,
                ),
              ),
          )
          logForDebugging(
            `[bridge:session] Session ${status}, returning to idle (multi-session mode)`,
          )
        } else {
          // Single-session: coupled lifecycle — tear down environment
          logForDebugging(
            `[bridge:session] Session ${status}, aborting poll loop to tear down environment`,
          )
          controller.abort()
          return
        }
      }

      // Still running (multi-session, or an interrupt that was not a
      // shutdown): bring the status ticker back, since it was stopped above.
      if (!loopSignal.aborted) {
        startStatusUpdates()
      }
    }
  }
 592  
 593    // Start the idle status display immediately — unless we have a pre-created
 594    // session, in which case setAttached() already set up the display and the
 595    // poll loop will start status updates when it picks up the session.
 596    if (!initialSessionId) {
 597      startStatusUpdates()
 598    }
 599  
 600    while (!loopSignal.aborted) {
 601      // Fetched once per iteration — the GrowthBook cache refreshes every
 602      // 5 min, so a loop running at the at-capacity rate picks up config
 603      // changes within one sleep cycle.
 604      const pollConfig = getPollIntervalConfig()
 605  
 606      try {
 607        const work = await api.pollForWork(
 608          environmentId,
 609          environmentSecret,
 610          loopSignal,
 611          pollConfig.reclaim_older_than_ms,
 612        )
 613  
 614        // Log reconnection if we were previously disconnected
 615        const wasDisconnected =
 616          connErrorStart !== null || generalErrorStart !== null
 617        if (wasDisconnected) {
 618          const disconnectedMs =
 619            Date.now() - (connErrorStart ?? generalErrorStart ?? Date.now())
 620          logger.logReconnected(disconnectedMs)
 621          logForDebugging(
 622            `[bridge:poll] Reconnected after ${formatDuration(disconnectedMs)}`,
 623          )
 624          logEvent('tengu_bridge_reconnected', {
 625            disconnected_ms: disconnectedMs,
 626          })
 627        }
 628  
 629        connBackoff = 0
 630        generalBackoff = 0
 631        connErrorStart = null
 632        generalErrorStart = null
 633        lastPollErrorTime = null
 634  
 635        // Null response = no work available in the queue.
 636        // Add a minimum delay to avoid hammering the server.
 637        if (!work) {
 638          // Use live check (not a snapshot) since sessions can end during poll.
 639          const atCap = activeSessions.size >= config.maxSessions
 640          if (atCap) {
 641            const atCapMs = pollConfig.multisession_poll_interval_ms_at_capacity
 642            // Heartbeat loops WITHOUT polling. When at-capacity polling is also
 643            // enabled (atCapMs > 0), the loop tracks a deadline and breaks out
 644            // to poll at that interval — heartbeat and poll compose instead of
 645            // one suppressing the other. We break out to poll when:
 646            //   - Poll deadline reached (atCapMs > 0 only)
 647            //   - Auth fails (JWT expired → poll refreshes tokens)
 648            //   - Capacity wake fires (session ended → poll for new work)
 649            //   - Loop aborted (shutdown)
 650            if (pollConfig.non_exclusive_heartbeat_interval_ms > 0) {
 651              logEvent('tengu_bridge_heartbeat_mode_entered', {
 652                active_sessions: activeSessions.size,
 653                heartbeat_interval_ms:
 654                  pollConfig.non_exclusive_heartbeat_interval_ms,
 655              })
 656              // Deadline computed once at entry — GB updates to atCapMs don't
 657              // shift an in-flight deadline (next entry picks up the new value).
 658              const pollDeadline = atCapMs > 0 ? Date.now() + atCapMs : null
 659              let hbResult: 'ok' | 'auth_failed' | 'fatal' | 'failed' = 'ok'
 660              let hbCycles = 0
 661              while (
 662                !loopSignal.aborted &&
 663                activeSessions.size >= config.maxSessions &&
 664                (pollDeadline === null || Date.now() < pollDeadline)
 665              ) {
 666                // Re-read config each cycle so GrowthBook updates take effect
 667                const hbConfig = getPollIntervalConfig()
 668                if (hbConfig.non_exclusive_heartbeat_interval_ms <= 0) break
 669  
 670                // Capture capacity signal BEFORE the async heartbeat call so
 671                // a session ending during the HTTP request is caught by the
 672                // subsequent sleep (instead of being lost to a replaced controller).
 673                const cap = capacityWake.signal()
 674  
 675                hbResult = await heartbeatActiveWorkItems()
 676                if (hbResult === 'auth_failed' || hbResult === 'fatal') {
 677                  cap.cleanup()
 678                  break
 679                }
 680  
 681                hbCycles++
 682                await sleep(
 683                  hbConfig.non_exclusive_heartbeat_interval_ms,
 684                  cap.signal,
 685                )
 686                cap.cleanup()
 687              }
 688  
 689              // Determine exit reason for telemetry
 690              const exitReason =
 691                hbResult === 'auth_failed' || hbResult === 'fatal'
 692                  ? hbResult
 693                  : loopSignal.aborted
 694                    ? 'shutdown'
 695                    : activeSessions.size < config.maxSessions
 696                      ? 'capacity_changed'
 697                      : pollDeadline !== null && Date.now() >= pollDeadline
 698                        ? 'poll_due'
 699                        : 'config_disabled'
 700              logEvent('tengu_bridge_heartbeat_mode_exited', {
 701                reason:
 702                  exitReason as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 703                heartbeat_cycles: hbCycles,
 704                active_sessions: activeSessions.size,
 705              })
 706              if (exitReason === 'poll_due') {
 707                // bridgeApi throttles empty-poll logs (EMPTY_POLL_LOG_INTERVAL=100)
 708                // so the once-per-10min poll_due poll is invisible at counter=2.
 709                // Log it here so verification runs see both endpoints in the debug log.
 710                logForDebugging(
 711                  `[bridge:poll] Heartbeat poll_due after ${hbCycles} cycles — falling through to pollForWork`,
 712                )
 713              }
 714  
 715              // On auth_failed or fatal, sleep before polling to avoid a tight
 716              // poll+heartbeat loop. Auth_failed: heartbeatActiveWorkItems
 717              // already called reconnectSession — the sleep gives the server
 718              // time to propagate the re-queue. Fatal (404/410): may be a
 719              // single work item GC'd while the environment is still valid.
 720              // Use atCapMs if enabled, else the heartbeat interval as a floor
 721              // (guaranteed > 0 here) so heartbeat-only configs don't tight-loop.
 722              if (hbResult === 'auth_failed' || hbResult === 'fatal') {
 723                const cap = capacityWake.signal()
 724                await sleep(
 725                  atCapMs > 0
 726                    ? atCapMs
 727                    : pollConfig.non_exclusive_heartbeat_interval_ms,
 728                  cap.signal,
 729                )
 730                cap.cleanup()
 731              }
 732            } else if (atCapMs > 0) {
 733              // Heartbeat disabled: slow poll as liveness signal.
 734              const cap = capacityWake.signal()
 735              await sleep(atCapMs, cap.signal)
 736              cap.cleanup()
 737            }
 738          } else {
 739            const interval =
 740              activeSessions.size > 0
 741                ? pollConfig.multisession_poll_interval_ms_partial_capacity
 742                : pollConfig.multisession_poll_interval_ms_not_at_capacity
 743            await sleep(interval, loopSignal)
 744          }
 745          continue
 746        }
 747  
 748        // At capacity — we polled to keep the heartbeat alive, but cannot
 749        // accept new work right now. We still enter the switch below so that
 750        // token refreshes for existing sessions are processed (the case
 751        // 'session' handler checks for existing sessions before the inner
 752        // capacity guard).
 753        const atCapacityBeforeSwitch = activeSessions.size >= config.maxSessions
 754  
 755        // Skip work items that have already been completed and stopped.
 756        // The server may re-deliver stale work before processing our stop
 757        // request, which would otherwise cause a duplicate session spawn.
 758        if (completedWorkIds.has(work.id)) {
 759          logForDebugging(
 760            `[bridge:work] Skipping already-completed workId=${work.id}`,
 761          )
 762          // Respect capacity throttle — without a sleep here, persistent stale
 763          // redeliveries would tight-loop at poll-request speed (the !work
 764          // branch above is the only sleep, and work != null skips it).
 765          if (atCapacityBeforeSwitch) {
 766            const cap = capacityWake.signal()
 767            if (pollConfig.non_exclusive_heartbeat_interval_ms > 0) {
 768              await heartbeatActiveWorkItems()
 769              await sleep(
 770                pollConfig.non_exclusive_heartbeat_interval_ms,
 771                cap.signal,
 772              )
 773            } else if (pollConfig.multisession_poll_interval_ms_at_capacity > 0) {
 774              await sleep(
 775                pollConfig.multisession_poll_interval_ms_at_capacity,
 776                cap.signal,
 777              )
 778            }
 779            cap.cleanup()
 780          } else {
 781            await sleep(1000, loopSignal)
 782          }
 783          continue
 784        }
 785  
 786        // Decode the work secret for session spawning and to extract the JWT
 787        // used for the ack call below.
 788        let secret
 789        try {
 790          secret = decodeWorkSecret(work.secret)
 791        } catch (err) {
 792          const errMsg = errorMessage(err)
 793          logger.logError(
 794            `Failed to decode work secret for workId=${work.id}: ${errMsg}`,
 795          )
 796          logEvent('tengu_bridge_work_secret_failed', {})
 797          // Can't ack (needs the JWT we failed to decode). stopWork uses OAuth,
 798          // so it's callable here — prevents XAUTOCLAIM from re-delivering this
 799          // poisoned item every reclaim_older_than_ms cycle.
 800          completedWorkIds.add(work.id)
 801          trackCleanup(
 802            stopWorkWithRetry(
 803              api,
 804              environmentId,
 805              work.id,
 806              logger,
 807              backoffConfig.stopWorkBaseDelayMs,
 808            ),
 809          )
 810          // Respect capacity throttle before retrying — without a sleep here,
 811          // repeated decode failures at capacity would tight-loop at
 812          // poll-request speed (work != null skips the !work sleep above).
 813          if (atCapacityBeforeSwitch) {
 814            const cap = capacityWake.signal()
 815            if (pollConfig.non_exclusive_heartbeat_interval_ms > 0) {
 816              await heartbeatActiveWorkItems()
 817              await sleep(
 818                pollConfig.non_exclusive_heartbeat_interval_ms,
 819                cap.signal,
 820              )
 821            } else if (pollConfig.multisession_poll_interval_ms_at_capacity > 0) {
 822              await sleep(
 823                pollConfig.multisession_poll_interval_ms_at_capacity,
 824                cap.signal,
 825              )
 826            }
 827            cap.cleanup()
 828          }
 829          continue
 830        }
 831  
 832        // Acknowledge the work item. Call only AFTER committing to handle it —
 833        // NOT before: the at-capacity guard inside case 'session' can break
 834        // without spawning, and acking before that would permanently lose the
 835        // work. Ack failures are non-fatal (logged, not rethrown): the server
 836        // re-delivers, and existingHandle / completedWorkIds paths handle dedup.
 837        const ackWork = async (): Promise<void> => {
 838          logForDebugging(`[bridge:work] Acknowledging workId=${work.id}`)
 839          try {
 840            await api.acknowledgeWork(
 841              environmentId,
 842              work.id,
 843              secret.session_ingress_token, // JWT from the decoded work secret
 844            )
 845          } catch (err) { // best-effort: debug-log the failure, do not rethrow
 846            logForDebugging(
 847              `[bridge:work] Acknowledge failed workId=${work.id}: ${errorMessage(err)}`,
 848            )
 849          }
 850        }
 851  
 852        const workType: string = work.data.type
 853        switch (work.data.type) {
 854          case 'healthcheck':
 855            await ackWork()
 856            logForDebugging('[bridge:work] Healthcheck received')
 857            logger.logVerbose('Healthcheck received')
 858            break
 859          case 'session': {
 860            const sessionId = work.data.id
 861            try {
 862              validateBridgeId(sessionId, 'session_id')
 863            } catch {
 864              await ackWork()
 865              logger.logError(`Invalid session_id received: ${sessionId}`)
 866              break
 867            }
 868  
 869            // If the session is already running, deliver the fresh token so
 870            // the child process can reconnect its WebSocket with the new
 871            // session ingress token. This handles the case where the server
 872            // re-dispatches work for an existing session after the WS drops.
 873            const existingHandle = activeSessions.get(sessionId)
 874            if (existingHandle) {
 875              existingHandle.updateAccessToken(secret.session_ingress_token)
 876              sessionIngressTokens.set(sessionId, secret.session_ingress_token)
 877              sessionWorkIds.set(sessionId, work.id)
 878              // Re-schedule next refresh from the fresh JWT's expiry. onRefresh
 879              // branches on v2Sessions so both v1 and v2 are safe here.
 880              tokenRefresh?.schedule(sessionId, secret.session_ingress_token)
 881              logForDebugging(
 882                `[bridge:work] Updated access token for existing sessionId=${sessionId} workId=${work.id}`,
 883              )
 884              await ackWork()
 885              break
 886            }
 887  
 888            // At capacity — token refresh for existing sessions is handled
 889            // above, but we cannot spawn new ones. The post-switch capacity
 890            // sleep will throttle the loop; just break here.
 891            if (activeSessions.size >= config.maxSessions) {
 892              logForDebugging(
 893                `[bridge:work] At capacity (${activeSessions.size}/${config.maxSessions}), cannot spawn new session for workId=${work.id}`,
 894              )
 895              break
 896            }
 897  
 898            await ackWork()
 899            const spawnStartTime = Date.now()
 900  
 901            // CCR v2 path: register this bridge as the session worker, get the
 902            // epoch, and point the child at /v1/code/sessions/{id}. The child
 903            // already has the full v2 client (SSETransport + CCRClient) — same
 904            // code path environment-manager launches in containers.
 905            //
 906            // v1 path: Session-Ingress WebSocket. Uses config.sessionIngressUrl
 907            // (not secret.api_base_url, which may point to a remote proxy tunnel
 908            // that doesn't know about locally-created sessions).
 909            let sdkUrl: string
 910            let useCcrV2 = false
 911            let workerEpoch: number | undefined
 912            // Server decides per-session via the work secret; env var is the
 913            // ant-dev override (e.g. forcing v2 before the server flag is on).
 914            if (
 915              secret.use_code_sessions === true ||
 916              isEnvTruthy(process.env.CLAUDE_BRIDGE_USE_CCR_V2)
 917            ) {
 918              sdkUrl = buildCCRv2SdkUrl(config.apiBaseUrl, sessionId)
 919              // Retry once on transient failure (network blip, 500) before
 920              // permanently giving up and killing the session.
 921              for (let attempt = 1; attempt <= 2; attempt++) {
 922                try {
 923                  workerEpoch = await registerWorker(
 924                    sdkUrl,
 925                    secret.session_ingress_token,
 926                  )
 927                  useCcrV2 = true
 928                  logForDebugging(
 929                    `[bridge:session] CCR v2: registered worker sessionId=${sessionId} epoch=${workerEpoch} attempt=${attempt}`,
 930                  )
 931                  break
 932                } catch (err) {
 933                  const errMsg = errorMessage(err)
 934                  if (attempt < 2) {
 935                    logForDebugging(
 936                      `[bridge:session] CCR v2: registerWorker attempt ${attempt} failed, retrying: ${errMsg}`,
 937                    )
 938                    await sleep(2_000, loopSignal)
 939                    if (loopSignal.aborted) break
 940                    continue
 941                  }
 942                  logger.logError(
 943                    `CCR v2 worker registration failed for session ${sessionId}: ${errMsg}`,
 944                  )
 945                  logError(new Error(`registerWorker failed: ${errMsg}`))
 946                  completedWorkIds.add(work.id)
 947                  trackCleanup(
 948                    stopWorkWithRetry(
 949                      api,
 950                      environmentId,
 951                      work.id,
 952                      logger,
 953                      backoffConfig.stopWorkBaseDelayMs,
 954                    ),
 955                  )
 956                }
 957              }
 958              if (!useCcrV2) break
 959            } else {
 960              sdkUrl = buildSdkUrl(config.sessionIngressUrl, sessionId)
 961            }
 962  
 963            // In worktree mode, on-demand sessions get an isolated git worktree
 964            // so concurrent sessions don't interfere with each other's file
 965            // changes. The pre-created initial session (if any) runs in
 966            // config.dir so the user's first session lands in the directory they
 967            // invoked `rc` from — matching the old single-session UX.
 968            // In same-dir and single-session modes, all sessions share config.dir.
 969            // Capture spawnMode before the await below — the `w` key handler
 970            // mutates config.spawnMode directly, and createAgentWorktree can
 971            // take 1-2s, so reading config.spawnMode after the await can
 972            // produce contradictory analytics (spawn_mode:'same-dir', in_worktree:true).
 973            const spawnModeAtDecision = config.spawnMode
 974            let sessionDir = config.dir
 975            let worktreeCreateMs = 0
 976            if (
 977              spawnModeAtDecision === 'worktree' &&
 978              (initialSessionId === undefined ||
 979                !sameSessionId(sessionId, initialSessionId))
 980            ) {
 981              const wtStart = Date.now()
 982              try {
 983                const wt = await createAgentWorktree(
 984                  `bridge-${safeFilenameId(sessionId)}`,
 985                )
 986                worktreeCreateMs = Date.now() - wtStart
 987                sessionWorktrees.set(sessionId, {
 988                  worktreePath: wt.worktreePath,
 989                  worktreeBranch: wt.worktreeBranch,
 990                  gitRoot: wt.gitRoot,
 991                  hookBased: wt.hookBased,
 992                })
 993                sessionDir = wt.worktreePath
 994                logForDebugging(
 995                  `[bridge:session] Created worktree for sessionId=${sessionId} at ${wt.worktreePath}`,
 996                )
 997              } catch (err) {
 998                const errMsg = errorMessage(err)
 999                logger.logError(
1000                  `Failed to create worktree for session ${sessionId}: ${errMsg}`,
1001                )
1002                logError(new Error(`Worktree creation failed: ${errMsg}`))
1003                completedWorkIds.add(work.id)
1004                trackCleanup(
1005                  stopWorkWithRetry(
1006                    api,
1007                    environmentId,
1008                    work.id,
1009                    logger,
1010                    backoffConfig.stopWorkBaseDelayMs,
1011                  ),
1012                )
1013                break
1014              }
1015            }
1016  
1017            logForDebugging(
1018              `[bridge:session] Spawning sessionId=${sessionId} sdkUrl=${sdkUrl}`,
1019            )
1020  
1021            // compat-surface session_* form for logger/Sessions-API calls.
1022            // Work poll returns cse_* under v2 compat; convert before spawn so
1023            // the onFirstUserMessage callback can close over it.
1024            const compatSessionId = toCompatSessionId(sessionId)
1025  
1026            const spawnResult = safeSpawn(
1027              spawner,
1028              {
1029                sessionId,
1030                sdkUrl,
1031                accessToken: secret.session_ingress_token,
1032                useCcrV2,
1033                workerEpoch,
1034                onFirstUserMessage: text => { // derives a title from `text`
1035                  // Server-set titles (--name, web rename) win. fetchSessionTitle
1036                  // runs concurrently; if it already populated titledSessions,
1037                  // skip. If it hasn't resolved yet, the derived title sticks —
1038                  // acceptable since the server had no title at spawn time.
1039                  if (titledSessions.has(compatSessionId)) return
1040                  titledSessions.add(compatSessionId) // later calls hit the guard above
1041                  const title = deriveSessionTitle(text)
1042                  logger.setSessionTitle(compatSessionId, title) // hand title to the bridge logger
1043                  logForDebugging(
1044                    `[bridge:title] derived title for ${compatSessionId}: ${title}`,
1045                  )
1046                  void import('./createSession.js') // fire-and-forget: not awaited
1047                    .then(({ updateBridgeSessionTitle }) =>
1048                      updateBridgeSessionTitle(compatSessionId, title, {
1049                        baseUrl: config.apiBaseUrl,
1050                      }),
1051                    )
1052                    .catch(err => // update failure is only debug-logged at error level
1053                      logForDebugging(
1054                        `[bridge:title] failed to update title for ${compatSessionId}: ${err}`,
1055                        { level: 'error' },
1056                      ),
1057                    )
1058                },
1059              },
1060              sessionDir,
1061            )
1062            if (typeof spawnResult === 'string') {
1063              logger.logError(
1064                `Failed to spawn session ${sessionId}: ${spawnResult}`,
1065              )
1066              // Clean up worktree if one was created for this session
1067              const wt = sessionWorktrees.get(sessionId)
1068              if (wt) {
1069                sessionWorktrees.delete(sessionId)
1070                trackCleanup(
1071                  removeAgentWorktree(
1072                    wt.worktreePath,
1073                    wt.worktreeBranch,
1074                    wt.gitRoot,
1075                    wt.hookBased,
1076                  ).catch((err: unknown) =>
1077                    logger.logVerbose(
1078                      `Failed to remove worktree ${wt.worktreePath}: ${errorMessage(err)}`,
1079                    ),
1080                  ),
1081                )
1082              }
1083              completedWorkIds.add(work.id)
1084              trackCleanup(
1085                stopWorkWithRetry(
1086                  api,
1087                  environmentId,
1088                  work.id,
1089                  logger,
1090                  backoffConfig.stopWorkBaseDelayMs,
1091                ),
1092              )
1093              break
1094            }
1095            const handle = spawnResult
1096  
1097            const spawnDurationMs = Date.now() - spawnStartTime
1098            logEvent('tengu_bridge_session_started', {
1099              active_sessions: activeSessions.size,
1100              spawn_mode:
1101                spawnModeAtDecision as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
1102              in_worktree: sessionWorktrees.has(sessionId),
1103              spawn_duration_ms: spawnDurationMs,
1104              worktree_create_ms: worktreeCreateMs,
1105              inProtectedNamespace: isInProtectedNamespace(),
1106            })
1107            logForDiagnosticsNoPII('info', 'bridge_session_started', {
1108              spawn_mode: spawnModeAtDecision,
1109              in_worktree: sessionWorktrees.has(sessionId),
1110              spawn_duration_ms: spawnDurationMs,
1111              worktree_create_ms: worktreeCreateMs,
1112            })
1113  
1114            activeSessions.set(sessionId, handle)
1115            sessionWorkIds.set(sessionId, work.id)
1116            sessionIngressTokens.set(sessionId, secret.session_ingress_token)
1117            sessionCompatIds.set(sessionId, compatSessionId)
1118  
1119            const startTime = Date.now()
1120            sessionStartTimes.set(sessionId, startTime)
1121  
1122            // Use a generic prompt description since we no longer get startup_context
1123            logger.logSessionStart(sessionId, `Session ${sessionId}`)
1124  
1125            // Compute the actual debug file path (mirrors sessionRunner.ts logic)
1126            const safeId = safeFilenameId(sessionId)
1127            let sessionDebugFile: string | undefined
1128            if (config.debugFile) {
1129              const ext = config.debugFile.lastIndexOf('.')
1130              if (ext > 0) {
1131                sessionDebugFile = `${config.debugFile.slice(0, ext)}-${safeId}${config.debugFile.slice(ext)}`
1132              } else {
1133                sessionDebugFile = `${config.debugFile}-${safeId}`
1134              }
1135            } else if (config.verbose || process.env.USER_TYPE === 'ant') {
1136              sessionDebugFile = join(
1137                tmpdir(),
1138                'claude',
1139                `bridge-session-${safeId}.log`,
1140              )
1141            }
1142  
1143            if (sessionDebugFile) {
1144              logger.logVerbose(`Debug log: ${sessionDebugFile}`)
1145            }
1146  
1147            // Register in the sessions Map before starting status updates so the
1148            // first render tick shows the correct count and bullet list in sync.
1149            logger.addSession(
1150              compatSessionId,
1151              getRemoteSessionUrl(compatSessionId, config.sessionIngressUrl),
1152            )
1153  
1154            // Start live status updates and transition to "Attached" state.
1155            startStatusUpdates()
1156            logger.setAttached(compatSessionId)
1157  
1158            // One-shot title fetch. If the session already has a title (set via
1159            // --name, web rename, or /remote-control), display it and mark as
1160            // titled so the first-user-message fallback doesn't overwrite it.
1161            // Otherwise onFirstUserMessage derives one from the first prompt.
1162            void fetchSessionTitle(compatSessionId, config.apiBaseUrl)
1163              .then(title => {
1164                if (title && activeSessions.has(sessionId)) {
1165                  titledSessions.add(compatSessionId)
1166                  logger.setSessionTitle(compatSessionId, title)
1167                  logForDebugging(
1168                    `[bridge:title] server title for ${compatSessionId}: ${title}`,
1169                  )
1170                }
1171              })
1172              .catch(err =>
1173                logForDebugging(
1174                  `[bridge:title] failed to fetch title for ${compatSessionId}: ${err}`,
1175                  { level: 'error' },
1176                ),
1177              )
1178  
1179            // Start per-session timeout watchdog
1180            const timeoutMs =
1181              config.sessionTimeoutMs ?? DEFAULT_SESSION_TIMEOUT_MS
1182            if (timeoutMs > 0) {
1183              const timer = setTimeout(
1184                onSessionTimeout,
1185                timeoutMs,
1186                sessionId,
1187                timeoutMs,
1188                logger,
1189                timedOutSessions,
1190                handle,
1191              )
1192              sessionTimers.set(sessionId, timer)
1193            }
1194  
1195            // Schedule proactive token refresh before the JWT expires.
1196            // onRefresh branches on v2Sessions: v1 delivers OAuth to the
1197            // child, v2 triggers server re-dispatch via reconnectSession.
1198            if (useCcrV2) {
1199              v2Sessions.add(sessionId)
1200            }
1201            tokenRefresh?.schedule(sessionId, secret.session_ingress_token)
1202  
1203            void handle.done.then(onSessionDone(sessionId, startTime, handle))
1204            break
1205          }
1206          default:
1207            await ackWork()
1208            // Gracefully ignore unknown work types. The backend may send new
1209            // types before the bridge client is updated.
1210            logForDebugging(
1211              `[bridge:work] Unknown work type: ${workType}, skipping`,
1212            )
1213            break
1214        }
1215  
1216        // When at capacity, throttle the loop. The switch above still runs so
1217        // existing-session token refreshes are processed, but we sleep here
1218        // to avoid busy-looping. Include the capacity wake signal so the
1219        // sleep is interrupted immediately when a session completes.
1220        if (atCapacityBeforeSwitch) {
1221          const cap = capacityWake.signal()
1222          if (pollConfig.non_exclusive_heartbeat_interval_ms > 0) {
1223            await heartbeatActiveWorkItems()
1224            await sleep(
1225              pollConfig.non_exclusive_heartbeat_interval_ms,
1226              cap.signal,
1227            )
1228          } else if (pollConfig.multisession_poll_interval_ms_at_capacity > 0) {
1229            await sleep(
1230              pollConfig.multisession_poll_interval_ms_at_capacity,
1231              cap.signal,
1232            )
1233          }
1234          cap.cleanup()
1235        }
1236      } catch (err) {
1237        if (loopSignal.aborted) {
1238          break
1239        }
1240  
1241        // Fatal errors (401/403) — no point retrying, auth won't fix itself
1242        if (err instanceof BridgeFatalError) {
1243          fatalExit = true
1244          // Server-enforced expiry gets a clean status message, not an error
1245          if (isExpiredErrorType(err.errorType)) {
1246            logger.logStatus(err.message)
1247          } else if (isSuppressible403(err)) {
1248            // Cosmetic 403 errors (e.g., external_poll_sessions scope,
1249            // environments:manage permission) — don't show to user
1250            logForDebugging(`[bridge:work] Suppressed 403 error: ${err.message}`)
1251          } else {
1252            logger.logError(err.message)
1253            logError(err)
1254          }
1255          logEvent('tengu_bridge_fatal_error', {
1256            status: err.status,
1257            error_type:
1258              err.errorType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
1259          })
1260          logForDiagnosticsNoPII(
1261            isExpiredErrorType(err.errorType) ? 'info' : 'error',
1262            'bridge_fatal_error',
1263            { status: err.status, error_type: err.errorType },
1264          )
1265          break
1266        }
1267  
1268        const errMsg = describeAxiosError(err)
1269  
1270        if (isConnectionError(err) || isServerError(err)) {
1271          const now = Date.now()
1272  
1273          // Detect system sleep/wake: if the gap since the last poll error
1274          // greatly exceeds the expected backoff, the machine likely slept.
1275          // Reset error tracking so the bridge retries with a fresh budget.
1276          if (
1277            lastPollErrorTime !== null &&
1278            now - lastPollErrorTime > pollSleepDetectionThresholdMs(backoffConfig)
1279          ) {
1280            logForDebugging(
1281              `[bridge:work] Detected system sleep (${Math.round((now - lastPollErrorTime) / 1000)}s gap), resetting error budget`,
1282            )
1283            logForDiagnosticsNoPII('info', 'bridge_poll_sleep_detected', {
1284              gapMs: now - lastPollErrorTime,
1285            })
1286            connErrorStart = null
1287            connBackoff = 0
1288            generalErrorStart = null
1289            generalBackoff = 0
1290          }
1291          lastPollErrorTime = now
1292  
1293          if (!connErrorStart) {
1294            connErrorStart = now
1295          }
1296          const elapsed = now - connErrorStart
1297          if (elapsed >= backoffConfig.connGiveUpMs) {
1298            logger.logError(
1299              `Server unreachable for ${Math.round(elapsed / 60_000)} minutes, giving up.`,
1300            )
1301            logEvent('tengu_bridge_poll_give_up', {
1302              error_type:
1303                'connection' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
1304              elapsed_ms: elapsed,
1305            })
1306            logForDiagnosticsNoPII('error', 'bridge_poll_give_up', {
1307              error_type: 'connection',
1308              elapsed_ms: elapsed,
1309            })
1310            fatalExit = true
1311            break
1312          }
1313  
1314          // Reset the other track when switching error types
1315          generalErrorStart = null
1316          generalBackoff = 0
1317  
1318          connBackoff = connBackoff
1319            ? Math.min(connBackoff * 2, backoffConfig.connCapMs)
1320            : backoffConfig.connInitialMs
1321          const delay = addJitter(connBackoff)
1322          logger.logVerbose(
1323            `Connection error, retrying in ${formatDelay(delay)} (${Math.round(elapsed / 1000)}s elapsed): ${errMsg}`,
1324          )
1325          logger.updateReconnectingStatus(
1326            formatDelay(delay),
1327            formatDuration(elapsed),
1328          )
1329          // The poll_due heartbeat-loop exit leaves a healthy lease exposed to
1330          // this backoff path. Heartbeat before each sleep so /poll outages
1331          // (the VerifyEnvironmentSecretAuth DB path heartbeat was introduced
1332          // to avoid) don't kill the 300s lease TTL. No-op when activeSessions
1333          // is empty or heartbeat is disabled.
1334          if (getPollIntervalConfig().non_exclusive_heartbeat_interval_ms > 0) {
1335            await heartbeatActiveWorkItems()
1336          }
1337          await sleep(delay, loopSignal)
1338        } else {
1339          const now = Date.now()
1340  
1341          // Sleep detection for general errors (same logic as connection errors)
1342          if (
1343            lastPollErrorTime !== null &&
1344            now - lastPollErrorTime > pollSleepDetectionThresholdMs(backoffConfig)
1345          ) {
1346            logForDebugging(
1347              `[bridge:work] Detected system sleep (${Math.round((now - lastPollErrorTime) / 1000)}s gap), resetting error budget`,
1348            )
1349            logForDiagnosticsNoPII('info', 'bridge_poll_sleep_detected', {
1350              gapMs: now - lastPollErrorTime,
1351            })
1352            connErrorStart = null
1353            connBackoff = 0
1354            generalErrorStart = null
1355            generalBackoff = 0
1356          }
1357          lastPollErrorTime = now
1358  
1359          if (!generalErrorStart) {
1360            generalErrorStart = now
1361          }
1362          const elapsed = now - generalErrorStart
1363          if (elapsed >= backoffConfig.generalGiveUpMs) {
1364            logger.logError(
1365              `Persistent errors for ${Math.round(elapsed / 60_000)} minutes, giving up.`,
1366            )
1367            logEvent('tengu_bridge_poll_give_up', {
1368              error_type:
1369                'general' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
1370              elapsed_ms: elapsed,
1371            })
1372            logForDiagnosticsNoPII('error', 'bridge_poll_give_up', {
1373              error_type: 'general',
1374              elapsed_ms: elapsed,
1375            })
1376            fatalExit = true
1377            break
1378          }
1379  
1380          // Reset the other track when switching error types
1381          connErrorStart = null
1382          connBackoff = 0
1383  
1384          generalBackoff = generalBackoff
1385            ? Math.min(generalBackoff * 2, backoffConfig.generalCapMs)
1386            : backoffConfig.generalInitialMs
1387          const delay = addJitter(generalBackoff)
1388          logger.logVerbose(
1389            `Poll failed, retrying in ${formatDelay(delay)} (${Math.round(elapsed / 1000)}s elapsed): ${errMsg}`,
1390          )
1391          logger.updateReconnectingStatus(
1392            formatDelay(delay),
1393            formatDuration(elapsed),
1394          )
1395          if (getPollIntervalConfig().non_exclusive_heartbeat_interval_ms > 0) {
1396            await heartbeatActiveWorkItems()
1397          }
1398          await sleep(delay, loopSignal)
1399        }
1400      }
1401    }
1402  
1403    // Clean up
1404    stopStatusUpdates()
1405    logger.clearStatus()
1406  
1407    const loopDurationMs = Date.now() - loopStartTime
1408    logEvent('tengu_bridge_shutdown', {
1409      active_sessions: activeSessions.size,
1410      loop_duration_ms: loopDurationMs,
1411    })
1412    logForDiagnosticsNoPII('info', 'bridge_shutdown', {
1413      active_sessions: activeSessions.size,
1414      loop_duration_ms: loopDurationMs,
1415    })
1416  
1417    // Graceful shutdown: kill active sessions, report them as interrupted,
1418    // archive sessions, then deregister the environment so the web UI shows
1419    // the bridge as offline.
1420  
1421    // Collect all session IDs to archive on exit. This includes:
1422    // 1. Active sessions (snapshot before killing — onSessionDone clears maps)
1423    // 2. The initial auto-created session (may never have had work dispatched)
1424    // api.archiveSession is idempotent (409 if already archived), so
1425    // double-archiving is safe.
1426    const sessionsToArchive = new Set(activeSessions.keys())
1427    if (initialSessionId) {
1428      sessionsToArchive.add(initialSessionId)
1429    }
1430    // Snapshot before killing — onSessionDone clears sessionCompatIds.
1431    const compatIdSnapshot = new Map(sessionCompatIds)
1432  
1433    if (activeSessions.size > 0) {
1434      logForDebugging(
1435        `[bridge:shutdown] Shutting down ${activeSessions.size} active session(s)`,
1436      )
1437      logger.logStatus(
1438        `Shutting down ${activeSessions.size} active session(s)\u2026`,
1439      )
1440  
1441      // Snapshot work IDs before killing — onSessionDone clears the maps when
1442      // each child exits, so we need a copy for the stopWork calls below.
1443      const shutdownWorkIds = new Map(sessionWorkIds)
1444  
1445      for (const [sessionId, handle] of activeSessions.entries()) {
1446        logForDebugging(
1447          `[bridge:shutdown] Sending SIGTERM to sessionId=${sessionId}`,
1448        )
1449        handle.kill()
1450      }
1451  
1452      const timeout = new AbortController()
1453      await Promise.race([
1454        Promise.allSettled([...activeSessions.values()].map(h => h.done)),
1455        sleep(backoffConfig.shutdownGraceMs ?? 30_000, timeout.signal),
1456      ])
1457      timeout.abort()
1458  
1459      // SIGKILL any processes that didn't respond to SIGTERM within the grace window
1460      for (const [sid, handle] of activeSessions.entries()) {
1461        logForDebugging(`[bridge:shutdown] Force-killing stuck sessionId=${sid}`)
1462        handle.forceKill()
1463      }
1464  
1465      // Clear any remaining session timeout and refresh timers
1466      for (const timer of sessionTimers.values()) {
1467        clearTimeout(timer)
1468      }
1469      sessionTimers.clear()
1470      tokenRefresh?.cancelAll()
1471  
1472      // Clean up any remaining worktrees from active sessions.
1473      // Snapshot and clear the map first so onSessionDone (which may fire
1474      // during the await below when handle.done resolves) won't try to
1475      // remove the same worktrees again.
1476      if (sessionWorktrees.size > 0) {
1477        const remainingWorktrees = [...sessionWorktrees.values()]
1478        sessionWorktrees.clear()
1479        logForDebugging(
1480          `[bridge:shutdown] Cleaning up ${remainingWorktrees.length} worktree(s)`,
1481        )
1482        await Promise.allSettled(
1483          remainingWorktrees.map(wt =>
1484            removeAgentWorktree(
1485              wt.worktreePath,
1486              wt.worktreeBranch,
1487              wt.gitRoot,
1488              wt.hookBased,
1489            ),
1490          ),
1491        )
1492      }
1493  
1494      // Stop all active work items so the server knows they're done
1495      await Promise.allSettled(
1496        [...shutdownWorkIds.entries()].map(([sessionId, workId]) => {
1497          return api
1498            .stopWork(environmentId, workId, true)
1499            .catch(err =>
1500              logger.logVerbose(
1501                `Failed to stop work ${workId} for session ${sessionId}: ${errorMessage(err)}`,
1502              ),
1503            )
1504        }),
1505      )
1506    }
1507  
1508    // Ensure all in-flight cleanup (stopWork, worktree removal) from
1509    // onSessionDone completes before deregistering — otherwise
1510    // process.exit() can kill them mid-flight.
1511    if (pendingCleanups.size > 0) {
1512      await Promise.allSettled([...pendingCleanups])
1513    }
1514  
1515    // In single-session mode with a known session, leave the session and
1516    // environment alive so `claude remote-control --session-id=<id>` can resume.
1517    // The backend GCs stale environments via a 4h TTL (BRIDGE_LAST_POLL_TTL).
1518    // Archiving the session or deregistering the environment would make the
1519    // printed resume command a lie — deregister deletes Firestore + Redis stream.
1520    // Skip when the loop exited fatally (env expired, auth failed, give-up) —
1521    // resume is impossible in those cases and the message would contradict the
1522    // error already printed.
1523    // feature('KAIROS') gate: --session-id is ant-only; without the gate,
1524    // revert to the pre-PR behavior (archive + deregister on every shutdown).
1525    if (
1526      feature('KAIROS') &&
1527      config.spawnMode === 'single-session' &&
1528      initialSessionId &&
1529      !fatalExit
1530    ) {
1531      logger.logStatus(
1532        `Resume this session by running \`claude remote-control --continue\``,
1533      )
1534      logForDebugging(
1535        `[bridge:shutdown] Skipping archive+deregister to allow resume of session ${initialSessionId}`,
1536      )
1537      return
1538    }
1539  
1540    // Archive all known sessions so they don't linger as idle/running on the
1541    // server after the bridge goes offline.
1542    if (sessionsToArchive.size > 0) {
1543      logForDebugging(
1544        `[bridge:shutdown] Archiving ${sessionsToArchive.size} session(s)`,
1545      )
1546      await Promise.allSettled(
1547        [...sessionsToArchive].map(sessionId =>
1548          api
1549            .archiveSession(
1550              compatIdSnapshot.get(sessionId) ?? toCompatSessionId(sessionId),
1551            )
1552            .catch(err =>
1553              logger.logVerbose(
1554                `Failed to archive session ${sessionId}: ${errorMessage(err)}`,
1555              ),
1556            ),
1557        ),
1558      )
1559    }
1560  
1561    // Deregister the environment so the web UI shows the bridge as offline
1562    // and the Redis stream is cleaned up.
1563    try {
1564      await api.deregisterEnvironment(environmentId)
1565      logForDebugging(
1566        `[bridge:shutdown] Environment deregistered, bridge offline`,
1567      )
1568      logger.logVerbose('Environment deregistered.')
1569    } catch (err) {
1570      logger.logVerbose(`Failed to deregister environment: ${errorMessage(err)}`)
1571    }
1572  
1573    // Clear the crash-recovery pointer — the env is gone, pointer would be
1574    // stale. The early return above (resumable SIGINT shutdown) skips this,
1575    // leaving the pointer as a backup for the printed --session-id hint.
1576    const { clearBridgePointer } = await import('./bridgePointer.js')
1577    await clearBridgePointer(config.dir)
1578  
1579    logger.logVerbose('Environment offline.')
1580  }
1581  
/** Error codes that isConnectionError treats as connection failures. */
const CONNECTION_ERROR_CODES = new Set([
  'ECONNREFUSED',
  'ECONNRESET',
  'ETIMEDOUT',
  'ENETUNREACH',
  'EHOSTUNREACH',
])

/**
 * Report whether `err` is an object whose string `code` property is one of
 * the codes listed in CONNECTION_ERROR_CODES.
 *
 * @param err Any thrown value.
 * @returns true only for objects carrying a matching string `code`.
 */
export function isConnectionError(err: unknown): boolean {
  // Anything that is not a non-null object with a `code` key cannot match.
  if (typeof err !== 'object' || err === null || !('code' in err)) {
    return false
  }
  const { code } = err
  return typeof code === 'string' && CONNECTION_ERROR_CODES.has(code)
}
1602  
/**
 * Detect HTTP 5xx errors from axios, identified by the error object's
 * `code` being 'ERR_BAD_RESPONSE'.
 *
 * @param err Any thrown value.
 * @returns true only for objects whose `code` is exactly 'ERR_BAD_RESPONSE'.
 */
export function isServerError(err: unknown): boolean {
  if (!err || typeof err !== 'object') {
    return false
  }
  return 'code' in err && err.code === 'ERR_BAD_RESPONSE'
}
1613  
/**
 * Randomize a delay by up to 25% in either direction.
 *
 * @param ms Base delay in milliseconds.
 * @returns The jittered delay, clamped so it is never negative.
 */
function addJitter(ms: number): number {
  // Uniform factor in [-1, 1).
  const spread = 2 * Math.random() - 1
  const jittered = ms + ms * 0.25 * spread
  return Math.max(0, jittered)
}
1618  
/**
 * Format a millisecond delay for log and status output.
 *
 * Delays that round to at least 1000ms are shown in seconds with one decimal
 * place ("1.5s"); shorter delays are shown as whole milliseconds ("250ms").
 * The unit is chosen from the rounded value so that a fractional delay such
 * as 999.6 renders as "1.0s" rather than "1000ms".
 *
 * @param ms Delay in milliseconds; may be fractional.
 * @returns A short human-readable string.
 */
function formatDelay(ms: number): string {
  const wholeMs = Math.round(ms)
  return wholeMs >= 1000 ? `${(ms / 1000).toFixed(1)}s` : `${wholeMs}ms`
}
1622  
/**
 * Tell the server a work item has ended, retrying transient failures so the
 * server learns the work item ended (preventing server-side zombies).
 *
 * Makes up to three stopWork calls. After a failed non-final attempt it waits
 * a jittered exponential delay (about baseDelayMs, then 2x baseDelayMs) before
 * trying again. A BridgeFatalError ends the retries immediately. stopWork
 * failures are logged rather than rethrown.
 *
 * @param api Bridge API client used to call stopWork.
 * @param environmentId Environment the work item belongs to.
 * @param workId Work item to stop.
 * @param logger Receives verbose retry notices and final error messages.
 * @param baseDelayMs Delay before the first retry; doubled for the next one.
 */
async function stopWorkWithRetry(
  api: BridgeApiClient,
  environmentId: string,
  workId: string,
  logger: BridgeLogger,
  baseDelayMs = 1000,
): Promise<void> {
  const MAX_ATTEMPTS = 3
  let attempt = 0

  while (attempt < MAX_ATTEMPTS) {
    attempt += 1
    try {
      await api.stopWork(environmentId, workId, false)
      logForDebugging(
        `[bridge:work] stopWork succeeded for workId=${workId} on attempt ${attempt}/${MAX_ATTEMPTS}`,
      )
      return
    } catch (err) {
      // Auth/permission errors won't be fixed by retrying
      if (err instanceof BridgeFatalError) {
        if (!isSuppressible403(err)) {
          logger.logError(`Failed to stop work ${workId}: ${err.message}`)
        } else {
          logForDebugging(
            `[bridge:work] Suppressed stopWork 403 for ${workId}: ${err.message}`,
          )
        }
        logForDiagnosticsNoPII('error', 'bridge_stop_work_failed', {
          attempts: attempt,
          fatal: true,
        })
        return
      }

      const errMsg = errorMessage(err)

      // Out of attempts: report the failure and stop.
      if (attempt >= MAX_ATTEMPTS) {
        logger.logError(
          `Failed to stop work ${workId} after ${MAX_ATTEMPTS} attempts: ${errMsg}`,
        )
        logForDiagnosticsNoPII('error', 'bridge_stop_work_failed', {
          attempts: MAX_ATTEMPTS,
        })
        return
      }

      // Back off before the next attempt: base delay doubled per prior attempt.
      const delay = addJitter(baseDelayMs * 2 ** (attempt - 1))
      logger.logVerbose(
        `Failed to stop work ${workId} (attempt ${attempt}/${MAX_ATTEMPTS}), retrying in ${formatDelay(delay)}: ${errMsg}`,
      )
      await sleep(delay)
    }
  }
}
1677  
/**
 * Handle a session that exceeded its timeout: report it to the debug log,
 * analytics, and the bridge logger, add it to `timedOutSessions`, then kill
 * its handle.
 *
 * @param sessionId Session that timed out.
 * @param timeoutMs The timeout that elapsed, in milliseconds.
 * @param logger Bridge logger that receives the session-failed notice.
 * @param timedOutSessions Set the session ID is added to.
 * @param handle Handle of the session process to kill.
 */
function onSessionTimeout(
  sessionId: string,
  timeoutMs: number,
  logger: BridgeLogger,
  timedOutSessions: Set<string>,
  handle: SessionHandle,
): void {
  const elapsedLabel = formatDuration(timeoutMs)

  logForDebugging(
    `[bridge:session] sessionId=${sessionId} timed out after ${elapsedLabel}`,
  )
  logEvent('tengu_bridge_session_timeout', { timeout_ms: timeoutMs })
  logger.logSessionFailed(sessionId, `Session timed out after ${elapsedLabel}`)

  // Mark the session as timed out, then terminate it.
  timedOutSessions.add(sessionId)
  handle.kill()
}
1698  
/** Result of parseArgs: the flags accepted by `claude remote-control`. */
export type ParsedArgs = {
  /** Set by --verbose / -v. */
  verbose: boolean
  /** Set by --sandbox, cleared by --no-sandbox. */
  sandbox: boolean
  /** --debug-file value, passed through path `resolve()`. */
  debugFile?: string
  /** --session-timeout value (given in seconds) converted to milliseconds. */
  sessionTimeoutMs?: number
  /** Raw --permission-mode value; parseArgs itself does not validate it. */
  permissionMode?: string
  /** --name value. */
  name?: string
  /** Value passed to --spawn (if any); undefined if no --spawn flag was given. */
  spawnMode: SpawnMode | undefined
  /** Value passed to --capacity (if any); undefined if no --capacity flag was given. */
  capacity: number | undefined
  /** --[no-]create-session-in-dir override; undefined = use default (on). */
  createSessionInDir: boolean | undefined
  /** Resume an existing session instead of creating a new one. */
  sessionId?: string
  /** Resume the last session in this directory (reads bridge-pointer.json). */
  continueSession: boolean
  /** Set by --help / -h. */
  help: boolean
  /** Message describing why parsing failed; absent when parsing succeeded. */
  error?: string
}
1719  
/** Values accepted on the command line for --spawn. */
const SPAWN_FLAG_VALUES = ['session', 'same-dir', 'worktree'] as const

/**
 * Translate a --spawn flag value into a spawn mode.
 *
 * @param raw The flag's value, or undefined when none was supplied.
 * @returns 'single-session' for 'session', the value itself for 'same-dir'
 *   and 'worktree', or an error message string for anything else.
 */
function parseSpawnValue(raw: string | undefined): SpawnMode | string {
  switch (raw) {
    case 'session':
      return 'single-session'
    case 'same-dir':
    case 'worktree':
      return raw
    default: {
      const accepted = SPAWN_FLAG_VALUES.join(', ')
      const received = raw ?? '<missing>'
      return `--spawn requires one of: ${accepted} (got: ${received})`
    }
  }
}
1728  
/**
 * Parse the value given to --capacity.
 *
 * @param raw The flag's value, or undefined when none was supplied.
 * @returns The capacity as a positive integer, or an error message string
 *   when the value is missing or is not a plain positive decimal integer.
 */
function parseCapacityValue(raw: string | undefined): number | string {
  // parseInt() alone accepts trailing garbage ("3abc" -> 3) and truncates
  // decimals ("1.5" -> 1), so require the whole value to be digits first.
  const n =
    raw !== undefined && /^\d+$/.test(raw.trim()) ? parseInt(raw, 10) : NaN
  // isSafeInteger also rejects NaN and digit strings too large to represent.
  if (!Number.isSafeInteger(n) || n < 1) {
    return `--capacity requires a positive integer (got: ${raw ?? '<missing>'})`
  }
  return n
}
1736  
/**
 * Parse the command-line arguments of `claude remote-control`.
 *
 * Problems are reported through the returned `error` field rather than by
 * throwing: on the first invalid flag, missing or malformed value, or
 * incompatible flag combination, the values gathered so far are returned
 * with `error` set. --session-id and --continue/-c are only recognized when
 * feature('KAIROS') is on; otherwise they are reported as unknown arguments.
 *
 * @param args Arguments to parse.
 * @returns The parsed flags; `error` is present only when parsing failed.
 */
export function parseArgs(args: string[]): ParsedArgs {
  let verbose = false
  let sandbox = false
  let debugFile: string | undefined
  let sessionTimeoutMs: number | undefined
  let permissionMode: string | undefined
  let name: string | undefined
  let help = false
  let spawnMode: SpawnMode | undefined
  let capacity: number | undefined
  let createSessionInDir: boolean | undefined
  let sessionId: string | undefined
  let continueSession = false

  for (let i = 0; i < args.length; i++) {
    const arg = args[i]!
    // True when `arg` is the last token, i.e. a space-separated value flag
    // has nothing left to consume. Reported as "requires a value" instead of
    // falling through to the misleading "Unknown argument" error.
    const atEnd = i + 1 >= args.length
    if (arg === '--help' || arg === '-h') {
      help = true
    } else if (arg === '--verbose' || arg === '-v') {
      verbose = true
    } else if (arg === '--sandbox') {
      sandbox = true
    } else if (arg === '--no-sandbox') {
      sandbox = false
    } else if (arg === '--debug-file') {
      if (atEnd) return makeError('--debug-file requires a value')
      debugFile = resolve(args[++i]!)
    } else if (arg.startsWith('--debug-file=')) {
      debugFile = resolve(arg.slice('--debug-file='.length))
    } else if (
      arg === '--session-timeout' ||
      arg.startsWith('--session-timeout=')
    ) {
      if (arg === '--session-timeout' && atEnd) {
        return makeError('--session-timeout requires a value')
      }
      const raw = arg.startsWith('--session-timeout=')
        ? arg.slice('--session-timeout='.length)
        : args[++i]!
      const v = parseSessionTimeoutMs(raw)
      if (typeof v === 'number') sessionTimeoutMs = v
      else return makeError(v)
    } else if (arg === '--permission-mode') {
      if (atEnd) return makeError('--permission-mode requires a value')
      permissionMode = args[++i]!
    } else if (arg.startsWith('--permission-mode=')) {
      permissionMode = arg.slice('--permission-mode='.length)
    } else if (arg === '--name') {
      if (atEnd) return makeError('--name requires a value')
      name = args[++i]!
    } else if (arg.startsWith('--name=')) {
      name = arg.slice('--name='.length)
    } else if (feature('KAIROS') && arg === '--session-id') {
      if (atEnd) return makeError('--session-id requires a value')
      sessionId = args[++i]!
      if (!sessionId) {
        return makeError('--session-id requires a value')
      }
    } else if (feature('KAIROS') && arg.startsWith('--session-id=')) {
      sessionId = arg.slice('--session-id='.length)
      if (!sessionId) {
        return makeError('--session-id requires a value')
      }
    } else if (feature('KAIROS') && (arg === '--continue' || arg === '-c')) {
      continueSession = true
    } else if (arg === '--spawn' || arg.startsWith('--spawn=')) {
      if (spawnMode !== undefined) {
        return makeError('--spawn may only be specified once')
      }
      // A missing value reaches parseSpawnValue as undefined, which produces
      // its own "<missing>" error message.
      const raw = arg.startsWith('--spawn=')
        ? arg.slice('--spawn='.length)
        : args[++i]
      const v = parseSpawnValue(raw)
      if (v === 'single-session' || v === 'same-dir' || v === 'worktree') {
        spawnMode = v
      } else {
        return makeError(v)
      }
    } else if (arg === '--capacity' || arg.startsWith('--capacity=')) {
      if (capacity !== undefined) {
        return makeError('--capacity may only be specified once')
      }
      const raw = arg.startsWith('--capacity=')
        ? arg.slice('--capacity='.length)
        : args[++i]
      const v = parseCapacityValue(raw)
      if (typeof v === 'number') capacity = v
      else return makeError(v)
    } else if (arg === '--create-session-in-dir') {
      createSessionInDir = true
    } else if (arg === '--no-create-session-in-dir') {
      createSessionInDir = false
    } else {
      return makeError(
        `Unknown argument: ${arg}\nRun 'claude remote-control --help' for usage.`,
      )
    }
  }

  // Note: gate check for --spawn/--capacity/--create-session-in-dir is in bridgeMain
  // (gate-aware error). Flag cross-validation happens here.

  // --capacity only makes sense for multi-session modes.
  if (spawnMode === 'single-session' && capacity !== undefined) {
    return makeError(
      `--capacity cannot be used with --spawn=session (single-session mode has fixed capacity 1).`,
    )
  }

  // --session-id / --continue resume a specific session on its original
  // environment; incompatible with spawn-related flags (which configure
  // fresh session creation), and mutually exclusive with each other.
  if (
    (sessionId || continueSession) &&
    (spawnMode !== undefined ||
      capacity !== undefined ||
      createSessionInDir !== undefined)
  ) {
    return makeError(
      `--session-id and --continue cannot be used with --spawn, --capacity, or --create-session-in-dir.`,
    )
  }
  if (sessionId && continueSession) {
    return makeError(`--session-id and --continue cannot be used together.`)
  }

  return {
    verbose,
    sandbox,
    debugFile,
    sessionTimeoutMs,
    permissionMode,
    name,
    spawnMode,
    capacity,
    createSessionInDir,
    sessionId,
    continueSession,
    help,
  }

  /**
   * Convert a --session-timeout value (whole seconds) to milliseconds, or
   * return an error message when it is not a non-negative decimal integer.
   * Checking the digits first keeps parseInt from yielding NaN or silently
   * accepting values such as "10m" or "1.5".
   */
  function parseSessionTimeoutMs(raw: string): number | string {
    const seconds = /^\d+$/.test(raw.trim()) ? parseInt(raw, 10) : NaN
    if (!Number.isSafeInteger(seconds)) {
      return `--session-timeout requires a non-negative integer number of seconds (got: ${raw})`
    }
    return seconds * 1000
  }

  /** Snapshot the flags parsed so far and attach `error`. */
  function makeError(error: string): ParsedArgs {
    return {
      verbose,
      sandbox,
      debugFile,
      sessionTimeoutMs,
      permissionMode,
      name,
      spawnMode,
      capacity,
      createSessionInDir,
      sessionId,
      continueSession,
      help,
      error,
    }
  }
}
1888  
/**
 * Print the `claude remote-control` usage text to stdout.
 *
 * The multi-session sections (--spawn / --capacity /
 * --[no-]create-session-in-dir options, the server description, and the
 * worktree note) are included only when isMultiSessionSpawnEnabled() resolves
 * truthy. The --continue / --session-id entries are included only when
 * feature('KAIROS') is on.
 */
async function printHelp(): Promise<void> {
  // Use EXTERNAL_PERMISSION_MODES for help text — internal modes (bubble)
  // are ant-only and auto is feature-gated; they're still accepted by validation.
  const { EXTERNAL_PERMISSION_MODES } = await import('../types/permissions.js')
  const modes = EXTERNAL_PERMISSION_MODES.join(', ')
  // Each optional section below is either its full text or an empty string,
  // so it can be interpolated into the template unconditionally.
  const showServer = await isMultiSessionSpawnEnabled()
  const serverOptions = showServer
    ? `  --spawn <mode>                   Spawn mode: same-dir, worktree, session
                                   (default: same-dir)
  --capacity <N>                   Max concurrent sessions in worktree or
                                   same-dir mode (default: ${SPAWN_SESSIONS_DEFAULT})
  --[no-]create-session-in-dir     Pre-create a session in the current
                                   directory; in worktree mode this session
                                   stays in cwd while on-demand sessions get
                                   isolated worktrees (default: on)
`
    : ''
  const serverDescription = showServer
    ? `
  Remote Control runs as a persistent server that accepts multiple concurrent
  sessions in the current directory. One session is pre-created on start so
  you have somewhere to type immediately. Use --spawn=worktree to isolate
  each on-demand session in its own git worktree, or --spawn=session for
  the classic single-session mode (exits when that session ends). Press 'w'
  during runtime to toggle between same-dir and worktree.
`
    : ''
  const serverNote = showServer
    ? `  - Worktree mode requires a git repository or WorktreeCreate/WorktreeRemove hooks
`
    : ''
  // Whitespace inside this template is part of the printed output.
  const help = `
Remote Control - Connect your local environment to claude.ai/code

USAGE
  claude remote-control [options]
OPTIONS
  --name <name>                    Name for the session (shown in claude.ai/code)
${
  feature('KAIROS')
    ? `  -c, --continue                   Resume the last session in this directory
  --session-id <id>                Resume a specific session by ID (cannot be
                                   used with spawn flags or --continue)
`
    : ''
}  --permission-mode <mode>         Permission mode for spawned sessions
                                   (${modes})
  --debug-file <path>              Write debug logs to file
  -v, --verbose                    Enable verbose output
  -h, --help                       Show this help
${serverOptions}
DESCRIPTION
  Remote Control allows you to control sessions on your local device from
  claude.ai/code (https://claude.ai/code). Run this command in the
  directory you want to work in, then connect from the Claude app or web.
${serverDescription}
NOTES
  - You must be logged in with a Claude account that has a subscription
  - Run \`claude\` first in the directory to accept the workspace trust dialog
${serverNote}`
  // biome-ignore lint/suspicious/noConsole: intentional help output
  console.log(help)
}
1952  
/** Width limit passed to truncateToWidth when deriving a session title. */
const TITLE_MAX_LEN = 80

/**
 * Derive a single-line session title from a user message: every run of
 * whitespace (including newlines and tabs) becomes one space, the ends are
 * trimmed, and the result is truncated to TITLE_MAX_LEN.
 */
function deriveSessionTitle(text: string): string {
  // Newlines/tabs would break the single-line status display, so flatten
  // them before truncating.
  const singleLine = text.split(/\s+/).join(' ').trim()
  return truncateToWidth(singleLine, TITLE_MAX_LEN)
}
1961  
/**
 * One-shot lookup of a session's title through `getBridgeSession` from
 * createSession.ts.
 *
 * That helper (ccr-byoc headers + org UUID) is used instead of the
 * environments-level bridgeApi client, whose headers make the Sessions API
 * return 404. The caller falls back to deriving a title from the first user
 * message when this resolves to undefined.
 *
 * NOTE(review): nothing here catches errors, so "undefined on fetch failure"
 * holds only if getBridgeSession itself resolves to a nullish value on
 * failure — confirm against createSession.ts.
 *
 * @param compatSessionId Session ID passed to getBridgeSession.
 * @param baseUrl Base URL forwarded to getBridgeSession.
 * @returns The session title, or undefined when the session is nullish or
 *   its title is empty/missing.
 */
async function fetchSessionTitle(
  compatSessionId: string,
  baseUrl: string,
): Promise<string | undefined> {
  const { getBridgeSession: loadSession } = await import('./createSession.js')
  const session = await loadSession(compatSessionId, { baseUrl })
  const title = session?.title
  // Normalize every falsy title (missing, empty string) to undefined.
  return title ? title : undefined
}
1979  
1980  export async function bridgeMain(args: string[]): Promise<void> {
1981    const parsed = parseArgs(args)
1982  
1983    if (parsed.help) {
1984      await printHelp()
1985      return
1986    }
1987    if (parsed.error) {
1988      // biome-ignore lint/suspicious/noConsole: intentional error output
1989      console.error(`Error: ${parsed.error}`)
1990      // eslint-disable-next-line custom-rules/no-process-exit
1991      process.exit(1)
1992    }
1993  
1994    const {
1995      verbose,
1996      sandbox,
1997      debugFile,
1998      sessionTimeoutMs,
1999      permissionMode,
2000      name,
2001      spawnMode: parsedSpawnMode,
2002      capacity: parsedCapacity,
2003      createSessionInDir: parsedCreateSessionInDir,
2004      sessionId: parsedSessionId,
2005      continueSession,
2006    } = parsed
2007    // Mutable so --continue can set it from the pointer file. The #20460
2008    // resume flow below then treats it the same as an explicit --session-id.
2009    let resumeSessionId = parsedSessionId
2010    // When --continue found a pointer, this is the directory it came from
2011    // (may be a worktree sibling, not `dir`). On resume-flow deterministic
2012    // failure, clear THIS file so --continue doesn't keep hitting the same
2013    // dead session. Undefined for explicit --session-id (leaves pointer alone).
2014    let resumePointerDir: string | undefined
2015  
2016    const usedMultiSessionFeature =
2017      parsedSpawnMode !== undefined ||
2018      parsedCapacity !== undefined ||
2019      parsedCreateSessionInDir !== undefined
2020  
2021    // Validate permission mode early so the user gets an error before
2022    // the bridge starts polling for work.
2023    if (permissionMode !== undefined) {
2024      const { PERMISSION_MODES } = await import('../types/permissions.js')
2025      const valid: readonly string[] = PERMISSION_MODES
2026      if (!valid.includes(permissionMode)) {
2027        // biome-ignore lint/suspicious/noConsole: intentional error output
2028        console.error(
2029          `Error: Invalid permission mode '${permissionMode}'. Valid modes: ${valid.join(', ')}`,
2030        )
2031        // eslint-disable-next-line custom-rules/no-process-exit
2032        process.exit(1)
2033      }
2034    }
2035  
2036    const dir = resolve('.')
2037  
2038    // The bridge fast-path bypasses init.ts, so we must enable config reading
2039    // before any code that transitively calls getGlobalConfig()
2040    const { enableConfigs, checkHasTrustDialogAccepted } = await import(
2041      '../utils/config.js'
2042    )
2043    enableConfigs()
2044  
2045    // Initialize analytics and error reporting sinks. The bridge bypasses the
2046    // setup() init flow, so we call initSinks() directly to attach sinks here.
2047    const { initSinks } = await import('../utils/sinks.js')
2048    initSinks()
2049  
2050    // Gate-aware validation: --spawn / --capacity / --create-session-in-dir require
2051    // the multi-session gate. parseArgs has already validated flag combinations;
2052    // here we only check the gate since that requires an async GrowthBook call.
2053    // Runs after enableConfigs() (GrowthBook cache reads global config) and after
2054    // initSinks() so the denial event can be enqueued.
2055    const multiSessionEnabled = await isMultiSessionSpawnEnabled()
2056    if (usedMultiSessionFeature && !multiSessionEnabled) {
2057      await logEventAsync('tengu_bridge_multi_session_denied', {
2058        used_spawn: parsedSpawnMode !== undefined,
2059        used_capacity: parsedCapacity !== undefined,
2060        used_create_session_in_dir: parsedCreateSessionInDir !== undefined,
2061      })
2062      // logEventAsync only enqueues — process.exit() discards buffered events.
2063      // Flush explicitly, capped at 500ms to match gracefulShutdown.ts.
2064      // (sleep() doesn't unref its timer, but process.exit() follows immediately
2065      // so the ref'd timer can't delay shutdown.)
2066      await Promise.race([
2067        Promise.all([shutdown1PEventLogging(), shutdownDatadog()]),
2068        sleep(500, undefined, { unref: true }),
2069      ]).catch(() => {})
2070      // biome-ignore lint/suspicious/noConsole: intentional error output
2071      console.error(
2072        'Error: Multi-session Remote Control is not enabled for your account yet.',
2073      )
2074      // eslint-disable-next-line custom-rules/no-process-exit
2075      process.exit(1)
2076    }
2077  
2078    // Set the bootstrap CWD so that trust checks, project config lookups, and
2079    // git utilities (getBranch, getRemoteUrl) resolve against the correct path.
2080    const { setOriginalCwd, setCwdState } = await import('../bootstrap/state.js')
2081    setOriginalCwd(dir)
2082    setCwdState(dir)
2083  
2084    // The bridge bypasses main.tsx (which renders the interactive TrustDialog via showSetupScreens),
2085    // so we must verify trust was previously established by a normal `claude` session.
2086    if (!checkHasTrustDialogAccepted()) {
2087      // biome-ignore lint/suspicious/noConsole:: intentional console output
2088      console.error(
2089        `Error: Workspace not trusted. Please run \`claude\` in ${dir} first to review and accept the workspace trust dialog.`,
2090      )
2091      // eslint-disable-next-line custom-rules/no-process-exit
2092      process.exit(1)
2093    }
2094  
2095    // Resolve auth
2096    const { clearOAuthTokenCache, checkAndRefreshOAuthTokenIfNeeded } =
2097      await import('../utils/auth.js')
2098    const { getBridgeAccessToken, getBridgeBaseUrl } = await import(
2099      './bridgeConfig.js'
2100    )
2101  
2102    const bridgeToken = getBridgeAccessToken()
2103    if (!bridgeToken) {
2104      // biome-ignore lint/suspicious/noConsole:: intentional console output
2105      console.error(BRIDGE_LOGIN_ERROR)
2106      // eslint-disable-next-line custom-rules/no-process-exit
2107      process.exit(1)
2108    }
2109  
2110    // First-time remote dialog — explain what bridge does and get consent
2111    const {
2112      getGlobalConfig,
2113      saveGlobalConfig,
2114      getCurrentProjectConfig,
2115      saveCurrentProjectConfig,
2116    } = await import('../utils/config.js')
2117    if (!getGlobalConfig().remoteDialogSeen) {
2118      const readline = await import('readline')
2119      const rl = readline.createInterface({
2120        input: process.stdin,
2121        output: process.stdout,
2122      })
2123      // biome-ignore lint/suspicious/noConsole:: intentional console output
2124      console.log(
2125        '\nRemote Control lets you access this CLI session from the web (claude.ai/code)\nor the Claude app, so you can pick up where you left off on any device.\n\nYou can disconnect remote access anytime by running /remote-control again.\n',
2126      )
2127      const answer = await new Promise<string>(resolve => {
2128        rl.question('Enable Remote Control? (y/n) ', resolve)
2129      })
2130      rl.close()
2131      saveGlobalConfig(current => {
2132        if (current.remoteDialogSeen) return current
2133        return { ...current, remoteDialogSeen: true }
2134      })
2135      if (answer.toLowerCase() !== 'y' && answer.toLowerCase() !== 'yes') {
2136        // eslint-disable-next-line custom-rules/no-process-exit
2137        process.exit(0)
2138      }
2139    }
2140  
2141    // --continue: resolve the most recent session from the crash-recovery
2142    // pointer and chain into the #20460 --session-id flow. Worktree-aware:
2143    // checks current dir first (fast path, zero exec), then fans out to git
2144    // worktree siblings if that misses — the REPL bridge writes to
2145    // getOriginalCwd() which EnterWorktreeTool/activeWorktreeSession can
2146    // point at a worktree while the user's shell is at the repo root.
2147    // KAIROS-gated at parseArgs — continueSession is always false in external
2148    // builds, so this block tree-shakes.
2149    if (feature('KAIROS') && continueSession) {
2150      const { readBridgePointerAcrossWorktrees } = await import(
2151        './bridgePointer.js'
2152      )
2153      const found = await readBridgePointerAcrossWorktrees(dir)
2154      if (!found) {
2155        // biome-ignore lint/suspicious/noConsole: intentional error output
2156        console.error(
2157          `Error: No recent session found in this directory or its worktrees. Run \`claude remote-control\` to start a new one.`,
2158        )
2159        // eslint-disable-next-line custom-rules/no-process-exit
2160        process.exit(1)
2161      }
2162      const { pointer, dir: pointerDir } = found
2163      const ageMin = Math.round(pointer.ageMs / 60_000)
2164      const ageStr = ageMin < 60 ? `${ageMin}m` : `${Math.round(ageMin / 60)}h`
2165      const fromWt = pointerDir !== dir ? ` from worktree ${pointerDir}` : ''
2166      // biome-ignore lint/suspicious/noConsole: intentional info output
2167      console.error(
2168        `Resuming session ${pointer.sessionId} (${ageStr} ago)${fromWt}\u2026`,
2169      )
2170      resumeSessionId = pointer.sessionId
2171      // Track where the pointer came from so the #20460 exit(1) paths below
2172      // clear the RIGHT file on deterministic failure — otherwise --continue
2173      // would keep hitting the same dead session. May be a worktree sibling.
2174      resumePointerDir = pointerDir
2175    }
2176  
2177    // In production, baseUrl is the Anthropic API (from OAuth config).
2178    // CLAUDE_BRIDGE_BASE_URL overrides this for ant local dev only.
2179    const baseUrl = getBridgeBaseUrl()
2180  
2181    // For non-localhost targets, require HTTPS to protect credentials.
2182    if (
2183      baseUrl.startsWith('http://') &&
2184      !baseUrl.includes('localhost') &&
2185      !baseUrl.includes('127.0.0.1')
2186    ) {
2187      // biome-ignore lint/suspicious/noConsole:: intentional console output
2188      console.error(
2189        'Error: Remote Control base URL uses HTTP. Only HTTPS or localhost HTTP is allowed.',
2190      )
2191      // eslint-disable-next-line custom-rules/no-process-exit
2192      process.exit(1)
2193    }
2194  
2195    // Session ingress URL for WebSocket connections. In production this is the
2196    // same as baseUrl (Envoy routes /v1/session_ingress/* to session-ingress).
2197    // Locally, session-ingress runs on a different port (9413) than the
2198    // contain-provide-api (8211), so CLAUDE_BRIDGE_SESSION_INGRESS_URL must be
2199    // set explicitly. Ant-only, matching CLAUDE_BRIDGE_BASE_URL.
2200    const sessionIngressUrl =
2201      process.env.USER_TYPE === 'ant' &&
2202      process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
2203        ? process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
2204        : baseUrl
2205  
2206    const { getBranch, getRemoteUrl, findGitRoot } = await import(
2207      '../utils/git.js'
2208    )
2209  
2210    // Precheck worktree availability for the first-run dialog and the `w`
2211    // toggle. Unconditional so we know upfront whether worktree is an option.
2212    const { hasWorktreeCreateHook } = await import('../utils/hooks.js')
2213    const worktreeAvailable = hasWorktreeCreateHook() || findGitRoot(dir) !== null
2214  
2215    // Load saved per-project spawn-mode preference. Gated by multiSessionEnabled
2216    // so a GrowthBook rollback cleanly reverts users to single-session —
2217    // otherwise a saved pref would silently re-enable multi-session behavior
2218    // (worktree isolation, 32 max sessions, w toggle) despite the gate being off.
2219    // Also guard against a stale worktree pref left over from when this dir WAS
2220    // a git repo (or the user copied config) — clear it on disk so the warning
2221    // doesn't repeat on every launch.
2222    let savedSpawnMode = multiSessionEnabled
2223      ? getCurrentProjectConfig().remoteControlSpawnMode
2224      : undefined
2225    if (savedSpawnMode === 'worktree' && !worktreeAvailable) {
2226      // biome-ignore lint/suspicious/noConsole: intentional warning output
2227      console.error(
2228        'Warning: Saved spawn mode is worktree but this directory is not a git repository. Falling back to same-dir.',
2229      )
2230      savedSpawnMode = undefined
2231      saveCurrentProjectConfig(current => {
2232        if (current.remoteControlSpawnMode === undefined) return current
2233        return { ...current, remoteControlSpawnMode: undefined }
2234      })
2235    }
2236  
2237    // First-run spawn-mode choice: ask once per project when the choice is
2238    // meaningful (gate on, both modes available, no explicit override, not
2239    // resuming). Saves to ProjectConfig so subsequent runs skip this.
2240    if (
2241      multiSessionEnabled &&
2242      !savedSpawnMode &&
2243      worktreeAvailable &&
2244      parsedSpawnMode === undefined &&
2245      !resumeSessionId &&
2246      process.stdin.isTTY
2247    ) {
2248      const readline = await import('readline')
2249      const rl = readline.createInterface({
2250        input: process.stdin,
2251        output: process.stdout,
2252      })
2253      // biome-ignore lint/suspicious/noConsole: intentional dialog output
2254      console.log(
2255        `\nClaude Remote Control is launching in spawn mode which lets you create new sessions in this project from Claude Code on Web or your Mobile app. Learn more here: https://code.claude.com/docs/en/remote-control\n\n` +
2256          `Spawn mode for this project:\n` +
2257          `  [1] same-dir \u2014 sessions share the current directory (default)\n` +
2258          `  [2] worktree \u2014 each session gets an isolated git worktree\n\n` +
2259          `This can be changed later or explicitly set with --spawn=same-dir or --spawn=worktree.\n`,
2260      )
2261      const answer = await new Promise<string>(resolve => {
2262        rl.question('Choose [1/2] (default: 1): ', resolve)
2263      })
2264      rl.close()
2265      const chosen: 'same-dir' | 'worktree' =
2266        answer.trim() === '2' ? 'worktree' : 'same-dir'
2267      savedSpawnMode = chosen
2268      logEvent('tengu_bridge_spawn_mode_chosen', {
2269        spawn_mode:
2270          chosen as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2271      })
2272      saveCurrentProjectConfig(current => {
2273        if (current.remoteControlSpawnMode === chosen) return current
2274        return { ...current, remoteControlSpawnMode: chosen }
2275      })
2276    }
2277  
2278    // Determine effective spawn mode.
2279    // Precedence: resume > explicit --spawn > saved project pref > gate default
2280    // - resuming via --continue / --session-id: always single-session (resume
2281    //   targets one specific session in its original directory)
2282    // - explicit --spawn flag: use that value directly (does not persist)
2283    // - saved ProjectConfig.remoteControlSpawnMode: set by first-run dialog or `w`
2284    // - default with gate on: same-dir (persistent multi-session, shared cwd)
2285    // - default with gate off: single-session (unchanged legacy behavior)
2286    // Track how spawn mode was determined, for rollout analytics.
2287    type SpawnModeSource = 'resume' | 'flag' | 'saved' | 'gate_default'
2288    let spawnModeSource: SpawnModeSource
2289    let spawnMode: SpawnMode
2290    if (resumeSessionId) {
2291      spawnMode = 'single-session'
2292      spawnModeSource = 'resume'
2293    } else if (parsedSpawnMode !== undefined) {
2294      spawnMode = parsedSpawnMode
2295      spawnModeSource = 'flag'
2296    } else if (savedSpawnMode !== undefined) {
2297      spawnMode = savedSpawnMode
2298      spawnModeSource = 'saved'
2299    } else {
2300      spawnMode = multiSessionEnabled ? 'same-dir' : 'single-session'
2301      spawnModeSource = 'gate_default'
2302    }
2303    const maxSessions =
2304      spawnMode === 'single-session'
2305        ? 1
2306        : (parsedCapacity ?? SPAWN_SESSIONS_DEFAULT)
2307    // Pre-create an empty session on start so the user has somewhere to type
2308    // immediately, running in the current directory (exempted from worktree
2309    // creation in the spawn loop). On by default; --no-create-session-in-dir
2310    // opts out for a pure on-demand server where every session is isolated.
2311    // The effectiveResumeSessionId guard at the creation site handles the
2312    // resume case (skip creation when resume succeeded; fall through to
2313    // fresh creation on env-mismatch fallback).
2314    const preCreateSession = parsedCreateSessionInDir ?? true
2315  
2316    // Without --continue: a leftover pointer means the previous run didn't
2317    // shut down cleanly (crash, kill -9, terminal closed). Clear it so the
2318    // stale env doesn't linger past its relevance. Runs in all modes
2319    // (clearBridgePointer is a no-op when no file exists) — covers the
2320    // gate-transition case where a user crashed in single-session mode then
2321    // starts fresh in worktree mode. Only single-session mode writes new
2322    // pointers.
2323    if (!resumeSessionId) {
2324      const { clearBridgePointer } = await import('./bridgePointer.js')
2325      await clearBridgePointer(dir)
2326    }
2327  
2328    // Worktree mode requires either git or WorktreeCreate/WorktreeRemove hooks.
2329    // Only reachable via explicit --spawn=worktree (default is same-dir);
2330    // saved worktree pref was already guarded above.
2331    if (spawnMode === 'worktree' && !worktreeAvailable) {
2332      // biome-ignore lint/suspicious/noConsole: intentional error output
2333      console.error(
2334        `Error: Worktree mode requires a git repository or WorktreeCreate hooks configured. Use --spawn=session for single-session mode.`,
2335      )
2336      // eslint-disable-next-line custom-rules/no-process-exit
2337      process.exit(1)
2338    }
2339  
2340    const branch = await getBranch()
2341    const gitRepoUrl = await getRemoteUrl()
2342    const machineName = hostname()
2343    const bridgeId = randomUUID()
2344  
2345    const { handleOAuth401Error } = await import('../utils/auth.js')
2346    const api = createBridgeApiClient({
2347      baseUrl,
2348      getAccessToken: getBridgeAccessToken,
2349      runnerVersion: MACRO.VERSION,
2350      onDebug: logForDebugging,
2351      onAuth401: handleOAuth401Error,
2352      getTrustedDeviceToken,
2353    })
2354  
2355    // When resuming a session via --session-id, fetch it to learn its
2356    // environment_id and reuse that for registration (idempotent on the
2357    // backend). Left undefined otherwise — the backend rejects
2358    // client-generated UUIDs and will allocate a fresh environment.
2359    // feature('KAIROS') gate: --session-id is ant-only; parseArgs already
2360    // rejects the flag when the gate is off, so resumeSessionId is always
2361    // undefined here in external builds — this guard is for tree-shaking.
2362    let reuseEnvironmentId: string | undefined
2363    if (feature('KAIROS') && resumeSessionId) {
2364      try {
2365        validateBridgeId(resumeSessionId, 'sessionId')
2366      } catch {
2367        // biome-ignore lint/suspicious/noConsole: intentional error output
2368        console.error(
2369          `Error: Invalid session ID "${resumeSessionId}". Session IDs must not contain unsafe characters.`,
2370        )
2371        // eslint-disable-next-line custom-rules/no-process-exit
2372        process.exit(1)
2373      }
2374      // Proactively refresh the OAuth token — getBridgeSession uses raw axios
2375      // without the withOAuthRetry 401-refresh logic. An expired-but-present
2376      // token would otherwise produce a misleading "not found" error.
2377      await checkAndRefreshOAuthTokenIfNeeded()
2378      clearOAuthTokenCache()
2379      const { getBridgeSession } = await import('./createSession.js')
2380      const session = await getBridgeSession(resumeSessionId, {
2381        baseUrl,
2382        getAccessToken: getBridgeAccessToken,
2383      })
2384      if (!session) {
2385        // Session gone on server → pointer is stale. Clear it so the user
2386        // isn't re-prompted next launch. (Explicit --session-id leaves the
2387        // pointer alone — it's an independent file they may not even have.)
2388        // resumePointerDir may be a worktree sibling — clear THAT file.
2389        if (resumePointerDir) {
2390          const { clearBridgePointer } = await import('./bridgePointer.js')
2391          await clearBridgePointer(resumePointerDir)
2392        }
2393        // biome-ignore lint/suspicious/noConsole: intentional error output
2394        console.error(
2395          `Error: Session ${resumeSessionId} not found. It may have been archived or expired, or your login may have lapsed (run \`claude /login\`).`,
2396        )
2397        // eslint-disable-next-line custom-rules/no-process-exit
2398        process.exit(1)
2399      }
2400      if (!session.environment_id) {
2401        if (resumePointerDir) {
2402          const { clearBridgePointer } = await import('./bridgePointer.js')
2403          await clearBridgePointer(resumePointerDir)
2404        }
2405        // biome-ignore lint/suspicious/noConsole: intentional error output
2406        console.error(
2407          `Error: Session ${resumeSessionId} has no environment_id. It may never have been attached to a bridge.`,
2408        )
2409        // eslint-disable-next-line custom-rules/no-process-exit
2410        process.exit(1)
2411      }
2412      reuseEnvironmentId = session.environment_id
2413      logForDebugging(
2414        `[bridge:init] Resuming session ${resumeSessionId} on environment ${reuseEnvironmentId}`,
2415      )
2416    }
2417  
2418    const config: BridgeConfig = {
2419      dir,
2420      machineName,
2421      branch,
2422      gitRepoUrl,
2423      maxSessions,
2424      spawnMode,
2425      verbose,
2426      sandbox,
2427      bridgeId,
2428      workerType: 'claude_code',
2429      environmentId: randomUUID(),
2430      reuseEnvironmentId,
2431      apiBaseUrl: baseUrl,
2432      sessionIngressUrl,
2433      debugFile,
2434      sessionTimeoutMs,
2435    }
2436  
2437    logForDebugging(
2438      `[bridge:init] bridgeId=${bridgeId}${reuseEnvironmentId ? ` reuseEnvironmentId=${reuseEnvironmentId}` : ''} dir=${dir} branch=${branch} gitRepoUrl=${gitRepoUrl} machine=${machineName}`,
2439    )
2440    logForDebugging(
2441      `[bridge:init] apiBaseUrl=${baseUrl} sessionIngressUrl=${sessionIngressUrl}`,
2442    )
2443    logForDebugging(
2444      `[bridge:init] sandbox=${sandbox}${debugFile ? ` debugFile=${debugFile}` : ''}`,
2445    )
2446  
2447    // Register the bridge environment before entering the poll loop.
2448    let environmentId: string
2449    let environmentSecret: string
2450    try {
2451      const reg = await api.registerBridgeEnvironment(config)
2452      environmentId = reg.environment_id
2453      environmentSecret = reg.environment_secret
2454    } catch (err) {
2455      logEvent('tengu_bridge_registration_failed', {
2456        status: err instanceof BridgeFatalError ? err.status : undefined,
2457      })
2458      // Registration failures are fatal — print a clean message instead of a stack trace.
2459      // biome-ignore lint/suspicious/noConsole:: intentional console output
2460      console.error(
2461        err instanceof BridgeFatalError && err.status === 404
2462          ? 'Remote Control environments are not available for your account.'
2463          : `Error: ${errorMessage(err)}`,
2464      )
2465      // eslint-disable-next-line custom-rules/no-process-exit
2466      process.exit(1)
2467    }
2468  
2469    // Tracks whether the --session-id resume flow completed successfully.
2470    // Used below to skip fresh session creation and seed initialSessionId.
2471    // Cleared on env mismatch so we gracefully fall back to a new session.
2472    let effectiveResumeSessionId: string | undefined
2473    if (feature('KAIROS') && resumeSessionId) {
2474      if (reuseEnvironmentId && environmentId !== reuseEnvironmentId) {
2475        // Backend returned a different environment_id — the original env
2476        // expired or was reaped. Reconnect won't work against the new env
2477        // (session is bound to the old one). Log to sentry for visibility
2478        // and fall through to fresh session creation on the new env.
2479        logError(
2480          new Error(
2481            `Bridge resume env mismatch: requested ${reuseEnvironmentId}, backend returned ${environmentId}. Falling back to fresh session.`,
2482          ),
2483        )
2484        // biome-ignore lint/suspicious/noConsole: intentional warning output
2485        console.warn(
2486          `Warning: Could not resume session ${resumeSessionId} — its environment has expired. Creating a fresh session instead.`,
2487        )
2488        // Don't deregister — we're going to use this new environment.
2489        // effectiveResumeSessionId stays undefined → fresh session path below.
2490      } else {
2491        // Force-stop any stale worker instances for this session and re-queue
2492        // it so our poll loop picks it up. Must happen after registration so
2493        // the backend knows a live worker exists for the environment.
2494        //
2495        // The pointer stores a session_* ID but /bridge/reconnect looks
2496        // sessions up by their infra tag (cse_*) when ccr_v2_compat_enabled
2497        // is on. Try both; the conversion is a no-op if already cse_*.
2498        const infraResumeId = toInfraSessionId(resumeSessionId)
2499        const reconnectCandidates =
2500          infraResumeId === resumeSessionId
2501            ? [resumeSessionId]
2502            : [resumeSessionId, infraResumeId]
2503        let reconnected = false
2504        let lastReconnectErr: unknown
2505        for (const candidateId of reconnectCandidates) {
2506          try {
2507            await api.reconnectSession(environmentId, candidateId)
2508            logForDebugging(
2509              `[bridge:init] Session ${candidateId} re-queued via bridge/reconnect`,
2510            )
2511            effectiveResumeSessionId = resumeSessionId
2512            reconnected = true
2513            break
2514          } catch (err) {
2515            lastReconnectErr = err
2516            logForDebugging(
2517              `[bridge:init] reconnectSession(${candidateId}) failed: ${errorMessage(err)}`,
2518            )
2519          }
2520        }
2521        if (!reconnected) {
2522          const err = lastReconnectErr
2523  
2524          // Do NOT deregister on transient reconnect failure — at this point
2525          // environmentId IS the session's own environment. Deregistering
2526          // would make retry impossible. The backend's 4h TTL cleans up.
2527          const isFatal = err instanceof BridgeFatalError
2528          // Clear pointer only on fatal reconnect failure. Transient failures
2529          // ("try running the same command again") should keep the pointer so
2530          // next launch re-prompts — that IS the retry mechanism.
2531          if (resumePointerDir && isFatal) {
2532            const { clearBridgePointer } = await import('./bridgePointer.js')
2533            await clearBridgePointer(resumePointerDir)
2534          }
2535          // biome-ignore lint/suspicious/noConsole: intentional error output
2536          console.error(
2537            isFatal
2538              ? `Error: ${errorMessage(err)}`
2539              : `Error: Failed to reconnect session ${resumeSessionId}: ${errorMessage(err)}\nThe session may still be resumable — try running the same command again.`,
2540          )
2541          // eslint-disable-next-line custom-rules/no-process-exit
2542          process.exit(1)
2543        }
2544      }
2545    }
2546  
2547    logForDebugging(
2548      `[bridge:init] Registered, server environmentId=${environmentId}`,
2549    )
2550    const startupPollConfig = getPollIntervalConfig()
2551    logEvent('tengu_bridge_started', {
2552      max_sessions: config.maxSessions,
2553      has_debug_file: !!config.debugFile,
2554      sandbox: config.sandbox,
2555      verbose: config.verbose,
2556      heartbeat_interval_ms:
2557        startupPollConfig.non_exclusive_heartbeat_interval_ms,
2558      spawn_mode:
2559        config.spawnMode as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2560      spawn_mode_source:
2561        spawnModeSource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2562      multi_session_gate: multiSessionEnabled,
2563      pre_create_session: preCreateSession,
2564      worktree_available: worktreeAvailable,
2565    })
2566    logForDiagnosticsNoPII('info', 'bridge_started', {
2567      max_sessions: config.maxSessions,
2568      sandbox: config.sandbox,
2569      spawn_mode: config.spawnMode,
2570    })
2571  
2572    const spawner = createSessionSpawner({
2573      execPath: process.execPath,
2574      scriptArgs: spawnScriptArgs(),
2575      env: process.env,
2576      verbose,
2577      sandbox,
2578      debugFile,
2579      permissionMode,
2580      onDebug: logForDebugging,
2581      onActivity: (sessionId, activity) => {
2582        logForDebugging(
2583          `[bridge:activity] sessionId=${sessionId} ${activity.type} ${activity.summary}`,
2584        )
2585      },
2586      onPermissionRequest: (sessionId, request, _accessToken) => {
2587        logForDebugging(
2588          `[bridge:perm] sessionId=${sessionId} tool=${request.request.tool_name} request_id=${request.request_id} (not auto-approving)`,
2589        )
2590      },
2591    })
2592  
2593    const logger = createBridgeLogger({ verbose })
2594    const { parseGitHubRepository } = await import('../utils/detectRepository.js')
2595    const ownerRepo = gitRepoUrl ? parseGitHubRepository(gitRepoUrl) : null
2596    // Use the repo name from the parsed owner/repo, or fall back to the dir basename
2597    const repoName = ownerRepo ? ownerRepo.split('/').pop()! : basename(dir)
2598    logger.setRepoInfo(repoName, branch)
2599  
2600    // `w` toggle is available iff we're in a multi-session mode AND worktree
2601    // is a valid option. When unavailable, the mode suffix and hint are hidden.
2602    const toggleAvailable = spawnMode !== 'single-session' && worktreeAvailable
2603    if (toggleAvailable) {
2604      // Safe cast: spawnMode is not single-session (checked above), and the
2605      // saved-worktree-in-non-git guard + exit check above ensure worktree
2606      // is only reached when available.
2607      logger.setSpawnModeDisplay(spawnMode as 'same-dir' | 'worktree')
2608    }
2609  
2610    // Listen for keys: space toggles QR code, w toggles spawn mode
2611    const onStdinData = (data: Buffer): void => {
2612      if (data[0] === 0x03 || data[0] === 0x04) {
2613        // Ctrl+C / Ctrl+D — trigger graceful shutdown
2614        process.emit('SIGINT')
2615        return
2616      }
2617      if (data[0] === 0x20 /* space */) {
2618        logger.toggleQr()
2619        return
2620      }
2621      if (data[0] === 0x77 /* 'w' */) {
2622        if (!toggleAvailable) return
2623        const newMode: 'same-dir' | 'worktree' =
2624          config.spawnMode === 'same-dir' ? 'worktree' : 'same-dir'
2625        config.spawnMode = newMode
2626        logEvent('tengu_bridge_spawn_mode_toggled', {
2627          spawn_mode:
2628            newMode as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2629        })
2630        logger.logStatus(
2631          newMode === 'worktree'
2632            ? 'Spawn mode: worktree (new sessions get isolated git worktrees)'
2633            : 'Spawn mode: same-dir (new sessions share the current directory)',
2634        )
2635        logger.setSpawnModeDisplay(newMode)
2636        logger.refreshDisplay()
2637        saveCurrentProjectConfig(current => {
2638          if (current.remoteControlSpawnMode === newMode) return current
2639          return { ...current, remoteControlSpawnMode: newMode }
2640        })
2641        return
2642      }
2643    }
2644    if (process.stdin.isTTY) {
2645      process.stdin.setRawMode(true)
2646      process.stdin.resume()
2647      process.stdin.on('data', onStdinData)
2648    }
2649  
2650    const controller = new AbortController()
2651    const onSigint = (): void => {
2652      logForDebugging('[bridge:shutdown] SIGINT received, shutting down')
2653      controller.abort()
2654    }
2655    const onSigterm = (): void => {
2656      logForDebugging('[bridge:shutdown] SIGTERM received, shutting down')
2657      controller.abort()
2658    }
2659    process.on('SIGINT', onSigint)
2660    process.on('SIGTERM', onSigterm)
2661  
2662    // Auto-create an empty session so the user has somewhere to type
2663    // immediately (matching /remote-control behavior). Controlled by
2664    // preCreateSession: on by default; --no-create-session-in-dir opts out.
2665    // When a --session-id resume succeeded, skip creation entirely — the
2666    // session already exists and bridge/reconnect has re-queued it.
2667    // When resume was requested but failed on env mismatch, effectiveResumeSessionId
2668    // is undefined, so we fall through to fresh session creation (honoring the
2669    // "Creating a fresh session instead" warning printed above).
2670    let initialSessionId: string | null =
2671      feature('KAIROS') && effectiveResumeSessionId
2672        ? effectiveResumeSessionId
2673        : null
2674    if (preCreateSession && !(feature('KAIROS') && effectiveResumeSessionId)) {
2675      const { createBridgeSession } = await import('./createSession.js')
2676      try {
2677        initialSessionId = await createBridgeSession({
2678          environmentId,
2679          title: name,
2680          events: [],
2681          gitRepoUrl,
2682          branch,
2683          signal: controller.signal,
2684          baseUrl,
2685          getAccessToken: getBridgeAccessToken,
2686          permissionMode,
2687        })
2688        if (initialSessionId) {
2689          logForDebugging(
2690            `[bridge:init] Created initial session ${initialSessionId}`,
2691          )
2692        }
2693      } catch (err) {
2694        logForDebugging(
2695          `[bridge:init] Session creation failed (non-fatal): ${errorMessage(err)}`,
2696        )
2697      }
2698    }
2699  
2700    // Crash-recovery pointer: write immediately so kill -9 at any point
2701    // after this leaves a recoverable trail. Covers both fresh sessions and
2702    // resumed ones (so a second crash after resume is still recoverable).
2703    // Cleared when runBridgeLoop falls through to archive+deregister; left in
2704    // place on the SIGINT resumable-shutdown return (backup for when the user
2705    // closes the terminal before copying the printed --session-id hint).
2706    // Refreshed hourly so a 5h+ session that crashes still has a fresh
2707    // pointer (staleness checks file mtime, backend TTL is rolling-from-poll).
2708    let pointerRefreshTimer: ReturnType<typeof setInterval> | null = null
2709    // Single-session only: --continue forces single-session mode on resume,
2710    // so a pointer written in multi-session mode would contradict the user's
2711    // config when they try to resume. The resumable-shutdown path is also
2712    // gated to single-session (line ~1254) so the pointer would be orphaned.
2713    if (initialSessionId && spawnMode === 'single-session') {
2714      const { writeBridgePointer } = await import('./bridgePointer.js')
2715      const pointerPayload = {
2716        sessionId: initialSessionId,
2717        environmentId,
2718        source: 'standalone' as const,
2719      }
2720      await writeBridgePointer(config.dir, pointerPayload)
2721      pointerRefreshTimer = setInterval(
2722        writeBridgePointer,
2723        60 * 60 * 1000,
2724        config.dir,
2725        pointerPayload,
2726      )
2727      // Don't let the interval keep the process alive on its own.
2728      pointerRefreshTimer.unref?.()
2729    }
2730  
2731    try {
2732      await runBridgeLoop(
2733        config,
2734        environmentId,
2735        environmentSecret,
2736        api,
2737        spawner,
2738        logger,
2739        controller.signal,
2740        undefined,
2741        initialSessionId ?? undefined,
2742        async () => {
2743          // Clear the memoized OAuth token cache so we re-read from secure
2744          // storage, picking up tokens refreshed by child processes.
2745          clearOAuthTokenCache()
2746          // Proactively refresh the token if it's expired on disk too.
2747          await checkAndRefreshOAuthTokenIfNeeded()
2748          return getBridgeAccessToken()
2749        },
2750      )
2751    } finally {
2752      if (pointerRefreshTimer !== null) {
2753        clearInterval(pointerRefreshTimer)
2754      }
2755      process.off('SIGINT', onSigint)
2756      process.off('SIGTERM', onSigterm)
2757      process.stdin.off('data', onStdinData)
2758      if (process.stdin.isTTY) {
2759        process.stdin.setRawMode(false)
2760      }
2761      process.stdin.pause()
2762    }
2763  
2764    // The bridge bypasses init.ts (and its graceful shutdown handler), so we
2765    // must exit explicitly.
2766    // eslint-disable-next-line custom-rules/no-process-exit
2767    process.exit(0)
2768  }
2769  
2770  // ─── Headless bridge (daemon worker) ────────────────────────────────────────
2771  
/**
 * Error raised by runBridgeHeadless for configuration problems that a retry
 * cannot fix (trust not accepted, worktree unavailable, http-not-https).
 *
 * The daemon worker catches this and exits with EXIT_CODE_PERMANENT so the
 * supervisor parks the worker instead of respawning it on backoff.
 */
export class BridgeHeadlessPermanentError extends Error {
  /**
   * @param message Human-readable description of the permanent failure.
   */
  constructor(message: string) {
    super(message)
    // Replace the inherited 'Error' name so logs and stack traces identify
    // this as the permanent (non-retryable) failure class.
    this.name = 'BridgeHeadlessPermanentError'
  }
}
2784  
/**
 * Caller-supplied configuration for runBridgeHeadless, the non-interactive
 * bridge entrypoint.
 */
export type HeadlessBridgeOpts = {
  /**
   * Directory the bridge serves. runBridgeHeadless chdirs into it, records it
   * as the original/current cwd, and uses it as the BridgeConfig dir.
   */
  dir: string
  /** NOTE(review): presumably the session title, as in bridgeMain — confirm. */
  name?: string
  /**
   * How sessions are placed. 'worktree' requires a git repository or a
   * WorktreeCreate hook; otherwise startup throws
   * BridgeHeadlessPermanentError.
   */
  spawnMode: 'same-dir' | 'worktree'
  /** Forwarded as BridgeConfig.maxSessions. */
  capacity: number
  /** NOTE(review): presumably forwarded to spawned sessions — confirm. */
  permissionMode?: string
  /** Forwarded as BridgeConfig.sandbox. */
  sandbox: boolean
  /** Forwarded as BridgeConfig.sessionTimeoutMs. */
  sessionTimeoutMs?: number
  /**
   * NOTE(review): presumably mirrors bridgeMain's pre-created initial
   * session — confirm against the rest of runBridgeHeadless.
   */
  createSessionOnStart: boolean
  /**
   * Returns the current access token, or undefined when none is available.
   * A falsy result at startup makes runBridgeHeadless throw a plain
   * (transient) Error; the getter is also handed to the bridge API client.
   */
  getAccessToken: () => string | undefined
  /**
   * Handed to the bridge API client as its onAuth401 callback.
   * NOTE(review): the boolean presumably signals whether a refresh
   * succeeded — confirm.
   */
  onAuth401: (failedToken: string) => Promise<boolean>
  /** Log sink; handed to the bridge API client as its onDebug callback. */
  log: (s: string) => void
}
2798  
/**
 * Reports whether `rawUrl` is a plain-HTTP URL that does not point at the
 * local machine.
 *
 * The host is taken from the parsed URL rather than substring-matched, so
 * values like `http://evil.example/localhost` or
 * `http://localhost.evil.example` are not mistaken for loopback. Accepted
 * loopback hosts are `localhost`, any `*.localhost` name, and `127.0.0.1`.
 */
function isNonLoopbackHttpUrl(rawUrl: string): boolean {
  let parsed: URL
  try {
    parsed = new URL(rawUrl)
  } catch {
    // Host cannot be determined; an http:// string we cannot verify is
    // treated as non-loopback rather than given the benefit of the doubt.
    return rawUrl.toLowerCase().startsWith('http://')
  }
  if (parsed.protocol !== 'http:') {
    return false
  }
  const host = parsed.hostname
  const isLoopback =
    host === 'localhost' || host.endsWith('.localhost') || host === '127.0.0.1'
  return !isLoopback
}

/**
 * Non-interactive bridge entrypoint for the `remoteControl` daemon worker.
 *
 * Linear subset of bridgeMain(): no readline dialogs, no stdin key handlers,
 * no TUI, no process.exit(). Config comes from the caller (daemon.json), auth
 * comes via IPC (supervisor's AuthManager), logs go to the worker's stdout
 * pipe. Throws on fatal errors — the worker catches and maps permanent vs
 * transient to the right exit code.
 *
 * Resolves cleanly when `signal` aborts and the poll loop tears down.
 *
 * @param opts - Headless configuration: working directory, log sink, token
 *   accessor, spawn mode, capacity, sandbox/permission settings, and whether
 *   to pre-create a session.
 * @param signal - Abort signal forwarded to session pre-creation and to the
 *   poll loop.
 * @throws BridgeHeadlessPermanentError when the workspace is not trusted, the
 *   base URL is plain HTTP to a non-loopback host, or worktree mode is
 *   requested without a git repository or WorktreeCreate hooks.
 * @throws Error (treated as transient by the caller) when no access token is
 *   available or environment registration fails.
 */
export async function runBridgeHeadless(
  opts: HeadlessBridgeOpts,
  signal: AbortSignal,
): Promise<void> {
  const { dir, log } = opts

  // Worker inherits the supervisor's CWD. chdir first so git utilities
  // (getBranch/getRemoteUrl) — which read from bootstrap CWD state set
  // below — resolve against the right repo.
  process.chdir(dir)
  const { setOriginalCwd, setCwdState } = await import('../bootstrap/state.js')
  setOriginalCwd(dir)
  setCwdState(dir)

  const { enableConfigs, checkHasTrustDialogAccepted } = await import(
    '../utils/config.js'
  )
  enableConfigs()
  const { initSinks } = await import('../utils/sinks.js')
  initSinks()

  if (!checkHasTrustDialogAccepted()) {
    throw new BridgeHeadlessPermanentError(
      `Workspace not trusted: ${dir}. Run \`claude\` in that directory first to accept the trust dialog.`,
    )
  }

  if (!opts.getAccessToken()) {
    // Transient — supervisor's AuthManager may pick up a token on next cycle.
    throw new Error(BRIDGE_LOGIN_ERROR)
  }

  const { getBridgeBaseUrl } = await import('./bridgeConfig.js')
  const baseUrl = getBridgeBaseUrl()
  // Credentials travel to this URL, so cleartext HTTP is only tolerated when
  // the parsed host is the local machine.
  if (isNonLoopbackHttpUrl(baseUrl)) {
    throw new BridgeHeadlessPermanentError(
      'Remote Control base URL uses HTTP. Only HTTPS or localhost HTTP is allowed.',
    )
  }
  // The session-ingress override is only honoured when USER_TYPE is 'ant'.
  const sessionIngressUrl =
    process.env.USER_TYPE === 'ant' &&
    process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
      ? process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
      : baseUrl

  const { getBranch, getRemoteUrl, findGitRoot } = await import(
    '../utils/git.js'
  )
  const { hasWorktreeCreateHook } = await import('../utils/hooks.js')

  if (opts.spawnMode === 'worktree') {
    const worktreeAvailable =
      hasWorktreeCreateHook() || findGitRoot(dir) !== null
    if (!worktreeAvailable) {
      throw new BridgeHeadlessPermanentError(
        `Worktree mode requires a git repository or WorktreeCreate hooks. Directory ${dir} has neither.`,
      )
    }
  }

  const branch = await getBranch()
  const gitRepoUrl = await getRemoteUrl()
  const machineName = hostname()
  const bridgeId = randomUUID()

  const config: BridgeConfig = {
    dir,
    machineName,
    branch,
    gitRepoUrl,
    maxSessions: opts.capacity,
    spawnMode: opts.spawnMode,
    verbose: false,
    sandbox: opts.sandbox,
    bridgeId,
    workerType: 'claude_code',
    environmentId: randomUUID(),
    apiBaseUrl: baseUrl,
    sessionIngressUrl,
    sessionTimeoutMs: opts.sessionTimeoutMs,
  }

  const api = createBridgeApiClient({
    baseUrl,
    getAccessToken: opts.getAccessToken,
    runnerVersion: MACRO.VERSION,
    onDebug: log,
    onAuth401: opts.onAuth401,
    getTrustedDeviceToken,
  })

  // The id/secret used from here on are the ones returned by registration,
  // not the placeholder environmentId generated for `config` above.
  let environmentId: string
  let environmentSecret: string
  try {
    const reg = await api.registerBridgeEnvironment(config)
    environmentId = reg.environment_id
    environmentSecret = reg.environment_secret
  } catch (err) {
    // Transient — let supervisor backoff-retry.
    throw new Error(`Bridge registration failed: ${errorMessage(err)}`)
  }

  const spawner = createSessionSpawner({
    execPath: process.execPath,
    scriptArgs: spawnScriptArgs(),
    env: process.env,
    verbose: false,
    sandbox: opts.sandbox,
    permissionMode: opts.permissionMode,
    onDebug: log,
  })

  const logger = createHeadlessBridgeLogger(log)
  logger.printBanner(config, environmentId)

  // Optional eager session: failure here is logged and ignored so the poll
  // loop still starts.
  let initialSessionId: string | undefined
  if (opts.createSessionOnStart) {
    const { createBridgeSession } = await import('./createSession.js')
    try {
      const sid = await createBridgeSession({
        environmentId,
        title: opts.name,
        events: [],
        gitRepoUrl,
        branch,
        signal,
        baseUrl,
        getAccessToken: opts.getAccessToken,
        permissionMode: opts.permissionMode,
      })
      if (sid) {
        initialSessionId = sid
        log(`created initial session ${sid}`)
      }
    } catch (err) {
      log(`session pre-creation failed (non-fatal): ${errorMessage(err)}`)
    }
  }

  await runBridgeLoop(
    config,
    environmentId,
    environmentSecret,
    api,
    spawner,
    logger,
    signal,
    undefined,
    initialSessionId,
    async () => opts.getAccessToken(),
  )
}
2966  
/**
 * Builds a BridgeLogger for headless mode: lifecycle events are flattened
 * into single text lines handed to `log`, and every display/status hook is a
 * no-op.
 */
function createHeadlessBridgeLogger(log: (s: string) => void): BridgeLogger {
  // Shared do-nothing handler for the display-only hooks.
  const ignore = (): void => {}

  return {
    // --- events rendered as one log line each ---
    printBanner(cfg, envId) {
      log(
        `registered environmentId=${envId} dir=${cfg.dir} spawnMode=${cfg.spawnMode} capacity=${cfg.maxSessions}`,
      )
    },
    logSessionStart(id, _prompt) {
      log(`session start ${id}`)
    },
    logSessionComplete(id, ms) {
      log(`session complete ${id} (${ms}ms)`)
    },
    logSessionFailed(id, err) {
      log(`session failed ${id}: ${err}`)
    },
    logError(s) {
      log(`error: ${s}`)
    },
    logReconnected(ms) {
      log(`reconnected after ${ms}ms`)
    },
    addSession(id, _url) {
      log(`session attached ${id}`)
    },
    removeSession(id) {
      log(`session detached ${id}`)
    },

    // --- plain pass-through to the line logger ---
    logStatus: log,
    logVerbose: log,

    // --- display/status hooks: intentionally inert ---
    updateIdleStatus: ignore,
    updateReconnectingStatus: ignore,
    updateSessionStatus: ignore,
    updateSessionActivity: ignore,
    updateSessionCount: ignore,
    updateFailedStatus: ignore,
    setSpawnModeDisplay: ignore,
    setRepoInfo: ignore,
    setDebugLogPath: ignore,
    setAttached: ignore,
    setSessionTitle: ignore,
    clearStatus: ignore,
    toggleQr: ignore,
    refreshDisplay: ignore,
  }
}