/ upstreamproxy / upstreamproxy.ts
upstreamproxy.ts
  1  /**
  2   * CCR upstreamproxy — container-side wiring.
  3   *
  4   * When running inside a CCR session container with upstreamproxy configured,
  5   * this module:
  6   *   1. Reads the session token from /run/ccr/session_token
  7   *   2. Sets prctl(PR_SET_DUMPABLE, 0) to block same-UID ptrace of the heap
  8   *   3. Downloads the upstreamproxy CA cert and concatenates it with the
  9   *      system bundle so curl/gh/python trust the MITM proxy
 10   *   4. Starts a local CONNECT→WebSocket relay (see relay.ts)
 11   *   5. Unlinks the token file (token stays heap-only; file is gone before
 12   *      the agent loop can see it, but only after the relay is confirmed up
 13   *      so a supervisor restart can retry)
 14   *   6. Exposes HTTPS_PROXY / SSL_CERT_FILE env vars for all agent subprocesses
 15   *
 16   * Every step fails open: any error logs a warning and disables the proxy.
 17   * A broken proxy setup must never break an otherwise-working session.
 18   *
 19   * Design doc: api-go/ccr/docs/plans/CCR_AUTH_DESIGN.md § "Week-1 pilot scope".
 20   */
 21  
 22  import { mkdir, readFile, unlink, writeFile } from 'fs/promises'
 23  import { homedir } from 'os'
 24  import { join } from 'path'
 25  import { registerCleanup } from '../utils/cleanupRegistry.js'
 26  import { logForDebugging } from '../utils/debug.js'
 27  import { isEnvTruthy } from '../utils/envUtils.js'
 28  import { isENOENT } from '../utils/errors.js'
 29  import { startUpstreamProxyRelay } from './relay.js'
 30  
 31  export const SESSION_TOKEN_PATH = '/run/ccr/session_token'
 32  const SYSTEM_CA_BUNDLE = '/etc/ssl/certs/ca-certificates.crt'
 33  
 34  // Hosts the proxy must NOT intercept. Covers loopback, RFC1918, the IMDS
 35  // range, and the package registries + GitHub that CCR containers already
 36  // reach directly. Mirrors airlock/scripts/sandbox-shell-ccr.sh.
 37  const NO_PROXY_LIST = [
 38    'localhost',
 39    '127.0.0.1',
 40    '::1',
 41    '169.254.0.0/16',
 42    '10.0.0.0/8',
 43    '172.16.0.0/12',
 44    '192.168.0.0/16',
 45    // Anthropic API: no upstream route will ever match, and the MITM breaks
 46    // non-Bun runtimes (Python httpx/certifi doesn't trust the forged CA).
 47    // Three forms because NO_PROXY parsing differs across runtimes:
 48    //   *.anthropic.com  — Bun, curl, Go (glob match)
 49    //   .anthropic.com   — Python urllib/httpx (suffix match, strips leading dot)
 50    //   anthropic.com    — apex domain fallback
 51    'anthropic.com',
 52    '.anthropic.com',
 53    '*.anthropic.com',
 54    'github.com',
 55    'api.github.com',
 56    '*.github.com',
 57    '*.githubusercontent.com',
 58    'registry.npmjs.org',
 59    'pypi.org',
 60    'files.pythonhosted.org',
 61    'index.crates.io',
 62    'proxy.golang.org',
 63  ].join(',')
 64  
 65  type UpstreamProxyState = {
 66    enabled: boolean
 67    port?: number
 68    caBundlePath?: string
 69  }
 70  
 71  let state: UpstreamProxyState = { enabled: false }
 72  
 73  /**
 74   * Initialize upstreamproxy. Called once from init.ts. Safe to call when the
 75   * feature is off or the token file is absent — returns {enabled: false}.
 76   *
 77   * Overridable paths are for tests; production uses the defaults.
 78   */
 79  export async function initUpstreamProxy(opts?: {
 80    tokenPath?: string
 81    systemCaPath?: string
 82    caBundlePath?: string
 83    ccrBaseUrl?: string
 84  }): Promise<UpstreamProxyState> {
 85    if (!isEnvTruthy(process.env.CLAUDE_CODE_REMOTE)) {
 86      return state
 87    }
 88    // CCR evaluates ccr_upstream_proxy_enabled server-side (where GrowthBook is
 89    // warm) and injects this env var via StartupContext.EnvironmentVariables.
 90    // Every CCR session is a fresh container with no GB cache, so a client-side
 91    // GB check here always returned the default (false).
 92    if (!isEnvTruthy(process.env.CCR_UPSTREAM_PROXY_ENABLED)) {
 93      return state
 94    }
 95  
 96    const sessionId = process.env.CLAUDE_CODE_REMOTE_SESSION_ID
 97    if (!sessionId) {
 98      logForDebugging(
 99        '[upstreamproxy] CLAUDE_CODE_REMOTE_SESSION_ID unset; proxy disabled',
100        { level: 'warn' },
101      )
102      return state
103    }
104  
105    const tokenPath = opts?.tokenPath ?? SESSION_TOKEN_PATH
106    const token = await readToken(tokenPath)
107    if (!token) {
108      logForDebugging('[upstreamproxy] no session token file; proxy disabled')
109      return state
110    }
111  
112    setNonDumpable()
113  
114    // CCR injects ANTHROPIC_BASE_URL via StartupContext (sessionExecutor.ts /
115    // sessionHandler.ts). getOauthConfig() is wrong here: it keys off
116    // USER_TYPE + USE_{LOCAL,STAGING}_OAUTH, none of which the container sets,
117    // so it always returned the prod URL and the CA fetch 404'd.
118    const baseUrl =
119      opts?.ccrBaseUrl ??
120      process.env.ANTHROPIC_BASE_URL ??
121      'https://api.anthropic.com'
122    const caBundlePath =
123      opts?.caBundlePath ?? join(homedir(), '.ccr', 'ca-bundle.crt')
124  
125    const caOk = await downloadCaBundle(
126      baseUrl,
127      opts?.systemCaPath ?? SYSTEM_CA_BUNDLE,
128      caBundlePath,
129    )
130    if (!caOk) return state
131  
132    try {
133      const wsUrl = baseUrl.replace(/^http/, 'ws') + '/v1/code/upstreamproxy/ws'
134      const relay = await startUpstreamProxyRelay({ wsUrl, sessionId, token })
135      registerCleanup(async () => relay.stop())
136      state = { enabled: true, port: relay.port, caBundlePath }
137      logForDebugging(`[upstreamproxy] enabled on 127.0.0.1:${relay.port}`)
138      // Only unlink after the listener is up: if CA download or listen()
139      // fails, a supervisor restart can retry with the token still on disk.
140      await unlink(tokenPath).catch(() => {
141        logForDebugging('[upstreamproxy] token file unlink failed', {
142          level: 'warn',
143        })
144      })
145    } catch (err) {
146      logForDebugging(
147        `[upstreamproxy] relay start failed: ${err instanceof Error ? err.message : String(err)}; proxy disabled`,
148        { level: 'warn' },
149      )
150    }
151  
152    return state
153  }
154  
/**
 * Builds the env vars to merge into every agent subprocess. Called from
 * subprocessEnv() so Bash/MCP/LSP/hooks all inherit the same recipe.
 *
 * @returns Proxy + CA-bundle variables when this process owns a running
 *   relay; the inherited proxy variables when a parent already exported
 *   them; otherwise an empty object.
 */
export function getUpstreamProxyEnv(): Record<string, string> {
  const { enabled, port, caBundlePath } = state

  if (enabled && port && caBundlePath) {
    // HTTPS only: the relay handles CONNECT and nothing else. Plain HTTP has
    // no credentials to inject, so routing it through the relay would just
    // break the request with a 405.
    const relayUrl = `http://127.0.0.1:${port}`
    return {
      HTTPS_PROXY: relayUrl,
      https_proxy: relayUrl,
      NO_PROXY: NO_PROXY_LIST,
      no_proxy: NO_PROXY_LIST,
      SSL_CERT_FILE: caBundlePath,
      NODE_EXTRA_CA_CERTS: caBundlePath,
      REQUESTS_CA_BUNDLE: caBundlePath,
      CURL_CA_BUNDLE: caBundlePath,
    }
  }

  // No relay of our own. Child CLI processes can't re-initialize one (the
  // parent unlinked the token file), but the parent's relay is still
  // reachable at 127.0.0.1:<port>. When both HTTPS_PROXY and SSL_CERT_FILE
  // were inherited, forward the whole set so our subprocesses route through
  // the parent's relay too.
  const env = process.env
  if (!env.HTTPS_PROXY || !env.SSL_CERT_FILE) {
    return {}
  }

  const forwardedNames = [
    'HTTPS_PROXY',
    'https_proxy',
    'NO_PROXY',
    'no_proxy',
    'SSL_CERT_FILE',
    'NODE_EXTRA_CA_CERTS',
    'REQUESTS_CA_BUNDLE',
    'CURL_CA_BUNDLE',
  ]
  const forwarded: Record<string, string> = {}
  for (const name of forwardedNames) {
    const value = env[name]
    // Unset and empty variables are both left out.
    if (value) forwarded[name] = value
  }
  return forwarded
}
200  
/**
 * Test-only hook: replaces the module state with a fresh disabled state so
 * one test case cannot leak an enabled proxy into the next.
 */
export function resetUpstreamProxyForTests(): void {
  const disabled: UpstreamProxyState = { enabled: false }
  state = disabled
}
205  
/**
 * Reads the session token from `path`.
 *
 * @param path File to read as UTF-8.
 * @returns The file contents with surrounding whitespace trimmed, or null
 *   when the file is missing, unreadable, or blank. A missing file is
 *   silent; any other read error is logged at warn level.
 */
async function readToken(path: string): Promise<string | null> {
  let contents: string
  try {
    contents = await readFile(path, 'utf8')
  } catch (err) {
    // ENOENT is the expected "no token provisioned" case, so it is not logged.
    if (!isENOENT(err)) {
      logForDebugging(
        `[upstreamproxy] token read failed: ${err instanceof Error ? err.message : String(err)}`,
        { level: 'warn' },
      )
    }
    return null
  }

  const token = contents.trim()
  return token === '' ? null : token
}
219  
/**
 * Calls prctl(PR_SET_DUMPABLE, 0) via libc FFI. Blocks same-UID ptrace of
 * this process, so a prompt-injected `gdb -p $PPID` can't scrape the token
 * from the heap.
 *
 * Returns without doing anything unless running on Linux under Bun. A
 * nonzero prctl result, or any failure loading bun:ffi / libc, is logged at
 * warn level and otherwise ignored.
 */
function setNonDumpable(): void {
  const onLinux = process.platform === 'linux'
  const underBun = typeof Bun !== 'undefined'
  if (!onLinux || !underBun) return

  // Option number passed to prctl as its first argument.
  const PR_SET_DUMPABLE = 4
  try {
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    const bunFfi = require('bun:ffi') as typeof import('bun:ffi')
    const libc = bunFfi.dlopen('libc.so.6', {
      prctl: {
        args: ['int', 'u64', 'u64', 'u64', 'u64'],
        returns: 'int',
      },
    } as const)
    // The four u64 arguments are passed as bigint zeros.
    const status = libc.symbols.prctl(PR_SET_DUMPABLE, 0n, 0n, 0n, 0n)
    if (status === 0) return
    logForDebugging(
      '[upstreamproxy] prctl(PR_SET_DUMPABLE,0) returned nonzero',
      {
        level: 'warn',
      },
    )
  } catch (err) {
    logForDebugging(
      `[upstreamproxy] prctl unavailable: ${err instanceof Error ? err.message : String(err)}`,
      { level: 'warn' },
    )
  }
}
253  
/**
 * Downloads the upstreamproxy CA cert and writes it, appended to the system
 * CA bundle, to `outPath` (parent directories are created as needed).
 *
 * @param baseUrl API base URL; trailing slashes are ignored.
 * @param systemCaPath System CA bundle to prepend. If it cannot be read, a
 *   warning is logged and the output contains only the downloaded cert.
 * @param outPath Destination of the combined bundle.
 * @returns true when the bundle was written; false (after logging a
 *   warning) on a non-2xx response, an empty response body, a timeout, or
 *   any filesystem/network error. Never throws.
 */
async function downloadCaBundle(
  baseUrl: string,
  systemCaPath: string,
  outPath: string,
): Promise<boolean> {
  try {
    // Drop trailing slashes so the request path never starts with '//'.
    const caUrl = `${baseUrl.replace(/\/+$/, '')}/v1/code/upstreamproxy/ca-cert`
    // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
    const resp = await fetch(caUrl, {
      // Bun has no default fetch timeout — a hung endpoint would block CLI
      // startup forever. 5s is generous for a small PEM.
      signal: AbortSignal.timeout(5000),
    })
    if (!resp.ok) {
      logForDebugging(
        `[upstreamproxy] ca-cert fetch ${resp.status}; proxy disabled`,
        { level: 'warn' },
      )
      return false
    }
    const ccrCa = await resp.text()
    // An empty body would produce a bundle without the proxy CA; every
    // request routed through the proxy would then fail verification.
    if (ccrCa.trim() === '') {
      logForDebugging(
        '[upstreamproxy] ca-cert response empty; proxy disabled',
        { level: 'warn' },
      )
      return false
    }
    // Best-effort: a missing system bundle does not abort, but say so
    // instead of silently writing a bundle with only the proxy CA in it.
    const systemCa = await readFile(systemCaPath, 'utf8').catch(
      (err: unknown) => {
        logForDebugging(
          `[upstreamproxy] system CA bundle read failed: ${err instanceof Error ? err.message : String(err)}; bundle will contain only the upstreamproxy CA`,
          { level: 'warn' },
        )
        return ''
      },
    )
    // join(outPath, '..') resolves to the directory containing outPath.
    await mkdir(join(outPath, '..'), { recursive: true })
    await writeFile(outPath, systemCa + '\n' + ccrCa, 'utf8')
    return true
  } catch (err) {
    logForDebugging(
      `[upstreamproxy] ca-cert download failed: ${err instanceof Error ? err.message : String(err)}; proxy disabled`,
      { level: 'warn' },
    )
    return false
  }
}