test_codex_cloudflare_headers.py
"""Regression guard: Codex Cloudflare 403 mitigation headers.

The ``chatgpt.com/backend-api/codex`` endpoint sits behind a Cloudflare layer
that whitelists a small set of first-party originators (``codex_cli_rs``,
``codex_vscode``, ``codex_sdk_ts``, ``Codex*``). Requests from non-residential
IPs (VPS, always-on servers, some corporate egress) that don't advertise an
allowed originator are served 403 with ``cf-mitigated: challenge`` regardless
of auth correctness.

``_codex_cloudflare_headers`` in ``agent.auxiliary_client`` centralizes the
header set so the primary chat client (``run_agent.AIAgent.__init__`` +
``_apply_client_headers_for_base_url``) and the auxiliary client paths
(``_build_codex_client`` and the ``raw_codex`` branch of ``resolve_provider_client``)
all emit the same headers.

These tests pin:
- the originator value (must be ``codex_cli_rs`` — the whitelisted one)
- the User-Agent shape (codex_cli_rs-prefixed)
- ``ChatGPT-Account-ID`` extraction from the OAuth JWT (canonical casing,
  from codex-rs ``auth.rs``)
- graceful handling of malformed tokens (drop the account-ID header, don't
  raise)
- primary-client wiring at both entry points in ``run_agent.py``
- aux-client wiring at both entry points in ``agent/auxiliary_client.py``
"""
from __future__ import annotations

import base64
import json
from unittest.mock import MagicMock, patch

import pytest


# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------

def _b64url(data: bytes) -> str:
    """Return *data* as URL-safe base64 text with the ``=`` padding stripped."""
    return base64.urlsafe_b64encode(data).rstrip(b"=").decode()


def _make_codex_jwt(account_id: str = "acct-test-123") -> str:
    """Build a syntactically valid Codex-style JWT with the account_id claim.

    The token is three dot-separated base64url segments (header, payload,
    signature). The signature is a fixed placeholder, so the token is only
    suitable for code that decodes the payload without verifying it.

    Args:
        account_id: Value stored under ``chatgpt_account_id`` inside the
            ``https://api.openai.com/auth`` claim.

    Returns:
        The encoded ``header.payload.signature`` string.
    """
    header = _b64url(b'{"alg":"RS256","typ":"JWT"}')
    claims = {
        "sub": "user-xyz",
        "exp": 9999999999,
        "https://api.openai.com/auth": {
            "chatgpt_account_id": account_id,
            "chatgpt_plan_type": "plus",
        },
    }
    payload = _b64url(json.dumps(claims).encode())
    sig = _b64url(b"fake-sig")
    return f"{header}.{payload}.{sig}"


def _default_headers_passed_to(mock_openai: MagicMock) -> dict:
    """Return the ``default_headers`` kwarg from the last mocked OpenAI() call.

    Fails with an explicit message when the mock was never called, rather
    than with an ``AttributeError`` on ``call_args`` being ``None``. A
    missing or falsy ``default_headers`` kwarg yields an empty dict.
    """
    assert mock_openai.call_args is not None, "OpenAI client was never constructed"
    return mock_openai.call_args.kwargs.get("default_headers") or {}


def _assert_codex_headers(headers: dict, account_id: str) -> None:
    """Assert that *headers* carries the full Codex header set for *account_id*."""
    assert headers.get("originator") == "codex_cli_rs"
    assert headers.get("ChatGPT-Account-ID") == account_id
    assert headers.get("User-Agent", "").startswith("codex_cli_rs/")


# ---------------------------------------------------------------------------
# _codex_cloudflare_headers — the shared helper
# ---------------------------------------------------------------------------

class TestCodexCloudflareHeaders:
    """Unit tests for ``agent.auxiliary_client._codex_cloudflare_headers``."""

    def test_originator_is_codex_cli_rs(self):
        """Cloudflare whitelists codex_cli_rs — any other value is 403'd."""
        from agent.auxiliary_client import _codex_cloudflare_headers
        headers = _codex_cloudflare_headers(_make_codex_jwt())
        assert headers["originator"] == "codex_cli_rs"

    def test_user_agent_advertises_codex_cli_rs(self):
        """The User-Agent must start with the ``codex_cli_rs/`` product token."""
        from agent.auxiliary_client import _codex_cloudflare_headers
        headers = _codex_cloudflare_headers(_make_codex_jwt())
        assert headers["User-Agent"].startswith("codex_cli_rs/")

    def test_account_id_extracted_from_jwt(self):
        """The account id in the JWT payload is surfaced as a header."""
        from agent.auxiliary_client import _codex_cloudflare_headers
        headers = _codex_cloudflare_headers(_make_codex_jwt("acct-abc-999"))
        # Canonical casing — matches codex-rs auth.rs
        assert headers["ChatGPT-Account-ID"] == "acct-abc-999"

    def test_canonical_header_casing(self):
        """Upstream codex-rs uses PascalCase with trailing -ID. Match exactly."""
        from agent.auxiliary_client import _codex_cloudflare_headers
        headers = _codex_cloudflare_headers(_make_codex_jwt())
        assert "ChatGPT-Account-ID" in headers
        # The lowercase/titlecase variants MUST NOT be used — pin to be explicit
        assert "chatgpt-account-id" not in headers
        assert "ChatGPT-Account-Id" not in headers

    # Parametrized so every malformed shape is reported on its own; a single
    # loop would stop at the first failing input and hide the rest.
    @pytest.mark.parametrize(
        "bad",
        ["not-a-jwt", "", "only.one", " ", "...."],
        ids=["no-separator", "empty", "two-segments", "blank", "dots-only"],
    )
    def test_malformed_token_drops_account_id_without_raising(self, bad):
        """Malformed tokens still yield base headers, minus the account id."""
        from agent.auxiliary_client import _codex_cloudflare_headers
        headers = _codex_cloudflare_headers(bad)
        # Still returns base headers — never raises
        assert headers["originator"] == "codex_cli_rs"
        assert "ChatGPT-Account-ID" not in headers

    def test_non_string_token_handled(self):
        """``None`` in place of a token is tolerated like a malformed token."""
        from agent.auxiliary_client import _codex_cloudflare_headers
        headers = _codex_cloudflare_headers(None)  # type: ignore[arg-type]
        assert headers["originator"] == "codex_cli_rs"
        assert "ChatGPT-Account-ID" not in headers

    def test_jwt_without_chatgpt_account_id_claim(self):
        """A valid JWT that lacks the account_id claim should still return headers."""
        from agent.auxiliary_client import _codex_cloudflare_headers

        header = _b64url(b"{}")
        payload = _b64url(json.dumps({"sub": "user-xyz", "exp": 9999999999}).encode())
        sig = _b64url(b"sig")
        token = f"{header}.{payload}.{sig}"

        headers = _codex_cloudflare_headers(token)
        assert headers["originator"] == "codex_cli_rs"
        assert "ChatGPT-Account-ID" not in headers


# ---------------------------------------------------------------------------
# Primary chat client wiring (run_agent.AIAgent)
# ---------------------------------------------------------------------------

class TestPrimaryClientWiring:
    """Header wiring through ``run_agent.AIAgent`` with ``OpenAI`` mocked out."""

    def test_init_wires_codex_headers_for_chatgpt_base_url(self):
        """Constructing the agent against the Codex base URL passes codex headers."""
        from run_agent import AIAgent
        token = _make_codex_jwt("acct-primary-init")
        with patch("run_agent.OpenAI") as mock_openai:
            mock_openai.return_value = MagicMock()
            AIAgent(
                api_key=token,
                base_url="https://chatgpt.com/backend-api/codex",
                provider="openai-codex",
                model="gpt-5.4",
                quiet_mode=True,
                skip_context_files=True,
                skip_memory=True,
            )
            headers = _default_headers_passed_to(mock_openai)
        _assert_codex_headers(headers, "acct-primary-init")

    def test_apply_client_headers_on_base_url_change(self):
        """Credential-rotation / base-url change path must also emit codex headers."""
        from run_agent import AIAgent
        token = _make_codex_jwt("acct-rotation")
        with patch("run_agent.OpenAI") as mock_openai:
            mock_openai.return_value = MagicMock()
            agent = AIAgent(
                api_key="placeholder-openrouter-key",
                base_url="https://openrouter.ai/api/v1",
                provider="openrouter",
                model="anthropic/claude-sonnet-4.6",
                quiet_mode=True,
                skip_context_files=True,
                skip_memory=True,
            )
            # Simulate rotation into a Codex credential
            agent._client_kwargs["api_key"] = token
            agent._apply_client_headers_for_base_url(
                "https://chatgpt.com/backend-api/codex"
            )
            headers = agent._client_kwargs.get("default_headers") or {}
        _assert_codex_headers(headers, "acct-rotation")

    def test_apply_client_headers_clears_codex_headers_off_chatgpt(self):
        """Switching AWAY from chatgpt.com must drop the codex headers."""
        from run_agent import AIAgent
        token = _make_codex_jwt()
        with patch("run_agent.OpenAI") as mock_openai:
            mock_openai.return_value = MagicMock()
            agent = AIAgent(
                api_key=token,
                base_url="https://chatgpt.com/backend-api/codex",
                provider="openai-codex",
                model="gpt-5.4",
                quiet_mode=True,
                skip_context_files=True,
                skip_memory=True,
            )
            # Sanity: headers are set initially
            assert "originator" in (agent._client_kwargs.get("default_headers") or {})
            agent._apply_client_headers_for_base_url(
                "https://api.anthropic.com"
            )
            # default_headers should be popped for anthropic base
            assert "default_headers" not in agent._client_kwargs

    def test_openrouter_base_url_does_not_get_codex_headers(self):
        """A non-Codex base URL must not advertise the codex originator."""
        from run_agent import AIAgent
        with patch("run_agent.OpenAI") as mock_openai:
            mock_openai.return_value = MagicMock()
            AIAgent(
                api_key="sk-or-test",
                base_url="https://openrouter.ai/api/v1",
                provider="openrouter",
                model="anthropic/claude-sonnet-4.6",
                quiet_mode=True,
                skip_context_files=True,
                skip_memory=True,
            )
            headers = _default_headers_passed_to(mock_openai)
        assert headers.get("originator") != "codex_cli_rs"


# ---------------------------------------------------------------------------
# Auxiliary client wiring (agent.auxiliary_client)
# ---------------------------------------------------------------------------

class TestAuxiliaryClientWiring:
    """Header wiring through ``agent.auxiliary_client`` with ``OpenAI`` mocked out."""

    def test_build_codex_client_passes_codex_headers(self, monkeypatch):
        """_build_codex_client builds the OpenAI client used for compression /
        vision / title generation when routed through Codex. Must emit codex
        headers."""
        from agent import auxiliary_client
        token = _make_codex_jwt("acct-aux-try-codex")

        # Force _select_pool_entry to return "no pool" so we fall through to
        # _read_codex_access_token.
        monkeypatch.setattr(
            auxiliary_client, "_select_pool_entry",
            lambda provider: (False, None),
        )
        monkeypatch.setattr(
            auxiliary_client, "_read_codex_access_token",
            lambda: token,
        )
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            mock_openai.return_value = MagicMock()
            client, _model = auxiliary_client._build_codex_client("gpt-5.4")
            assert client is not None
            headers = _default_headers_passed_to(mock_openai)
        _assert_codex_headers(headers, "acct-aux-try-codex")

    def test_resolve_provider_client_raw_codex_passes_codex_headers(self, monkeypatch):
        """The ``raw_codex=True`` branch (used by the main agent loop for direct
        responses.stream() access) must also emit codex headers."""
        from agent import auxiliary_client
        token = _make_codex_jwt("acct-aux-raw-codex")
        # NOTE(review): unlike the _build_codex_client test, _select_pool_entry
        # is not patched here — confirm this branch never consults the pool.
        monkeypatch.setattr(
            auxiliary_client, "_read_codex_access_token",
            lambda: token,
        )
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            mock_openai.return_value = MagicMock()
            client, _model = auxiliary_client.resolve_provider_client(
                "openai-codex", model="gpt-5.4", raw_codex=True,
            )
            assert client is not None
            headers = _default_headers_passed_to(mock_openai)
        _assert_codex_headers(headers, "acct-aux-raw-codex")