/ tests / agent / test_codex_cloudflare_headers.py
test_codex_cloudflare_headers.py
  1  """Regression guard: Codex Cloudflare 403 mitigation headers.
  2  
  3  The ``chatgpt.com/backend-api/codex`` endpoint sits behind a Cloudflare layer
  4  that whitelists a small set of first-party originators (``codex_cli_rs``,
  5  ``codex_vscode``, ``codex_sdk_ts``, ``Codex*``). Requests from non-residential
  6  IPs (VPS, always-on servers, some corporate egress) that don't advertise an
  7  allowed originator are served 403 with ``cf-mitigated: challenge`` regardless
  8  of auth correctness.
  9  
 10  ``_codex_cloudflare_headers`` in ``agent.auxiliary_client`` centralizes the
 11  header set so the primary chat client (``run_agent.AIAgent.__init__`` +
 12  ``_apply_client_headers_for_base_url``) and the auxiliary client paths
 13  (``_build_codex_client`` and the ``raw_codex`` branch of ``resolve_provider_client``)
 14  all emit the same headers.
 15  
 16  These tests pin:
 17  - the originator value (must be ``codex_cli_rs`` — the whitelisted one)
 18  - the User-Agent shape (codex_cli_rs-prefixed)
 19  - ``ChatGPT-Account-ID`` extraction from the OAuth JWT (canonical casing,
 20    from codex-rs ``auth.rs``)
 21  - graceful handling of malformed tokens (drop the account-ID header, don't
 22    raise)
 23  - primary-client wiring at both entry points in ``run_agent.py``
 24  - aux-client wiring at both entry points in ``agent/auxiliary_client.py``
 25  """
 26  from __future__ import annotations
 27  
 28  import base64
 29  import json
 30  from unittest.mock import MagicMock, patch
 31  
 32  import pytest
 33  
 34  
 35  # ---------------------------------------------------------------------------
 36  # Fixtures
 37  # ---------------------------------------------------------------------------
 38  
 39  def _make_codex_jwt(account_id: str = "acct-test-123") -> str:
 40      """Build a syntactically valid Codex-style JWT with the account_id claim."""
 41      def b64url(data: bytes) -> str:
 42          return base64.urlsafe_b64encode(data).rstrip(b"=").decode()
 43      header = b64url(b'{"alg":"RS256","typ":"JWT"}')
 44      claims = {
 45          "sub": "user-xyz",
 46          "exp": 9999999999,
 47          "https://api.openai.com/auth": {
 48              "chatgpt_account_id": account_id,
 49              "chatgpt_plan_type": "plus",
 50          },
 51      }
 52      payload = b64url(json.dumps(claims).encode())
 53      sig = b64url(b"fake-sig")
 54      return f"{header}.{payload}.{sig}"
 55  
 56  
 57  # ---------------------------------------------------------------------------
 58  # _codex_cloudflare_headers — the shared helper
 59  # ---------------------------------------------------------------------------
 60  
 61  class TestCodexCloudflareHeaders:
 62      def test_originator_is_codex_cli_rs(self):
 63          """Cloudflare whitelists codex_cli_rs — any other value is 403'd."""
 64          from agent.auxiliary_client import _codex_cloudflare_headers
 65          headers = _codex_cloudflare_headers(_make_codex_jwt())
 66          assert headers["originator"] == "codex_cli_rs"
 67  
 68      def test_user_agent_advertises_codex_cli_rs(self):
 69          from agent.auxiliary_client import _codex_cloudflare_headers
 70          headers = _codex_cloudflare_headers(_make_codex_jwt())
 71          assert headers["User-Agent"].startswith("codex_cli_rs/")
 72  
 73      def test_account_id_extracted_from_jwt(self):
 74          from agent.auxiliary_client import _codex_cloudflare_headers
 75          headers = _codex_cloudflare_headers(_make_codex_jwt("acct-abc-999"))
 76          # Canonical casing — matches codex-rs auth.rs
 77          assert headers["ChatGPT-Account-ID"] == "acct-abc-999"
 78  
 79      def test_canonical_header_casing(self):
 80          """Upstream codex-rs uses PascalCase with trailing -ID. Match exactly."""
 81          from agent.auxiliary_client import _codex_cloudflare_headers
 82          headers = _codex_cloudflare_headers(_make_codex_jwt())
 83          assert "ChatGPT-Account-ID" in headers
 84          # The lowercase/titlecase variants MUST NOT be used — pin to be explicit
 85          assert "chatgpt-account-id" not in headers
 86          assert "ChatGPT-Account-Id" not in headers
 87  
 88      def test_malformed_token_drops_account_id_without_raising(self):
 89          from agent.auxiliary_client import _codex_cloudflare_headers
 90          for bad in ["not-a-jwt", "", "only.one", "  ", "...."]:
 91              headers = _codex_cloudflare_headers(bad)
 92              # Still returns base headers — never raises
 93              assert headers["originator"] == "codex_cli_rs"
 94              assert "ChatGPT-Account-ID" not in headers
 95  
 96      def test_non_string_token_handled(self):
 97          from agent.auxiliary_client import _codex_cloudflare_headers
 98          headers = _codex_cloudflare_headers(None)  # type: ignore[arg-type]
 99          assert headers["originator"] == "codex_cli_rs"
100          assert "ChatGPT-Account-ID" not in headers
101  
102      def test_jwt_without_chatgpt_account_id_claim(self):
103          """A valid JWT that lacks the account_id claim should still return headers."""
104          from agent.auxiliary_client import _codex_cloudflare_headers
105          import base64 as _b64, json as _json
106  
107          def b64url(data: bytes) -> str:
108              return _b64.urlsafe_b64encode(data).rstrip(b"=").decode()
109          payload = b64url(_json.dumps({"sub": "user-xyz", "exp": 9999999999}).encode())
110          token = f"{b64url(b'{}')}.{payload}.{b64url(b'sig')}"
111          headers = _codex_cloudflare_headers(token)
112          assert headers["originator"] == "codex_cli_rs"
113          assert "ChatGPT-Account-ID" not in headers
114  
115  
116  # ---------------------------------------------------------------------------
117  # Primary chat client wiring (run_agent.AIAgent)
118  # ---------------------------------------------------------------------------
119  
class TestPrimaryClientWiring:
    """Header wiring on the primary chat client built by ``run_agent.AIAgent``."""

    _CODEX_URL = "https://chatgpt.com/backend-api/codex"
    _OPENROUTER_URL = "https://openrouter.ai/api/v1"
    # Flags passed to every AIAgent constructed in this class.
    _QUIET = {
        "quiet_mode": True,
        "skip_context_files": True,
        "skip_memory": True,
    }

    @staticmethod
    def _assert_codex_headers(headers, account_id):
        """Assert the originator, account id and User-Agent prefix in ``headers``."""
        assert headers.get("originator") == "codex_cli_rs"
        assert headers.get("ChatGPT-Account-ID") == account_id
        assert headers.get("User-Agent", "").startswith("codex_cli_rs/")

    def test_init_wires_codex_headers_for_chatgpt_base_url(self):
        """Constructing against the Codex base URL hands codex headers to ``OpenAI``."""
        from run_agent import AIAgent
        jwt = _make_codex_jwt("acct-primary-init")
        with patch("run_agent.OpenAI") as openai_cls:
            openai_cls.return_value = MagicMock()
            AIAgent(
                api_key=jwt,
                base_url=self._CODEX_URL,
                provider="openai-codex",
                model="gpt-5.4",
                **self._QUIET,
            )
            sent = openai_cls.call_args.kwargs.get("default_headers") or {}
            self._assert_codex_headers(sent, "acct-primary-init")

    def test_apply_client_headers_on_base_url_change(self):
        """Credential-rotation / base-url change path must also emit codex headers."""
        from run_agent import AIAgent
        jwt = _make_codex_jwt("acct-rotation")
        with patch("run_agent.OpenAI") as openai_cls:
            openai_cls.return_value = MagicMock()
            agent = AIAgent(
                api_key="placeholder-openrouter-key",
                base_url=self._OPENROUTER_URL,
                provider="openrouter",
                model="anthropic/claude-sonnet-4.6",
                **self._QUIET,
            )
            # Simulate rotation into a Codex credential
            agent._client_kwargs["api_key"] = jwt
            agent._apply_client_headers_for_base_url(self._CODEX_URL)
            stored = agent._client_kwargs.get("default_headers") or {}
            self._assert_codex_headers(stored, "acct-rotation")

    def test_apply_client_headers_clears_codex_headers_off_chatgpt(self):
        """Switching AWAY from chatgpt.com must drop the codex headers."""
        from run_agent import AIAgent
        jwt = _make_codex_jwt()
        with patch("run_agent.OpenAI") as openai_cls:
            openai_cls.return_value = MagicMock()
            agent = AIAgent(
                api_key=jwt,
                base_url=self._CODEX_URL,
                provider="openai-codex",
                model="gpt-5.4",
                **self._QUIET,
            )
            # Sanity: headers are set initially
            initial = agent._client_kwargs.get("default_headers") or {}
            assert "originator" in initial
            agent._apply_client_headers_for_base_url("https://api.anthropic.com")
            # default_headers should be popped for anthropic base
            assert "default_headers" not in agent._client_kwargs

    def test_openrouter_base_url_does_not_get_codex_headers(self):
        """An OpenRouter base URL must not be given the codex originator."""
        from run_agent import AIAgent
        with patch("run_agent.OpenAI") as openai_cls:
            openai_cls.return_value = MagicMock()
            AIAgent(
                api_key="sk-or-test",
                base_url=self._OPENROUTER_URL,
                provider="openrouter",
                model="anthropic/claude-sonnet-4.6",
                **self._QUIET,
            )
            sent = openai_cls.call_args.kwargs.get("default_headers") or {}
            assert sent.get("originator") != "codex_cli_rs"
203  
204  
205  # ---------------------------------------------------------------------------
206  # Auxiliary client wiring (agent.auxiliary_client)
207  # ---------------------------------------------------------------------------
208  
class TestAuxiliaryClientWiring:
    """Header wiring on the clients built by ``agent.auxiliary_client``."""

    @staticmethod
    def _assert_codex_headers(headers, account_id):
        """Assert the originator, account id and User-Agent prefix in ``headers``."""
        assert headers.get("originator") == "codex_cli_rs"
        assert headers.get("ChatGPT-Account-ID") == account_id
        assert headers.get("User-Agent", "").startswith("codex_cli_rs/")

    def test_build_codex_client_passes_codex_headers(self, monkeypatch):
        """_build_codex_client builds the OpenAI client used for compression /
        vision / title generation when routed through Codex. Must emit codex
        headers."""
        from agent import auxiliary_client
        jwt = _make_codex_jwt("acct-aux-try-codex")

        def no_pool_entry(provider):
            return (False, None)

        def read_token():
            return jwt

        # Force _select_pool_entry to return "no pool" so we fall through to
        # _read_codex_access_token.
        monkeypatch.setattr(auxiliary_client, "_select_pool_entry", no_pool_entry)
        monkeypatch.setattr(auxiliary_client, "_read_codex_access_token", read_token)

        with patch("agent.auxiliary_client.OpenAI") as openai_cls:
            openai_cls.return_value = MagicMock()
            client, _model = auxiliary_client._build_codex_client("gpt-5.4")
            assert client is not None
            sent = openai_cls.call_args.kwargs.get("default_headers") or {}
            self._assert_codex_headers(sent, "acct-aux-try-codex")

    def test_resolve_provider_client_raw_codex_passes_codex_headers(self, monkeypatch):
        """The ``raw_codex=True`` branch (used by the main agent loop for direct
        responses.stream() access) must also emit codex headers."""
        from agent import auxiliary_client
        jwt = _make_codex_jwt("acct-aux-raw-codex")

        def read_token():
            return jwt

        monkeypatch.setattr(auxiliary_client, "_read_codex_access_token", read_token)

        with patch("agent.auxiliary_client.OpenAI") as openai_cls:
            openai_cls.return_value = MagicMock()
            client, _model = auxiliary_client.resolve_provider_client(
                "openai-codex",
                model="gpt-5.4",
                raw_codex=True,
            )
            assert client is not None
            sent = openai_cls.call_args.kwargs.get("default_headers") or {}
            self._assert_codex_headers(sent, "acct-aux-raw-codex")
254              assert headers.get("User-Agent", "").startswith("codex_cli_rs/")