# tests/agent/test_unsupported_temperature_retry.py
  1  """Regression tests for the universal "unsupported temperature" retry in
  2  ``agent.auxiliary_client``.
  3  
  4  Auxiliary callers (context compression, session search,
  5  web extract summarisation, etc.) hardcode ``temperature=0.3`` for historical
  6  reasons. Several provider/model combinations reject ``temperature`` with a
  7  400:
  8  
  9    * OpenAI Responses (gpt-5/o-series reasoning models)
 10    * Copilot Responses (reasoning models)
 11    * OpenRouter reasoning models (gpt-5.5, some anthropic via OAI-compat)
 12    * Anthropic Opus 4.7+ via OpenAI-compat endpoints
 13    * Kimi/Moonshot (server-managed)
 14  
 15  ``_fixed_temperature_for_model`` catches Kimi up front, and
 16  ``build_chat_completion_kwargs`` drops temperature for Anthropic Opus 4.7+,
 17  but the same backend can accept ``temperature`` for some models and reject
 18  it for others (for example gpt-5.4 accepts but gpt-5.5 rejects on the same
 19  endpoint). An allow/deny-list is not maintainable across providers.
 20  
 21  The universal fix is reactive: when a call returns an
 22  ``Unsupported parameter: temperature`` 400, retry once without temperature.
 23  These tests lock in that behaviour for both sync and async paths.
 24  """

from unittest.mock import patch, MagicMock, AsyncMock

import pytest

from agent.auxiliary_client import (
    call_llm,
    async_call_llm,
    _is_unsupported_temperature_error,
)


class TestIsUnsupportedTemperatureError:
    """The detector must match the phrasings providers actually return."""
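
    # How the detector is implemented is not pinned down by these tests; a
    # plausible shape (an assumption, not the actual source) is
    # case-insensitive matching that requires "temperature" alongside a
    # phrase like "unsupported parameter", "not supported", "unknown
    # parameter", or "unrecognized request argument", while rejecting
    # value-range complaints such as "must be between".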

    @pytest.mark.parametrize("message", [
        # OpenAI / Codex Responses
        "HTTP 400: Unsupported parameter: temperature",
        "Error code: 400 - {'error': {'message': \"Unsupported parameter: 'temperature'\"}}",
        # Copilot / OpenAI error-code form
        "Error code: 400 - {'error': {'code': 'unsupported_parameter', 'param': 'temperature'}}",
        # OpenRouter-style
        "Provider returned error: temperature is not supported for this model",
        "this model does not support temperature",
        # Anthropic-style via OAI-compat
        "temperature: unknown parameter",
        # Some gateways
        "unrecognized request argument supplied: temperature",
    ])
    def test_matches_real_provider_messages(self, message):
        assert _is_unsupported_temperature_error(RuntimeError(message)) is True

    @pytest.mark.parametrize("message", [
        # Unrelated 400s must NOT trigger a silent retry
        "HTTP 400: Invalid value: 'tool'. Supported values are: 'assistant'...",
        "max_tokens is too large for this model",
        "Rate limit exceeded",
        "Connection reset by peer",
        # A temperature value error is a different class of problem
        "temperature must be between 0 and 2",
    ])
    def test_does_not_match_unrelated_errors(self, message):
        assert _is_unsupported_temperature_error(RuntimeError(message)) is False


def _dummy_response():
    # The real code calls _validate_llm_response, which inspects
    # response.choices[0].message.  The tests here patch that out, so
    # any sentinel object is fine.
    return {"ok": True}


class TestCallLlmUnsupportedTemperatureRetry:
    """``call_llm`` retries once without temperature and returns on success."""

    def _setup(self, first_exc):
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
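        # First create() call raises the provider 400; the retry then gets
        # the sentinel response.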
        client.chat.completions.create.side_effect = [first_exc, _dummy_response()]
        return client

    @pytest.mark.parametrize("error_message", [
        "HTTP 400: Unsupported parameter: temperature",
        "Error code: 400 - {'error': {'code': 'unsupported_parameter', 'param': 'temperature'}}",
        "Provider error: this model does not support temperature",
    ])
    def test_retries_once_without_temperature(self, error_message):
        client = self._setup(RuntimeError(error_message))

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model",
                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client",
                  return_value=(client, "gpt-5.5")),
            patch("agent.auxiliary_client._validate_llm_response",
                  side_effect=lambda resp, _task: resp),
        ):
            result = call_llm(
                task="compression",
                messages=[{"role": "user", "content": "remember this"}],
                temperature=0.3,
                max_tokens=500,
            )

        assert result == {"ok": True}
        assert client.chat.completions.create.call_count == 2
        first_kwargs = client.chat.completions.create.call_args_list[0].kwargs
        retry_kwargs = client.chat.completions.create.call_args_list[1].kwargs
        assert first_kwargs["temperature"] == 0.3
        assert "temperature" not in retry_kwargs
        # other kwargs preserved
        assert retry_kwargs["max_tokens"] == 500

    def test_non_temperature_400_does_not_retry_as_temperature(self):
        """Unrelated 400s (e.g. a bad tool role) must not silently drop temperature."""
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        non_temp_err = RuntimeError(
            "HTTP 400: Invalid value: 'tool'. Supported values are: 'assistant'..."
        )
        client.chat.completions.create.side_effect = non_temp_err

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model",
                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client",
                  return_value=(client, "gpt-5.5")),
            patch("agent.auxiliary_client._validate_llm_response",
                  side_effect=lambda resp, _task: resp),
            patch("agent.auxiliary_client._try_payment_fallback",
                  return_value=None),
        ):
            with pytest.raises(RuntimeError, match="Invalid value"):
                call_llm(
                    task="compression",
                    messages=[{"role": "user", "content": "x"}],
                    temperature=0.3,
                    max_tokens=500,
                )
        # Should NOT have retried (non-temperature 400 doesn't match)
        assert client.chat.completions.create.call_count == 1

    def test_no_retry_when_temperature_not_in_kwargs(self):
        """If the caller didn't send temperature, don't invent a temperature retry."""
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        # Provider complains about temperature even though we didn't send it.
        # (Pathological but possible with misleading error text.)  The guard
        # ``"temperature" in kwargs`` must prevent an unnecessary retry.
        err = RuntimeError("HTTP 400: Unsupported parameter: temperature")
        client.chat.completions.create.side_effect = err

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model",
                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client",
                  return_value=(client, "gpt-5.5")),
            patch("agent.auxiliary_client._validate_llm_response",
                  side_effect=lambda resp, _task: resp),
            patch("agent.auxiliary_client._try_payment_fallback",
                  return_value=None),
        ):
            with pytest.raises(RuntimeError):
                call_llm(
                    task="compression",
                    messages=[{"role": "user", "content": "x"}],
                    temperature=None,  # explicit: no temperature sent
                    max_tokens=500,
                )
        assert client.chat.completions.create.call_count == 1


class TestAsyncCallLlmUnsupportedTemperatureRetry:
    """``async_call_llm`` mirror of the sync retry semantics."""

    @pytest.mark.asyncio
    async def test_async_retries_once_without_temperature(self):
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
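        # AsyncMock: the first awaited create() raises, the second returns
        # the sentinel; await_count below verifies exactly one retry.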
        client.chat.completions.create = AsyncMock(side_effect=[
            RuntimeError("HTTP 400: Unsupported parameter: temperature"),
            _dummy_response(),
        ])

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model",
                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client",
                  return_value=(client, "gpt-5.5")),
            patch("agent.auxiliary_client._validate_llm_response",
                  side_effect=lambda resp, _task: resp),
        ):
            result = await async_call_llm(
                task="session_search",
                messages=[{"role": "user", "content": "query"}],
                temperature=0.3,
                max_tokens=500,
            )

        assert result == {"ok": True}
        assert client.chat.completions.create.await_count == 2
        first_kwargs = client.chat.completions.create.call_args_list[0].kwargs
        retry_kwargs = client.chat.completions.create.call_args_list[1].kwargs
        assert first_kwargs["temperature"] == 0.3
        assert "temperature" not in retry_kwargs
        assert retry_kwargs["max_tokens"] == 500

    @pytest.mark.asyncio
    async def test_async_non_temperature_400_does_not_retry(self):
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        client.chat.completions.create = AsyncMock(
            side_effect=RuntimeError("HTTP 400: Invalid value: 'tool'"),
        )

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model",
                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client",
                  return_value=(client, "gpt-5.5")),
            patch("agent.auxiliary_client._validate_llm_response",
                  side_effect=lambda resp, _task: resp),
            patch("agent.auxiliary_client._try_payment_fallback",
                  return_value=None),
        ):
            with pytest.raises(RuntimeError, match="Invalid value"):
                await async_call_llm(
                    task="session_search",
                    messages=[{"role": "user", "content": "x"}],
                    temperature=0.3,
                    max_tokens=500,
                )
        assert client.chat.completions.create.await_count == 1