test_unsupported_temperature_retry.py
1 """Regression tests for the universal "unsupported temperature" retry in 2 ``agent.auxiliary_client``. 3 4 Auxiliary callers (context compression, session search, 5 web extract summarisation, etc.) hardcode ``temperature=0.3`` for historical 6 reasons. Several provider/model combinations reject ``temperature`` with a 7 400: 8 9 * OpenAI Responses (gpt-5/o-series reasoning models) 10 * Copilot Responses (reasoning models) 11 * OpenRouter reasoning models (gpt-5.5, some anthropic via OAI-compat) 12 * Anthropic Opus 4.7+ via OpenAI-compat endpoints 13 * Kimi/Moonshot (server-managed) 14 15 ``_fixed_temperature_for_model`` catches Kimi up front, and 16 ``build_chat_completion_kwargs`` drops temperature for Anthropic Opus 4.7+, 17 but the same backend can accept ``temperature`` for some models and reject 18 it for others (for example gpt-5.4 accepts but gpt-5.5 rejects on the same 19 endpoint). An allow/deny-list is not maintainable across providers. 20 21 The universal fix is reactive: when a call returns an 22 ``Unsupported parameter: temperature`` 400, retry once without temperature. 23 These tests lock in that behaviour for both sync and async paths. 24 """ 25 26 from unittest.mock import patch, MagicMock, AsyncMock 27 28 import pytest 29 30 from agent.auxiliary_client import ( 31 call_llm, 32 async_call_llm, 33 _is_unsupported_temperature_error, 34 ) 35 36 37 class TestIsUnsupportedTemperatureError: 38 """The detector must match the phrasings providers actually return.""" 39 40 @pytest.mark.parametrize("message", [ 41 # OpenAI / Codex Responses 42 "HTTP 400: Unsupported parameter: temperature", 43 "Error code: 400 - {'error': {'message': \"Unsupported parameter: 'temperature'\"}}", 44 # Copilot / OpenAI error-code form 45 "Error code: 400 - {'error': {'code': 'unsupported_parameter', 'param': 'temperature'}}", 46 # OpenRouter-style 47 "Provider returned error: temperature is not supported for this model", 48 "this model does not support temperature", 49 # Anthropic-style via OAI-compat 50 "temperature: unknown parameter", 51 # Some gateways 52 "unrecognized request argument supplied: temperature", 53 ]) 54 def test_matches_real_provider_messages(self, message): 55 assert _is_unsupported_temperature_error(RuntimeError(message)) is True 56 57 @pytest.mark.parametrize("message", [ 58 # Unrelated 400s must NOT trigger a silent-retry 59 "HTTP 400: Invalid value: 'tool'. Supported values are: 'assistant'...", 60 "max_tokens is too large for this model", 61 "Rate limit exceeded", 62 "Connection reset by peer", 63 # Temperature value error is a different class of problem 64 "temperature must be between 0 and 2", 65 ]) 66 def test_does_not_match_unrelated_errors(self, message): 67 assert _is_unsupported_temperature_error(RuntimeError(message)) is False 68 69 70 def _dummy_response(): 71 # The real code calls _validate_llm_response which inspects 72 # response.choices[0].message. The tests here patch that out, so 73 # any sentinel object is fine. 
74 return {"ok": True} 75 76 77 class TestCallLlmUnsupportedTemperatureRetry: 78 """``call_llm`` retries once without temperature and returns on success.""" 79 80 def _setup(self, first_exc): 81 client = MagicMock() 82 client.base_url = "https://api.openai.com/v1" 83 client.chat.completions.create.side_effect = [first_exc, _dummy_response()] 84 return client 85 86 @pytest.mark.parametrize("error_message", [ 87 "HTTP 400: Unsupported parameter: temperature", 88 "Error code: 400 - {'error': {'code': 'unsupported_parameter', 'param': 'temperature'}}", 89 "Provider error: this model does not support temperature", 90 ]) 91 def test_retries_once_without_temperature(self, error_message): 92 client = self._setup(RuntimeError(error_message)) 93 94 with ( 95 patch("agent.auxiliary_client._resolve_task_provider_model", 96 return_value=("openai-codex", "gpt-5.5", None, None, None)), 97 patch("agent.auxiliary_client._get_cached_client", 98 return_value=(client, "gpt-5.5")), 99 patch("agent.auxiliary_client._validate_llm_response", 100 side_effect=lambda resp, _task: resp), 101 ): 102 result = call_llm( 103 task="compression", 104 messages=[{"role": "user", "content": "remember this"}], 105 temperature=0.3, 106 max_tokens=500, 107 ) 108 109 assert result == {"ok": True} 110 assert client.chat.completions.create.call_count == 2 111 first_kwargs = client.chat.completions.create.call_args_list[0].kwargs 112 retry_kwargs = client.chat.completions.create.call_args_list[1].kwargs 113 assert first_kwargs["temperature"] == 0.3 114 assert "temperature" not in retry_kwargs 115 # other kwargs preserved 116 assert retry_kwargs["max_tokens"] == 500 117 118 def test_non_temperature_400_does_not_retry_as_temperature(self): 119 """Unrelated 400s (e.g. bad tool role) must not silently drop temp.""" 120 client = MagicMock() 121 client.base_url = "https://api.openai.com/v1" 122 non_temp_err = RuntimeError( 123 "HTTP 400: Invalid value: 'tool'. Supported values are: 'assistant'..." 124 ) 125 client.chat.completions.create.side_effect = non_temp_err 126 127 with ( 128 patch("agent.auxiliary_client._resolve_task_provider_model", 129 return_value=("openai-codex", "gpt-5.5", None, None, None)), 130 patch("agent.auxiliary_client._get_cached_client", 131 return_value=(client, "gpt-5.5")), 132 patch("agent.auxiliary_client._validate_llm_response", 133 side_effect=lambda resp, _task: resp), 134 patch("agent.auxiliary_client._try_payment_fallback", 135 return_value=None), 136 ): 137 with pytest.raises(RuntimeError, match="Invalid value"): 138 call_llm( 139 task="compression", 140 messages=[{"role": "user", "content": "x"}], 141 temperature=0.3, 142 max_tokens=500, 143 ) 144 # Should NOT have retried (non-temperature 400 doesn't match) 145 assert client.chat.completions.create.call_count == 1 146 147 def test_no_retry_when_temperature_not_in_kwargs(self): 148 """If caller didn't send temperature, don't invent a temperature-retry.""" 149 client = MagicMock() 150 client.base_url = "https://api.openai.com/v1" 151 # Provider complains about temperature even though we didn't send it. 152 # (Pathological but possible with misleading error text.) The guard 153 # ``"temperature" in kwargs`` must prevent an unnecessary retry. 


class TestAsyncCallLlmUnsupportedTemperatureRetry:
    """``async_call_llm`` mirror of the sync retry semantics."""

    @pytest.mark.asyncio
    async def test_async_retries_once_without_temperature(self):
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        client.chat.completions.create = AsyncMock(side_effect=[
            RuntimeError("HTTP 400: Unsupported parameter: temperature"),
            _dummy_response(),
        ])

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model",
                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client",
                  return_value=(client, "gpt-5.5")),
            patch("agent.auxiliary_client._validate_llm_response",
                  side_effect=lambda resp, _task: resp),
        ):
            result = await async_call_llm(
                task="session_search",
                messages=[{"role": "user", "content": "query"}],
                temperature=0.3,
                max_tokens=500,
            )

        assert result == {"ok": True}
        assert client.chat.completions.create.await_count == 2
        first_kwargs = client.chat.completions.create.call_args_list[0].kwargs
        retry_kwargs = client.chat.completions.create.call_args_list[1].kwargs
        assert first_kwargs["temperature"] == 0.3
        assert "temperature" not in retry_kwargs
        assert retry_kwargs["max_tokens"] == 500

    @pytest.mark.asyncio
    async def test_async_non_temperature_400_does_not_retry(self):
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        client.chat.completions.create = AsyncMock(
            side_effect=RuntimeError("HTTP 400: Invalid value: 'tool'"),
        )

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model",
                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client",
                  return_value=(client, "gpt-5.5")),
            patch("agent.auxiliary_client._validate_llm_response",
                  side_effect=lambda resp, _task: resp),
            patch("agent.auxiliary_client._try_payment_fallback",
                  return_value=None),
        ):
            with pytest.raises(RuntimeError, match="Invalid value"):
                await async_call_llm(
                    task="session_search",
                    messages=[{"role": "user", "content": "x"}],
                    temperature=0.3,
                    max_tokens=500,
                )
        assert client.chat.completions.create.await_count == 1
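

# The async mirror of the sync sketch above, again an illustration of the
# contract rather than the real ``async_call_llm`` internals; ``create`` is
# assumed to be awaitable, e.g. an ``AsyncMock`` or an async client's
# ``chat.completions.create``.
async def _sketch_async_retry_without_temperature(create, kwargs):
    try:
        return await create(**kwargs)
    except Exception as exc:
        if _sketch_is_unsupported_temperature_error(exc) and "temperature" in kwargs:
            return await create(**{k: v for k, v in kwargs.items() if k != "temperature"})
        raise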