test_session_model_override_routing.py
"""Regression tests for session-scoped model/provider overrides in gateway agents. 

These cover the bug where `/model ...` stored a session override, but fresh 
agent constructions still resolved model/provider from global config/runtime. 
That let helper agents (and cache-miss main agents) route GPT-5.4 to the wrong 
provider, e.g. Nous instead of OpenAI Codex. 
"""

import asyncio
import sys
import threading
import types
from unittest.mock import AsyncMock, MagicMock

import pytest

import gateway.run as gateway_run
from gateway.config import Platform
from gateway.session import SessionSource


class _CapturingAgent:
    """Fake agent that records init kwargs for assertions."""

    # Class-level capture slot: the most recent __init__ kwargs, shared across
    # instances so tests can assert on whatever agent the runner constructed.
    last_init = None

    def __init__(self, *args, **kwargs):
        # Record a snapshot of the kwargs (copied so later mutation by the
        # runner cannot change what the test observes).
        type(self).last_init = dict(kwargs)
        self.tools = []

    def run_conversation(self, user_message: str, conversation_history=None, task_id=None):
        # Minimal successful-conversation shape expected by the runner.
        return {
            "final_response": "ok",
            "messages": [],
            "api_calls": 1,
        }


def _make_runner():
    """Build a bare GatewayRunner with just the state the tested paths touch.

    Uses object.__new__ to skip __init__ (which would load config, adapters,
    etc.) and hand-populates only the attributes read by _run_agent /
    _run_background_task.
    """
    runner = object.__new__(gateway_run.GatewayRunner)
    runner.adapters = {}
    runner.session_store = None
    runner.config = None
    runner._voice_mode = {}
    runner._ephemeral_system_prompt = ""
    runner._prefill_messages = []
    runner._reasoning_config = None
    runner._show_reasoning = False
    runner._provider_routing = {}
    runner._fallback_model = None
    runner._service_tier = None
    runner._running_agents = {}
    runner._running_agents_ts = {}
    runner._background_tasks = set()
    runner._session_db = None
    runner._session_model_overrides = {}
    runner._session_reasoning_overrides = {}
    runner._pending_model_notes = {}
    runner._pending_approvals = {}
    runner._agent_cache = {}
    runner._agent_cache_lock = threading.Lock()
    # Honcho integration is irrelevant here; return an inert pair.
    runner._get_or_create_gateway_honcho = lambda session_key: (None, None)
    runner.hooks = MagicMock()
    runner.hooks.emit = AsyncMock()
    runner.hooks.loaded_hooks = []
    return runner


def _codex_override():
    """A complete session override routing gpt-5.4 to the OpenAI Codex backend."""
    return {
        "model": "gpt-5.4",
        "provider": "openai-codex",
        "api_key": "***",
        "base_url": "https://chatgpt.com/backend-api/codex",
        "api_mode": "codex_responses",
    }


def _explode_runtime_resolution(*args, **kwargs):
    """Tripwire patched over _resolve_runtime_agent_kwargs.

    Accepts any call signature so an unexpected invocation always surfaces as
    this AssertionError rather than a misleading TypeError about arity.
    """
    raise AssertionError(
        "global runtime resolution should not run when a complete session override exists"
    )


def test_run_agent_prefers_session_override_over_global_runtime(monkeypatch):
    # Neutralize config/env loading and make any fallback to global runtime
    # resolution an immediate, explicit failure.
    monkeypatch.setattr(gateway_run, "_load_gateway_config", lambda: {})
    monkeypatch.setattr(gateway_run, "load_dotenv", lambda *args, **kwargs: None)
    monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", _explode_runtime_resolution)

    # Substitute the real agent module with one exposing the capturing fake.
    fake_run_agent = types.ModuleType("run_agent")
    fake_run_agent.AIAgent = _CapturingAgent
    monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)

    _CapturingAgent.last_init = None
    runner = _make_runner()

    source = SessionSource(
        platform=Platform.LOCAL,
        chat_id="cli",
        chat_name="CLI",
        chat_type="dm",
        user_id="user-1",
    )
    session_key = "agent:main:local:dm"
    runner._session_model_overrides[session_key] = _codex_override()
    runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"}

    result = asyncio.run(
        runner._run_agent(
            message="ping",
            context_prompt="",
            history=[],
            source=source,
            session_id="session-1",
            session_key=session_key,
        )
    )

    assert result["final_response"] == "ok"
    assert _CapturingAgent.last_init is not None
    # The freshly constructed agent must carry the session override verbatim,
    # not anything derived from global config/runtime.
    assert _CapturingAgent.last_init["model"] == "gpt-5.4"
    assert _CapturingAgent.last_init["provider"] == "openai-codex"
    assert _CapturingAgent.last_init["api_mode"] == "codex_responses"
    assert _CapturingAgent.last_init["base_url"] == "https://chatgpt.com/backend-api/codex"
    assert _CapturingAgent.last_init["api_key"] == "***"
    assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": True, "effort": "high"}


@pytest.mark.asyncio
async def test_background_task_prefers_session_override_over_global_runtime(monkeypatch):
    monkeypatch.setattr(gateway_run, "_load_gateway_config", lambda: {})
    monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", _explode_runtime_resolution)

    fake_run_agent = types.ModuleType("run_agent")
    fake_run_agent.AIAgent = _CapturingAgent
    monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)

    _CapturingAgent.last_init = None
    runner = _make_runner()

    # Background tasks deliver results through a platform adapter; stub one in.
    adapter = AsyncMock()
    adapter.send = AsyncMock()
    adapter.extract_media = MagicMock(return_value=([], "ok"))
    adapter.extract_images = MagicMock(return_value=([], "ok"))
    runner.adapters[Platform.TELEGRAM] = adapter

    source = SessionSource(
        platform=Platform.TELEGRAM,
        user_id="12345",
        chat_id="67890",
        user_name="testuser",
    )
    # Derive the key the same way production code does, so the override is
    # found on the background-task (helper agent) path too.
    session_key = runner._session_key_for_source(source)
    runner._session_model_overrides[session_key] = _codex_override()
    runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"}

    await runner._run_background_task("say hello", source, "bg_test")

    assert _CapturingAgent.last_init is not None
    assert _CapturingAgent.last_init["model"] == "gpt-5.4"
    assert _CapturingAgent.last_init["provider"] == "openai-codex"
    assert _CapturingAgent.last_init["api_mode"] == "codex_responses"
    assert _CapturingAgent.last_init["base_url"] == "https://chatgpt.com/backend-api/codex"
    assert _CapturingAgent.last_init["api_key"] == "***"
    assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": True, "effort": "high"}