# test_api_server_multimodal.py
"""End-to-end tests for inline image inputs on /v1/chat/completions and /v1/responses.

Covers the multimodal normalization path added to the API server.  The HTTP
tests below drive the adapter's real request handlers through an aiohttp test
app and replace ``APIServerAdapter._run_agent`` with a recording stub, so the
assertions check exactly what each handler forwards to the agent (or which
error it returns before the agent is reached).

NOTE(review): an earlier revision of this docstring said these tests patch
``AIAgent.run_conversation`` so that the ``run_agent`` prologue (which used to
crash on list content) also executes.  The code patches ``_run_agent`` itself,
so that prologue is NOT exercised here -- confirm whether that coverage is
still wanted and, if so, add a test that patches at the lower level.
"""

from unittest.mock import MagicMock, patch

import pytest
from aiohttp import web
from aiohttp.test_utils import TestClient, TestServer

from gateway.config import PlatformConfig
from gateway.platforms.api_server import (
    APIServerAdapter,
    _content_has_visible_payload,
    _normalize_multimodal_content,
    cors_middleware,
    security_headers_middleware,
)


# ---------------------------------------------------------------------------
# Pure-function tests for _normalize_multimodal_content
# ---------------------------------------------------------------------------


class TestNormalizeMultimodalContent:
    """Unit tests for ``_normalize_multimodal_content`` on raw content values."""

    def test_string_passthrough(self):
        assert _normalize_multimodal_content("hello") == "hello"

    def test_none_returns_empty_string(self):
        assert _normalize_multimodal_content(None) == ""

    def test_text_only_list_collapses_to_string(self):
        content = [{"type": "text", "text": "hi"}, {"type": "text", "text": "there"}]
        assert _normalize_multimodal_content(content) == "hi\nthere"

    def test_responses_input_text_canonicalized(self):
        content = [{"type": "input_text", "text": "hello"}]
        assert _normalize_multimodal_content(content) == "hello"

    def test_image_url_preserved_with_text(self):
        content = [
            {"type": "text", "text": "describe this"},
            {"type": "image_url", "image_url": {"url": "https://example.com/cat.png", "detail": "high"}},
        ]
        out = _normalize_multimodal_content(content)
        assert isinstance(out, list)
        assert out == [
            {"type": "text", "text": "describe this"},
            {"type": "image_url", "image_url": {"url": "https://example.com/cat.png", "detail": "high"}},
        ]

    def test_input_image_converted_to_canonical_shape(self):
        content = [
            {"type": "input_text", "text": "hi"},
            {"type": "input_image", "image_url": "https://example.com/cat.png"},
        ]
        out = _normalize_multimodal_content(content)
        assert out == [
            {"type": "text", "text": "hi"},
            {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
        ]

    def test_data_image_url_accepted(self):
        content = [{"type": "image_url", "image_url": {"url": "data:image/png;base64,AAAA"}}]
        out = _normalize_multimodal_content(content)
        assert out == [{"type": "image_url", "image_url": {"url": "data:image/png;base64,AAAA"}}]

    def test_non_image_data_url_rejected(self):
        content = [{"type": "image_url", "image_url": {"url": "data:text/plain;base64,SGVsbG8="}}]
        with pytest.raises(ValueError) as exc:
            _normalize_multimodal_content(content)
        assert str(exc.value).startswith("unsupported_content_type:")

    def test_file_part_rejected(self):
        with pytest.raises(ValueError) as exc:
            _normalize_multimodal_content([{"type": "file", "file": {"file_id": "f_1"}}])
        assert str(exc.value).startswith("unsupported_content_type:")

    def test_input_file_part_rejected(self):
        with pytest.raises(ValueError) as exc:
            _normalize_multimodal_content([{"type": "input_file", "file_id": "f_1"}])
        assert str(exc.value).startswith("unsupported_content_type:")

    def test_missing_url_rejected(self):
        with pytest.raises(ValueError) as exc:
            _normalize_multimodal_content([{"type": "image_url", "image_url": {}}])
        assert str(exc.value).startswith("invalid_image_url:")

    def test_bad_scheme_rejected(self):
        with pytest.raises(ValueError) as exc:
            _normalize_multimodal_content([{"type": "image_url", "image_url": {"url": "ftp://example.com/x.png"}}])
        assert str(exc.value).startswith("invalid_image_url:")

    def test_unknown_part_type_rejected(self):
        with pytest.raises(ValueError) as exc:
            _normalize_multimodal_content([{"type": "audio", "audio": {}}])
        assert str(exc.value).startswith("unsupported_content_type:")


class TestContentHasVisiblePayload:
    """Unit tests for ``_content_has_visible_payload`` on strings and part lists."""

    def test_non_empty_string(self):
        assert _content_has_visible_payload("hello")

    def test_whitespace_only_string(self):
        assert not _content_has_visible_payload(" ")

    def test_list_with_image_only(self):
        assert _content_has_visible_payload([{"type": "image_url", "image_url": {"url": "x"}}])

    def test_list_with_only_empty_text(self):
        assert not _content_has_visible_payload([{"type": "text", "text": ""}])


# ---------------------------------------------------------------------------
# HTTP integration — real aiohttp client hitting the adapter handlers
# ---------------------------------------------------------------------------


def _make_adapter() -> APIServerAdapter:
    """Build an adapter from an enabled ``PlatformConfig`` with no other options."""
    return APIServerAdapter(PlatformConfig(enabled=True))


def _create_app(adapter: APIServerAdapter) -> web.Application:
    """Return an aiohttp app that routes the tested endpoints to *adapter*.

    Registers POST /v1/chat/completions, POST /v1/responses and
    GET /v1/responses/{response_id} on the adapter's handler methods.
    """
    # Skip middlewares that are None -- presumably they can be unset when an
    # optional dependency is missing; verify against gateway.platforms.api_server.
    mws = [mw for mw in (cors_middleware, security_headers_middleware) if mw is not None]
    app = web.Application(middlewares=mws)
    app["api_server_adapter"] = adapter
    app.router.add_post("/v1/chat/completions", adapter._handle_chat_completions)
    app.router.add_post("/v1/responses", adapter._handle_responses)
    app.router.add_get("/v1/responses/{response_id}", adapter._handle_get_response)
    return app


def _patch_run_agent(adapter: APIServerAdapter, final_response: str = "ok"):
    """Return a patcher that swaps ``adapter._run_agent`` for a recording stub.

    Use it as a context manager: ``with _patch_run_agent(adapter) as mock_run``.
    Because the replacement is passed via ``new=``, the ``with`` target is the
    ``MagicMock`` itself.  Calling the mock returns a coroutine (from the async
    side effect) that stores the call's keyword arguments on
    ``mock_run.captured`` and resolves to a ``(result, usage)`` tuple whose
    ``final_response`` is *final_response* and whose token counts are all zero.
    """
    mock_run = MagicMock()

    async def _stub(**kwargs):
        # Record what the handler forwarded so the test can inspect it later.
        mock_run.captured = kwargs
        return (
            {"final_response": final_response, "messages": [], "api_calls": 1},
            {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0},
        )

    mock_run.side_effect = _stub
    return patch.object(adapter, "_run_agent", new=mock_run)


@pytest.fixture
def adapter():
    """Provide a fresh adapter instance for each test."""
    return _make_adapter()


class TestChatCompletionsMultimodalHTTP:
    """HTTP-level tests for multimodal content on /v1/chat/completions."""

    @pytest.mark.asyncio
    async def test_inline_image_preserved_to_run_agent(self, adapter):
        """Multimodal user content reaches _run_agent as a list of parts."""
        image_payload = [
            {"type": "text", "text": "What's in this image?"},
            {"type": "image_url", "image_url": {"url": "https://example.com/cat.png", "detail": "high"}},
        ]

        app = _create_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            with _patch_run_agent(adapter, final_response="A cat.") as mock_run:
                resp = await cli.post(
                    "/v1/chat/completions",
                    json={
                        "model": "hermes-agent",
                        "messages": [{"role": "user", "content": image_payload}],
                    },
                )

            assert resp.status == 200, await resp.text()
            assert mock_run.captured["user_message"] == image_payload

    @pytest.mark.asyncio
    async def test_text_only_array_collapses_to_string(self, adapter):
        """Text-only array becomes a plain string so logging stays unchanged."""
        app = _create_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            with _patch_run_agent(adapter) as mock_run:
                resp = await cli.post(
                    "/v1/chat/completions",
                    json={
                        "model": "hermes-agent",
                        "messages": [
                            {"role": "user", "content": [{"type": "text", "text": "hello"}]},
                        ],
                    },
                )

            assert resp.status == 200, await resp.text()
            assert mock_run.captured["user_message"] == "hello"

    @pytest.mark.asyncio
    async def test_file_part_returns_400(self, adapter):
        """A ``file`` content part is rejected with unsupported_content_type."""
        app = _create_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            resp = await cli.post(
                "/v1/chat/completions",
                json={
                    "model": "hermes-agent",
                    "messages": [
                        {"role": "user", "content": [{"type": "file", "file": {"file_id": "f_1"}}]},
                    ],
                },
            )
            assert resp.status == 400
            body = await resp.json()
            assert body["error"]["code"] == "unsupported_content_type"
            assert body["error"]["param"] == "messages[0].content"

    @pytest.mark.asyncio
    async def test_non_image_data_url_returns_400(self, adapter):
        """A ``data:`` URL with a non-image media type is rejected with a 400."""
        app = _create_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            resp = await cli.post(
                "/v1/chat/completions",
                json={
                    "model": "hermes-agent",
                    "messages": [
                        {
                            "role": "user",
                            "content": [
                                {
                                    "type": "image_url",
                                    "image_url": {"url": "data:text/plain;base64,SGVsbG8="},
                                },
                            ],
                        },
                    ],
                },
            )
            assert resp.status == 400
            body = await resp.json()
            assert body["error"]["code"] == "unsupported_content_type"


class TestResponsesMultimodalHTTP:
    """HTTP-level tests for multimodal content on /v1/responses."""

    @pytest.mark.asyncio
    async def test_input_image_canonicalized_and_forwarded(self, adapter):
        """``input_text``/``input_image`` parts reach _run_agent in canonical shape."""
        app = _create_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            with _patch_run_agent(adapter) as mock_run:
                resp = await cli.post(
                    "/v1/responses",
                    json={
                        "model": "hermes-agent",
                        "input": [
                            {
                                "role": "user",
                                "content": [
                                    {"type": "input_text", "text": "Describe."},
                                    {
                                        "type": "input_image",
                                        "image_url": "https://example.com/cat.png",
                                    },
                                ],
                            }
                        ],
                    },
                )

            assert resp.status == 200, await resp.text()
            expected = [
                {"type": "text", "text": "Describe."},
                {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
            ]
            assert mock_run.captured["user_message"] == expected

    @pytest.mark.asyncio
    async def test_input_file_returns_400(self, adapter):
        """An ``input_file`` content part is rejected with unsupported_content_type."""
        app = _create_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            resp = await cli.post(
                "/v1/responses",
                json={
                    "model": "hermes-agent",
                    "input": [
                        {
                            "role": "user",
                            "content": [{"type": "input_file", "file_id": "f_1"}],
                        }
                    ],
                },
            )
            assert resp.status == 400
            body = await resp.json()
            assert body["error"]["code"] == "unsupported_content_type"