/ tests / gateway / test_api_server_multimodal.py
test_api_server_multimodal.py
  1  """End-to-end tests for inline image inputs on /v1/chat/completions and /v1/responses.
  2  
Covers the multimodal normalization path added to the API server.  The
pure-function tests call ``_normalize_multimodal_content`` and
``_content_has_visible_payload`` directly.  The HTTP tests send requests to a
real aiohttp app and, where a successful response is expected, stub the
adapter's ``_run_agent`` so the handlers' request parsing and content
normalization run while the agent itself does not.
  8  """
  9  
 10  from unittest.mock import MagicMock, patch
 11  
 12  import pytest
 13  from aiohttp import web
 14  from aiohttp.test_utils import TestClient, TestServer
 15  
 16  from gateway.config import PlatformConfig
 17  from gateway.platforms.api_server import (
 18      APIServerAdapter,
 19      _content_has_visible_payload,
 20      _normalize_multimodal_content,
 21      cors_middleware,
 22      security_headers_middleware,
 23  )
 24  
 25  
 26  # ---------------------------------------------------------------------------
 27  # Pure-function tests for _normalize_multimodal_content
 28  # ---------------------------------------------------------------------------
 29  
 30  
 31  class TestNormalizeMultimodalContent:
 32      def test_string_passthrough(self):
 33          assert _normalize_multimodal_content("hello") == "hello"
 34  
 35      def test_none_returns_empty_string(self):
 36          assert _normalize_multimodal_content(None) == ""
 37  
 38      def test_text_only_list_collapses_to_string(self):
 39          content = [{"type": "text", "text": "hi"}, {"type": "text", "text": "there"}]
 40          assert _normalize_multimodal_content(content) == "hi\nthere"
 41  
 42      def test_responses_input_text_canonicalized(self):
 43          content = [{"type": "input_text", "text": "hello"}]
 44          assert _normalize_multimodal_content(content) == "hello"
 45  
 46      def test_image_url_preserved_with_text(self):
 47          content = [
 48              {"type": "text", "text": "describe this"},
 49              {"type": "image_url", "image_url": {"url": "https://example.com/cat.png", "detail": "high"}},
 50          ]
 51          out = _normalize_multimodal_content(content)
 52          assert isinstance(out, list)
 53          assert out == [
 54              {"type": "text", "text": "describe this"},
 55              {"type": "image_url", "image_url": {"url": "https://example.com/cat.png", "detail": "high"}},
 56          ]
 57  
 58      def test_input_image_converted_to_canonical_shape(self):
 59          content = [
 60              {"type": "input_text", "text": "hi"},
 61              {"type": "input_image", "image_url": "https://example.com/cat.png"},
 62          ]
 63          out = _normalize_multimodal_content(content)
 64          assert out == [
 65              {"type": "text", "text": "hi"},
 66              {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
 67          ]
 68  
 69      def test_data_image_url_accepted(self):
 70          content = [{"type": "image_url", "image_url": {"url": "data:image/png;base64,AAAA"}}]
 71          out = _normalize_multimodal_content(content)
 72          assert out == [{"type": "image_url", "image_url": {"url": "data:image/png;base64,AAAA"}}]
 73  
 74      def test_non_image_data_url_rejected(self):
 75          content = [{"type": "image_url", "image_url": {"url": "data:text/plain;base64,SGVsbG8="}}]
 76          with pytest.raises(ValueError) as exc:
 77              _normalize_multimodal_content(content)
 78          assert str(exc.value).startswith("unsupported_content_type:")
 79  
 80      def test_file_part_rejected(self):
 81          with pytest.raises(ValueError) as exc:
 82              _normalize_multimodal_content([{"type": "file", "file": {"file_id": "f_1"}}])
 83          assert str(exc.value).startswith("unsupported_content_type:")
 84  
 85      def test_input_file_part_rejected(self):
 86          with pytest.raises(ValueError) as exc:
 87              _normalize_multimodal_content([{"type": "input_file", "file_id": "f_1"}])
 88          assert str(exc.value).startswith("unsupported_content_type:")
 89  
 90      def test_missing_url_rejected(self):
 91          with pytest.raises(ValueError) as exc:
 92              _normalize_multimodal_content([{"type": "image_url", "image_url": {}}])
 93          assert str(exc.value).startswith("invalid_image_url:")
 94  
 95      def test_bad_scheme_rejected(self):
 96          with pytest.raises(ValueError) as exc:
 97              _normalize_multimodal_content([{"type": "image_url", "image_url": {"url": "ftp://example.com/x.png"}}])
 98          assert str(exc.value).startswith("invalid_image_url:")
 99  
100      def test_unknown_part_type_rejected(self):
101          with pytest.raises(ValueError) as exc:
102              _normalize_multimodal_content([{"type": "audio", "audio": {}}])
103          assert str(exc.value).startswith("unsupported_content_type:")
104  
105  
class TestContentHasVisiblePayload:
    """Truthiness checks for ``_content_has_visible_payload``."""

    def test_non_empty_string(self):
        """Non-blank text counts as visible."""
        text = "hello"
        assert _content_has_visible_payload(text)

    def test_whitespace_only_string(self):
        """A string of only spaces does not count as visible."""
        blank = "   "
        assert not _content_has_visible_payload(blank)

    def test_list_with_image_only(self):
        """A list holding just an image part counts as visible."""
        image_part = {"type": "image_url", "image_url": {"url": "x"}}
        assert _content_has_visible_payload([image_part])

    def test_list_with_only_empty_text(self):
        """A list whose only part is empty text does not count as visible."""
        empty_text_part = {"type": "text", "text": ""}
        assert not _content_has_visible_payload([empty_text_part])
118  
119  
120  # ---------------------------------------------------------------------------
121  # HTTP integration — real aiohttp client hitting the adapter handlers
122  # ---------------------------------------------------------------------------
123  
124  
def _make_adapter() -> APIServerAdapter:
    """Build an ``APIServerAdapter`` from an enabled ``PlatformConfig``."""
    config = PlatformConfig(enabled=True)
    return APIServerAdapter(config)
127  
128  
def _create_app(adapter: APIServerAdapter) -> web.Application:
    """Assemble an aiohttp application that routes to *adapter*'s handlers.

    Installs the CORS and security-header middlewares (skipping any that is
    ``None``), stores the adapter under ``"api_server_adapter"``, and registers
    the chat-completions and responses routes.
    """
    middlewares = []
    for candidate in (cors_middleware, security_headers_middleware):
        if candidate is not None:
            middlewares.append(candidate)

    application = web.Application(middlewares=middlewares)
    application["api_server_adapter"] = adapter

    router = application.router
    router.add_post("/v1/chat/completions", adapter._handle_chat_completions)
    router.add_post("/v1/responses", adapter._handle_responses)
    router.add_get("/v1/responses/{response_id}", adapter._handle_get_response)
    return application
137  
138  
@pytest.fixture
def adapter():
    """Provide an adapter built by ``_make_adapter`` to the requesting test."""
    instance = _make_adapter()
    return instance
142  
143  
class TestChatCompletionsMultimodalHTTP:
    """HTTP-level tests for multimodal content on ``/v1/chat/completions``."""

    _ENDPOINT = "/v1/chat/completions"

    @staticmethod
    def _recording_run_agent(final_response):
        """Build a stand-in for ``_run_agent`` that records its keyword arguments.

        Returns a ``(mock, calls)`` pair.  *mock* is a ``MagicMock`` whose async
        side effect appends each call's kwargs to *calls* and returns a canned
        two-dict tuple with *final_response* as the reply text.
        """
        calls = []

        async def _fake_run_agent(**kwargs):
            calls.append(kwargs)
            result = {"final_response": final_response, "messages": [], "api_calls": 1}
            usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
            return (result, usage)

        return MagicMock(side_effect=_fake_run_agent), calls

    @pytest.mark.asyncio
    async def test_inline_image_preserved_to_run_agent(self, adapter):
        """Multimodal user content reaches _run_agent as a list of parts."""
        parts = [
            {"type": "text", "text": "What's in this image?"},
            {"type": "image_url", "image_url": {"url": "https://example.com/cat.png", "detail": "high"}},
        ]
        request_body = {
            "model": "hermes-agent",
            "messages": [{"role": "user", "content": parts}],
        }
        agent_mock, calls = self._recording_run_agent("A cat.")

        async with TestClient(TestServer(_create_app(adapter))) as cli:
            with patch.object(adapter, "_run_agent", new=agent_mock):
                resp = await cli.post(self._ENDPOINT, json=request_body)

            # The response body is only read when the status check fails.
            assert resp.status == 200, await resp.text()
            assert calls[-1]["user_message"] == parts

    @pytest.mark.asyncio
    async def test_text_only_array_collapses_to_string(self, adapter):
        """Text-only array becomes a plain string so logging stays unchanged."""
        request_body = {
            "model": "hermes-agent",
            "messages": [
                {"role": "user", "content": [{"type": "text", "text": "hello"}]},
            ],
        }
        agent_mock, calls = self._recording_run_agent("ok")

        async with TestClient(TestServer(_create_app(adapter))) as cli:
            with patch.object(adapter, "_run_agent", new=agent_mock):
                resp = await cli.post(self._ENDPOINT, json=request_body)

            assert resp.status == 200, await resp.text()
            assert calls[-1]["user_message"] == "hello"

    @pytest.mark.asyncio
    async def test_file_part_returns_400(self, adapter):
        """A ``file`` content part yields a 400 that names the offending param."""
        file_part = {"type": "file", "file": {"file_id": "f_1"}}
        request_body = {
            "model": "hermes-agent",
            "messages": [{"role": "user", "content": [file_part]}],
        }

        async with TestClient(TestServer(_create_app(adapter))) as cli:
            resp = await cli.post(self._ENDPOINT, json=request_body)
            assert resp.status == 400
            body = await resp.json()

        error = body["error"]
        assert error["code"] == "unsupported_content_type"
        assert error["param"] == "messages[0].content"

    @pytest.mark.asyncio
    async def test_non_image_data_url_returns_400(self, adapter):
        """An ``image_url`` part carrying a ``data:text/plain`` URL yields a 400."""
        text_data_part = {
            "type": "image_url",
            "image_url": {"url": "data:text/plain;base64,SGVsbG8="},
        }
        request_body = {
            "model": "hermes-agent",
            "messages": [{"role": "user", "content": [text_data_part]}],
        }

        async with TestClient(TestServer(_create_app(adapter))) as cli:
            resp = await cli.post(self._ENDPOINT, json=request_body)
            assert resp.status == 400
            body = await resp.json()

        assert body["error"]["code"] == "unsupported_content_type"
248  
249  
class TestResponsesMultimodalHTTP:
    """HTTP-level tests for multimodal content on ``/v1/responses``."""

    _ENDPOINT = "/v1/responses"

    @pytest.mark.asyncio
    async def test_input_image_canonicalized_and_forwarded(self, adapter):
        """``input_text``/``input_image`` parts reach ``_run_agent`` in canonical form."""
        calls = []

        async def _fake_run_agent(**kwargs):
            # Record the kwargs so the forwarded user message can be inspected.
            calls.append(kwargs)
            result = {"final_response": "ok", "messages": [], "api_calls": 1}
            usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
            return (result, usage)

        user_item = {
            "role": "user",
            "content": [
                {"type": "input_text", "text": "Describe."},
                {"type": "input_image", "image_url": "https://example.com/cat.png"},
            ],
        }
        request_body = {"model": "hermes-agent", "input": [user_item]}
        agent_mock = MagicMock(side_effect=_fake_run_agent)

        async with TestClient(TestServer(_create_app(adapter))) as cli:
            with patch.object(adapter, "_run_agent", new=agent_mock):
                resp = await cli.post(self._ENDPOINT, json=request_body)

            # The response body is only read when the status check fails.
            assert resp.status == 200, await resp.text()
            expected = [
                {"type": "text", "text": "Describe."},
                {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
            ]
            assert calls[-1]["user_message"] == expected

    @pytest.mark.asyncio
    async def test_input_file_returns_400(self, adapter):
        """An ``input_file`` content part yields a 400 ``unsupported_content_type``."""
        user_item = {
            "role": "user",
            "content": [{"type": "input_file", "file_id": "f_1"}],
        }
        request_body = {"model": "hermes-agent", "input": [user_item]}

        async with TestClient(TestServer(_create_app(adapter))) as cli:
            resp = await cli.post(self._ENDPOINT, json=request_body)
            assert resp.status == 400
            body = await resp.json()

        assert body["error"]["code"] == "unsupported_content_type"