/ tests / unit / common / a2a / test_inline_vision.py
test_inline_vision.py
  1  """
Tests for the inline vision feature — both Layer 1 (force-inline on first message)
and Layer 2 (on-demand via the load_artifact tool).

These tests verify:
1. Image files are inlined when enable_inline_vision=True
2. Non-image files fall back to text metadata
3. Limits (max_inline_vision_images, max_inline_vision_bytes) are enforced
4. The LiteLLM layer creates multipart tool messages for vision data URLs
5. The _sanitize_bytes_in_dict helper works correctly (exercised through a local
   copy defined in this file)
6. The _vision_image_data_url key is detected and handled in tool responses
7. The is_image_artifact helper classifies files by MIME type and filename
 12  """
 13  
 14  import base64
 15  import json
 16  import pytest
 17  from unittest.mock import AsyncMock, MagicMock, patch
 18  from typing import Dict, Any, Optional
 19  
 20  from google.genai import types as adk_types
 21  
 22  # ─── Test helpers ───────────────────────────────────────────────────────────
 23  
 24  def _make_png_bytes(size: int = 100) -> bytes:
 25      """Create fake PNG bytes of a given size."""
 26      # Minimal PNG header + padding
 27      header = b"\x89PNG\r\n\x1a\n"
 28      return header + b"\x00" * (size - len(header))
 29  
 30  
 31  def _make_mock_component(
 32      enable_inline_vision: bool = False,
 33      max_inline_vision_images: int = 5,
 34      max_inline_vision_bytes: int = 20971520,
 35      agent_name: str = "TestAgent",
 36  ):
 37      """Create a mock SamAgentComponent with inline vision config."""
 38      component = MagicMock()
 39      component.log_identifier = "[TestComponent]"
 40      component.enable_inline_vision = enable_inline_vision
 41      component.max_inline_vision_images = max_inline_vision_images
 42      component.max_inline_vision_bytes = max_inline_vision_bytes
 43      component.get_config = MagicMock(side_effect=lambda key, default=None: {
 44          "agent_name": agent_name,
 45          "enable_inline_vision": enable_inline_vision,
 46          "max_inline_vision_images": max_inline_vision_images,
 47          "max_inline_vision_bytes": max_inline_vision_bytes,
 48      }.get(key, default))
 49      component.artifact_service = MagicMock()
 50      return component
 51  
 52  
 53  # ─── Layer 1 Tests: _prepare_a2a_filepart_for_adk ──────────────────────────
 54  
 55  class TestPrepareFilePartForADK:
 56      """Tests for _prepare_a2a_filepart_for_adk with inline vision."""
 57  
 58      @pytest.mark.asyncio
 59      async def test_image_inlined_when_vision_enabled(self):
 60          """When enable_inline_vision=True and file is an image, return inline_data Part."""
 61          from solace_agent_mesh.common.a2a.translation import _prepare_a2a_filepart_for_adk
 62          from a2a.types import FilePart, FileWithBytes
 63  
 64          png_bytes = _make_png_bytes(200)
 65          b64_bytes = base64.b64encode(png_bytes).decode("utf-8")
 66          part = FilePart(file=FileWithBytes(bytes=b64_bytes, name="test.png", mime_type="image/png"))
 67          component = _make_mock_component(enable_inline_vision=True)
 68  
 69          # Mock save_artifact_with_metadata to succeed (patched at source module)
 70          with patch(
 71              "solace_agent_mesh.agent.utils.artifact_helpers.save_artifact_with_metadata",
 72              new_callable=AsyncMock,
 73              return_value={"status": "success", "data_version": 0},
 74          ):
 75              result = await _prepare_a2a_filepart_for_adk(
 76                  part, component, "user1", "session1"
 77              )
 78  
 79          assert result is not None
 80          assert result.inline_data is not None
 81          assert result.inline_data.mime_type == "image/png"
 82          assert result.inline_data.data == png_bytes
 83          assert result.text is None  # Should NOT be text
 84  
 85      @pytest.mark.asyncio
 86      async def test_non_image_returns_text_when_vision_enabled(self):
 87          """Non-image files should still return text metadata even with vision enabled."""
 88          from solace_agent_mesh.common.a2a.translation import _prepare_a2a_filepart_for_adk
 89          from a2a.types import FilePart, FileWithBytes
 90  
 91          csv_bytes = b"col1,col2\nval1,val2"
 92          b64_bytes = base64.b64encode(csv_bytes).decode("utf-8")
 93          part = FilePart(file=FileWithBytes(bytes=b64_bytes, name="data.csv", mime_type="text/csv"))
 94          component = _make_mock_component(enable_inline_vision=True)
 95  
 96          with patch(
 97              "solace_agent_mesh.agent.utils.artifact_helpers.save_artifact_with_metadata",
 98              new_callable=AsyncMock,
 99              return_value={"status": "success", "data_version": 0},
100          ), patch(
101              "solace_agent_mesh.agent.utils.artifact_helpers.load_artifact_content_or_metadata",
102              new_callable=AsyncMock,
103              return_value={"status": "success", "metadata": {"filename": "data.csv"}},
104          ):
105              result = await _prepare_a2a_filepart_for_adk(
106                  part, component, "user1", "session1"
107              )
108  
109          assert result is not None
110          assert result.text is not None  # Should be text metadata
111          assert result.inline_data is None
112  
113      @pytest.mark.asyncio
114      async def test_image_returns_text_when_vision_disabled(self):
115          """Images should return text metadata when enable_inline_vision=False."""
116          from solace_agent_mesh.common.a2a.translation import _prepare_a2a_filepart_for_adk
117          from a2a.types import FilePart, FileWithBytes
118  
119          png_bytes = _make_png_bytes(200)
120          b64_bytes = base64.b64encode(png_bytes).decode("utf-8")
121          part = FilePart(file=FileWithBytes(bytes=b64_bytes, name="test.png", mime_type="image/png"))
122          component = _make_mock_component(enable_inline_vision=False)
123  
124          with patch(
125              "solace_agent_mesh.agent.utils.artifact_helpers.save_artifact_with_metadata",
126              new_callable=AsyncMock,
127              return_value={"status": "success", "data_version": 0},
128          ), patch(
129              "solace_agent_mesh.agent.utils.artifact_helpers.load_artifact_content_or_metadata",
130              new_callable=AsyncMock,
131              return_value={"status": "success", "metadata": {"filename": "test.png"}},
132          ):
133              result = await _prepare_a2a_filepart_for_adk(
134                  part, component, "user1", "session1"
135              )
136  
137          assert result is not None
138          assert result.text is not None  # Should be text metadata
139          assert result.inline_data is None
140  
141  
class TestInlineVisionLimits:
    """Checks that max_inline_vision_images and max_inline_vision_bytes cap inlining."""

    @staticmethod
    async def _convert_images(component, tracker, count):
        """Run ``count`` 100-byte PNG parts through the converter with a shared tracker.

        Each part is named ``img<index>.png``. The artifact save/load helpers
        are stubbed to report success for every call. Returns the converted
        parts in submission order.
        """
        from solace_agent_mesh.common.a2a.translation import _prepare_a2a_filepart_for_adk
        from a2a.types import FilePart, FileWithBytes

        converted = []
        for index in range(count):
            encoded_image = base64.b64encode(_make_png_bytes(100)).decode("utf-8")
            file_payload = FileWithBytes(
                bytes=encoded_image, name=f"img{index}.png", mime_type="image/png"
            )
            file_part = FilePart(file=file_payload)

            save_patch = patch(
                "solace_agent_mesh.agent.utils.artifact_helpers.save_artifact_with_metadata",
                new_callable=AsyncMock,
                return_value={"status": "success", "data_version": 0},
            )
            load_patch = patch(
                "solace_agent_mesh.agent.utils.artifact_helpers.load_artifact_content_or_metadata",
                new_callable=AsyncMock,
                return_value={"status": "success", "metadata": {"filename": f"img{index}.png"}},
            )
            with save_patch, load_patch:
                adk_part = await _prepare_a2a_filepart_for_adk(
                    file_part, component, "user1", "session1",
                    inline_vision_tracker=tracker,
                )
                converted.append(adk_part)
        return converted

    @pytest.mark.asyncio
    async def test_max_images_limit_enforced(self):
        """Once max_inline_vision_images images are inlined, later ones become text."""
        limited_component = _make_mock_component(
            enable_inline_vision=True,
            max_inline_vision_images=2,
        )
        tracker = {"images_inlined": 0, "bytes_inlined": 0}

        first, second, third = await self._convert_images(limited_component, tracker, 3)

        # The first two images fit within the count limit.
        assert first.inline_data is not None
        assert second.inline_data is not None
        # The third exceeds it and is expected as text metadata.
        assert third.text is not None
        assert third.inline_data is None
        # The shared tracker records exactly the two inlined images.
        assert tracker["images_inlined"] == 2

    @pytest.mark.asyncio
    async def test_max_bytes_limit_enforced(self):
        """Once max_inline_vision_bytes is reached, later images become text."""
        limited_component = _make_mock_component(
            enable_inline_vision=True,
            max_inline_vision_bytes=50,  # Smaller than a single 100-byte test image
        )
        tracker = {"images_inlined": 0, "bytes_inlined": 0}

        first, second = await self._convert_images(limited_component, tracker, 2)

        # The test expects the first image to be inlined: nothing has been
        # counted against the 50-byte limit yet when it is processed.
        assert first.inline_data is not None
        # The second is expected as text because the first image's 100 bytes
        # already exceed the limit.
        assert second.text is not None
        assert second.inline_data is None
231  
232  
233  # ─── Layer 2 Tests: LiteLLM multipart tool messages ───────────────────────
234  
class TestLiteLLMVisionToolMessages:
    """Layer 2: how _content_to_message_param treats the _vision_image_data_url key."""

    def test_tool_response_with_vision_data_url_creates_tool_plus_user_messages(self):
        """A response carrying _vision_image_data_url yields a tool message plus a user image message."""
        from solace_agent_mesh.agent.adk.models.lite_llm import _content_to_message_param

        data_url = "data:image/png;base64,iVBORw0KGgo="
        function_response = adk_types.FunctionResponse(
            id="call_123",
            name="load_artifact",
            response={
                "status": "success",
                "message": "Image loaded",
                "filename": "test.png",
                "_vision_image_data_url": data_url,
            },
        )
        tool_content = adk_types.Content(
            role="tool",
            parts=[adk_types.Part(function_response=function_response)],
        )

        messages = _content_to_message_param(tool_content)

        # Exactly two messages are expected: the tool reply, then a user message.
        assert isinstance(messages, list)
        assert len(messages) == 2
        tool_msg, user_msg = messages

        # The tool message is plain JSON text with the data URL stripped out.
        assert tool_msg["role"] == "tool"
        assert tool_msg["tool_call_id"] == "call_123"
        assert isinstance(tool_msg["content"], str)
        tool_payload = json.loads(tool_msg["content"])
        assert tool_payload["status"] == "success"
        assert "_vision_image_data_url" not in tool_payload

        # The user message carries a text entry followed by the image itself.
        assert user_msg["role"] == "user"
        user_parts = user_msg["content"]
        assert isinstance(user_parts, list)
        assert len(user_parts) == 2
        text_entry, image_entry = user_parts
        assert text_entry["type"] == "text"
        assert image_entry["type"] == "image_url"
        assert image_entry["image_url"] == {"url": data_url}

    def test_tool_response_without_vision_data_url_is_text_only(self):
        """A response lacking _vision_image_data_url stays a single text-only tool message."""
        from solace_agent_mesh.agent.adk.models.lite_llm import _content_to_message_param

        function_response = adk_types.FunctionResponse(
            id="call_456",
            name="load_artifact",
            response={
                "status": "success",
                "message": "Loaded text file",
                "content": "Hello world",
            },
        )
        tool_content = adk_types.Content(
            role="tool",
            parts=[adk_types.Part(function_response=function_response)],
        )

        message = _content_to_message_param(tool_content)

        assert isinstance(message, dict)
        assert message["role"] == "tool"
        # The content must be a JSON string rather than a multipart list.
        assert isinstance(message["content"], str)
        decoded = json.loads(message["content"])
        assert decoded["status"] == "success"
315          assert parsed["status"] == "success"
316  
317  
318  # ─── Bytes sanitization tests ─────────────────────────────────────────────
319  
def _sanitize_bytes_in_dict(obj):
    """Replace bytes values with a size placeholder, in place (local test copy).

    Walks dicts and lists recursively. Every ``bytes``/``bytearray`` value is
    overwritten with ``"<binary data: N bytes>"``. Any other kind of object,
    including tuples, is left untouched. Nothing is returned.

    This is a local copy of the helper because callbacks.py has heavy deps.
    """
    if isinstance(obj, dict):
        entries = list(obj.items())
    elif isinstance(obj, list):
        entries = list(enumerate(obj))
    else:
        return

    # ``slot`` is a dict key or a list index; both support item assignment.
    for slot, payload in entries:
        if isinstance(payload, (bytes, bytearray)):
            obj[slot] = f"<binary data: {len(payload)} bytes>"
        elif isinstance(payload, (dict, list)):
            _sanitize_bytes_in_dict(payload)
334  
335  
class TestSanitizeBytesInDict:
    """Exercises the in-place bytes-to-placeholder replacement of _sanitize_bytes_in_dict."""

    def test_sanitizes_bytes_in_flat_dict(self):
        """A top-level bytes value becomes a placeholder; strings are untouched."""
        payload = {"text": "hello", "image": b"\x89PNG\r\n\x1a\n" + b"\x00" * 92}
        _sanitize_bytes_in_dict(payload)
        assert payload["text"] == "hello"
        replaced = payload["image"]
        assert isinstance(replaced, str)
        assert "100 bytes" in replaced

    def test_sanitizes_bytes_in_nested_dict(self):
        """Bytes inside a nested dict are replaced as well."""
        payload = {"outer": {"inner": b"\x00" * 50}}
        _sanitize_bytes_in_dict(payload)
        replaced = payload["outer"]["inner"]
        assert isinstance(replaced, str)
        assert "50 bytes" in replaced

    def test_sanitizes_bytes_in_list(self):
        """Bytes in a list, and in dicts held by that list, are replaced."""
        payload = [b"\x00" * 10, "text", {"key": b"\x00" * 20}]
        _sanitize_bytes_in_dict(payload)
        head = payload[0]
        assert isinstance(head, str)
        assert "10 bytes" in head
        assert payload[1] == "text"
        nested = payload[2]["key"]
        assert isinstance(nested, str)
        assert "20 bytes" in nested

    def test_no_change_for_dict_without_bytes(self):
        """A structure without any bytes serializes identically before and after."""
        payload = {"text": "hello", "number": 42, "nested": {"key": "value"}}
        before = json.dumps(payload)
        _sanitize_bytes_in_dict(payload)
        after = json.dumps(payload)
        assert after == before
366  
367  
368  # ─── Image artifact detection tests ───────────────────────────────────────
369  
class TestIsImageArtifact:
    """Covers the is_image_artifact helper that inline vision relies on."""

    @staticmethod
    def _classify(filename, mime_type):
        """Import is_image_artifact lazily and return its verdict for the given file."""
        from solace_agent_mesh.common.utils.mime_helpers import is_image_artifact
        return is_image_artifact(filename, mime_type)

    def test_png_detected(self):
        """PNG with an image/png MIME type counts as an image."""
        assert self._classify("photo.png", "image/png") is True

    def test_jpg_detected(self):
        """JPG with an image/jpeg MIME type counts as an image."""
        assert self._classify("photo.jpg", "image/jpeg") is True

    def test_webp_detected(self):
        """WebP with an image/webp MIME type counts as an image."""
        assert self._classify("photo.webp", "image/webp") is True

    def test_svg_excluded(self):
        """SVG is expected to be rejected despite its image/* MIME type."""
        assert self._classify("diagram.svg", "image/svg+xml") is False

    def test_csv_not_image(self):
        """CSV with a text/csv MIME type is not an image."""
        assert self._classify("data.csv", "text/csv") is False

    def test_filename_fallback_when_no_mime(self):
        """With no MIME type, a .png filename is still expected to count as an image."""
        assert self._classify("photo.png", None) is True

    def test_non_image_filename_when_no_mime(self):
        """With no MIME type, a .csv filename is not an image."""
        assert self._classify("data.csv", None) is False
398          from solace_agent_mesh.common.utils.mime_helpers import is_image_artifact
399          assert is_image_artifact("data.csv", None) is False