# test_inline_vision.py
"""
Tests for inline vision feature — both Layer 1 (force-inline on first message)
and Layer 2 (on-demand via load_artifact tool).

These tests verify:
1. Image files are inlined when enable_inline_vision=True
2. Non-image files fall back to text metadata
3. Limits (max_inline_vision_images, max_inline_vision_bytes) are enforced
4. The LiteLLM layer creates multipart tool messages for vision data URLs
5. The _sanitize_bytes_in_dict helper works correctly
6. The _vision_image_data_url key is detected and handled in tool responses
"""

import base64
import json
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from typing import Dict, Any, Optional

from google.genai import types as adk_types

# ─── Test helpers ───────────────────────────────────────────────────────────

def _make_png_bytes(size: int = 100) -> bytes:
    """Create fake PNG bytes of exactly ``size`` bytes.

    The payload is the 8-byte PNG signature followed by zero padding. It is
    not a decodable image; it only starts with the bytes a PNG starts with.

    Args:
        size: Total length of the returned payload, signature included.

    Returns:
        ``size`` bytes beginning with the PNG signature.

    Raises:
        ValueError: If ``size`` is smaller than the signature. Without this
            check a negative padding count would silently produce an 8-byte
            payload, i.e. more bytes than the caller asked for.
    """
    # Minimal PNG header + padding
    header = b"\x89PNG\r\n\x1a\n"
    if size < len(header):
        raise ValueError(
            f"size must be at least {len(header)} bytes to hold the PNG "
            f"signature, got {size}"
        )
    return header + b"\x00" * (size - len(header))


def _make_mock_component(
    enable_inline_vision: bool = False,
    max_inline_vision_images: int = 5,
    max_inline_vision_bytes: int = 20971520,  # 20 * 1024 * 1024
    agent_name: str = "TestAgent",
) -> MagicMock:
    """Create a mock SamAgentComponent with inline vision config.

    The inline-vision settings are exposed twice so that code reading either
    form sees the same values: as plain attributes on the mock and through
    ``get_config(key, default=None)``.

    Args:
        enable_inline_vision: Value reported for ``enable_inline_vision``.
        max_inline_vision_images: Value reported for
            ``max_inline_vision_images``.
        max_inline_vision_bytes: Value reported for
            ``max_inline_vision_bytes``.
        agent_name: Value reported by ``get_config("agent_name")``.

    Returns:
        A ``MagicMock`` with ``log_identifier``, the three inline-vision
        attributes, a ``get_config`` mock and an ``artifact_service`` mock.
    """
    # Built once; get_config only ever reads from it.
    config = {
        "agent_name": agent_name,
        "enable_inline_vision": enable_inline_vision,
        "max_inline_vision_images": max_inline_vision_images,
        "max_inline_vision_bytes": max_inline_vision_bytes,
    }

    component = MagicMock()
    component.log_identifier = "[TestComponent]"
    component.enable_inline_vision = enable_inline_vision
    component.max_inline_vision_images = max_inline_vision_images
    component.max_inline_vision_bytes = max_inline_vision_bytes
    # Unknown keys fall back to the caller-supplied default.
    component.get_config = MagicMock(
        side_effect=lambda key, default=None: config.get(key, default)
    )
    component.artifact_service = MagicMock()
    return component
# ─── Layer 1 Tests: _prepare_a2a_filepart_for_adk ──────────────────────────

# The artifact helpers are patched at their source module.
_SAVE_ARTIFACT_TARGET = (
    "solace_agent_mesh.agent.utils.artifact_helpers.save_artifact_with_metadata"
)
_LOAD_ARTIFACT_TARGET = (
    "solace_agent_mesh.agent.utils.artifact_helpers.load_artifact_content_or_metadata"
)


def _patched_save():
    """Return a patcher that makes save_artifact_with_metadata report success."""
    return patch(
        _SAVE_ARTIFACT_TARGET,
        new_callable=AsyncMock,
        return_value={"status": "success", "data_version": 0},
    )


def _patched_load(filename):
    """Return a patcher whose metadata load succeeds and echoes ``filename``."""
    return patch(
        _LOAD_ARTIFACT_TARGET,
        new_callable=AsyncMock,
        return_value={"status": "success", "metadata": {"filename": filename}},
    )


def _encode_file_part(payload, name, mime_type):
    """Wrap raw bytes in an a2a FilePart whose content is base64 text."""
    # Project imports stay lazy, as everywhere else in this file.
    from a2a.types import FilePart, FileWithBytes

    encoded = base64.b64encode(payload).decode("utf-8")
    return FilePart(
        file=FileWithBytes(bytes=encoded, name=name, mime_type=mime_type)
    )


class TestPrepareFilePartForADK:
    """Exercise _prepare_a2a_filepart_for_adk with inline vision on and off."""

    @pytest.mark.asyncio
    async def test_image_inlined_when_vision_enabled(self):
        """An image with enable_inline_vision=True comes back as an inline_data Part."""
        from solace_agent_mesh.common.a2a.translation import _prepare_a2a_filepart_for_adk

        raw_png = _make_png_bytes(200)
        file_part = _encode_file_part(raw_png, "test.png", "image/png")
        component = _make_mock_component(enable_inline_vision=True)

        # Only the save helper is mocked in this scenario.
        with _patched_save():
            outcome = await _prepare_a2a_filepart_for_adk(
                file_part, component, "user1", "session1"
            )

        assert outcome is not None
        assert outcome.inline_data is not None
        assert outcome.inline_data.mime_type == "image/png"
        assert outcome.inline_data.data == raw_png
        # The part must not be a text part.
        assert outcome.text is None

    @pytest.mark.asyncio
    async def test_non_image_returns_text_when_vision_enabled(self):
        """A CSV is still described by text metadata even with vision enabled."""
        from solace_agent_mesh.common.a2a.translation import _prepare_a2a_filepart_for_adk

        file_part = _encode_file_part(b"col1,col2\nval1,val2", "data.csv", "text/csv")
        component = _make_mock_component(enable_inline_vision=True)

        with _patched_save(), _patched_load("data.csv"):
            outcome = await _prepare_a2a_filepart_for_adk(
                file_part, component, "user1", "session1"
            )

        assert outcome is not None
        # Text metadata, not inline bytes.
        assert outcome.text is not None
        assert outcome.inline_data is None

    @pytest.mark.asyncio
    async def test_image_returns_text_when_vision_disabled(self):
        """With enable_inline_vision=False an image yields text metadata."""
        from solace_agent_mesh.common.a2a.translation import _prepare_a2a_filepart_for_adk

        file_part = _encode_file_part(_make_png_bytes(200), "test.png", "image/png")
        component = _make_mock_component(enable_inline_vision=False)

        with _patched_save(), _patched_load("test.png"):
            outcome = await _prepare_a2a_filepart_for_adk(
                file_part, component, "user1", "session1"
            )

        assert outcome is not None
        # Text metadata, not inline bytes.
        assert outcome.text is not None
        assert outcome.inline_data is None


class TestInlineVisionLimits:
    """Check the max_inline_vision_images and max_inline_vision_bytes caps."""

    @pytest.mark.asyncio
    async def test_max_images_limit_enforced(self):
        """Images beyond max_inline_vision_images fall back to text."""
        from solace_agent_mesh.common.a2a.translation import _prepare_a2a_filepart_for_adk

        component = _make_mock_component(
            enable_inline_vision=True,
            max_inline_vision_images=2,
        )

        # One tracker is shared by every call so the counts accumulate.
        tracker = {"images_inlined": 0, "bytes_inlined": 0}
        outcomes = []

        for index in range(3):
            filename = f"img{index}.png"
            file_part = _encode_file_part(_make_png_bytes(100), filename, "image/png")

            with _patched_save(), _patched_load(filename):
                outcome = await _prepare_a2a_filepart_for_adk(
                    file_part, component, "user1", "session1",
                    inline_vision_tracker=tracker,
                )
            outcomes.append(outcome)

        first, second, third = outcomes
        # The first two fit under the cap and are inlined.
        assert first.inline_data is not None
        assert second.inline_data is not None
        # The third is over the cap and becomes text.
        assert third.text is not None
        assert third.inline_data is None
        # The tracker records exactly the two inlined images.
        assert tracker["images_inlined"] == 2

    @pytest.mark.asyncio
    async def test_max_bytes_limit_enforced(self):
        """Images beyond max_inline_vision_bytes fall back to text."""
        from solace_agent_mesh.common.a2a.translation import _prepare_a2a_filepart_for_adk

        component = _make_mock_component(
            enable_inline_vision=True,
            max_inline_vision_bytes=50,  # Very small limit — less than one image
        )

        # One tracker is shared by both calls so the byte count accumulates.
        tracker = {"images_inlined": 0, "bytes_inlined": 0}
        outcomes = []

        for index in range(2):
            filename = f"img{index}.png"
            file_part = _encode_file_part(_make_png_bytes(100), filename, "image/png")

            with _patched_save(), _patched_load(filename):
                outcome = await _prepare_a2a_filepart_for_adk(
                    file_part, component, "user1", "session1",
                    inline_vision_tracker=tracker,
                )
            outcomes.append(outcome)

        first, second = outcomes
        # Expected: inlined, since 0 bytes used so far is below the 50 limit.
        assert first.inline_data is not None
        # Expected: text, since the 100 bytes already used exceed the 50 limit.
        assert second.text is not None
        assert second.inline_data is None


# ─── Layer 2 Tests: LiteLLM multipart tool messages ───────────────────────

def _tool_response_content(call_id, payload):
    """Build a tool-role Content holding one load_artifact function response."""
    return adk_types.Content(
        role="tool",
        parts=[
            adk_types.Part(
                function_response=adk_types.FunctionResponse(
                    id=call_id,
                    name="load_artifact",
                    response=payload,
                )
            )
        ],
    )


class TestLiteLLMVisionToolMessages:
    """Check how _content_to_message_param treats _vision_image_data_url."""

    def test_tool_response_with_vision_data_url_creates_tool_plus_user_messages(self):
        """A response carrying _vision_image_data_url becomes a tool msg plus a user image msg."""
        from solace_agent_mesh.agent.adk.models.lite_llm import _content_to_message_param

        data_url = "data:image/png;base64,iVBORw0KGgo="
        payload = {
            "status": "success",
            "message": "Image loaded",
            "filename": "test.png",
            "_vision_image_data_url": data_url,
        }

        messages = _content_to_message_param(
            _tool_response_content("call_123", payload)
        )

        # Exactly two messages are expected: tool, then user.
        assert isinstance(messages, list)
        assert len(messages) == 2
        tool_msg, user_msg = messages

        # The tool message is plain JSON text with the data URL removed.
        assert tool_msg["role"] == "tool"
        assert tool_msg["tool_call_id"] == "call_123"
        assert isinstance(tool_msg["content"], str)
        decoded = json.loads(tool_msg["content"])
        assert decoded["status"] == "success"
        assert "_vision_image_data_url" not in decoded

        # The user message is multipart: a text part, then the image.
        assert user_msg["role"] == "user"
        assert isinstance(user_msg["content"], list)
        assert len(user_msg["content"]) == 2
        text_part, image_part = user_msg["content"]
        assert text_part["type"] == "text"
        assert image_part["type"] == "image_url"
        assert image_part["image_url"] == {"url": data_url}

    def test_tool_response_without_vision_data_url_is_text_only(self):
        """A response without _vision_image_data_url stays one text-only tool message."""
        from solace_agent_mesh.agent.adk.models.lite_llm import _content_to_message_param

        payload = {
            "status": "success",
            "message": "Loaded text file",
            "content": "Hello world",
        }

        message = _content_to_message_param(
            _tool_response_content("call_456", payload)
        )

        assert isinstance(message, dict)
        assert message["role"] == "tool"
        # The content is a JSON string, not a list of parts.
        assert isinstance(message["content"], str)
        decoded = json.loads(message["content"])
        assert decoded["status"] == "success"


# ─── Bytes sanitization tests ─────────────────────────────────────────────

def _sanitize_bytes_in_dict(obj):
    """Local copy of the helper for testing (callbacks.py has heavy deps).

    Walks ``obj`` in place when it is a dict or a list. Every ``bytes`` or
    ``bytearray`` member is replaced by a ``"<binary data: N bytes>"``
    placeholder, and nested dicts and lists are walked recursively. Any other
    ``obj`` is left untouched. Nothing is returned.
    """
    if isinstance(obj, dict):
        members = obj.items()
    elif isinstance(obj, list):
        members = enumerate(obj)
    else:
        return

    # ``slot`` is a dict key or a list index; both work as obj[slot].
    for slot, member in members:
        if isinstance(member, (bytes, bytearray)):
            obj[slot] = f"<binary data: {len(member)} bytes>"
        elif isinstance(member, (dict, list)):
            _sanitize_bytes_in_dict(member)


class TestSanitizeBytesInDict:
    """Exercise the local _sanitize_bytes_in_dict copy."""

    def test_sanitizes_bytes_in_flat_dict(self):
        """Bytes at the top level of a dict become a size placeholder."""
        payload = {"text": "hello", "image": b"\x89PNG\r\n\x1a\n" + b"\x00" * 92}
        _sanitize_bytes_in_dict(payload)
        assert payload["text"] == "hello"
        assert isinstance(payload["image"], str)
        assert "100 bytes" in payload["image"]

    def test_sanitizes_bytes_in_nested_dict(self):
        """Bytes inside a nested dict are replaced as well."""
        payload = {"outer": {"inner": b"\x00" * 50}}
        _sanitize_bytes_in_dict(payload)
        replaced = payload["outer"]["inner"]
        assert isinstance(replaced, str)
        assert "50 bytes" in replaced

    def test_sanitizes_bytes_in_list(self):
        """Bytes in a list, and in a dict inside that list, are replaced."""
        payload = [b"\x00" * 10, "text", {"key": b"\x00" * 20}]
        _sanitize_bytes_in_dict(payload)
        assert isinstance(payload[0], str)
        assert "10 bytes" in payload[0]
        assert payload[1] == "text"
        assert isinstance(payload[2]["key"], str)
        assert "20 bytes" in payload[2]["key"]

    def test_no_change_for_dict_without_bytes(self):
        """A structure without bytes serializes identically before and after."""
        payload = {"text": "hello", "number": 42, "nested": {"key": "value"}}
        before = json.dumps(payload)
        _sanitize_bytes_in_dict(payload)
        assert json.dumps(payload) == before


# ─── Image artifact detection tests ───────────────────────────────────────

class TestIsImageArtifact:
    """Exercise the is_image_artifact helper used in inline vision."""

    def test_png_detected(self):
        """A .png with an image/png MIME type counts as an image."""
        from solace_agent_mesh.common.utils.mime_helpers import is_image_artifact

        verdict = is_image_artifact("photo.png", "image/png")
        assert verdict is True

    def test_jpg_detected(self):
        """A .jpg with an image/jpeg MIME type counts as an image."""
        from solace_agent_mesh.common.utils.mime_helpers import is_image_artifact

        verdict = is_image_artifact("photo.jpg", "image/jpeg")
        assert verdict is True

    def test_webp_detected(self):
        """A .webp with an image/webp MIME type counts as an image."""
        from solace_agent_mesh.common.utils.mime_helpers import is_image_artifact

        verdict = is_image_artifact("photo.webp", "image/webp")
        assert verdict is True

    def test_svg_excluded(self):
        """An SVG is not treated as an image artifact."""
        from solace_agent_mesh.common.utils.mime_helpers import is_image_artifact

        verdict = is_image_artifact("diagram.svg", "image/svg+xml")
        assert verdict is False

    def test_csv_not_image(self):
        """A CSV is not an image."""
        from solace_agent_mesh.common.utils.mime_helpers import is_image_artifact

        verdict = is_image_artifact("data.csv", "text/csv")
        assert verdict is False

    def test_filename_fallback_when_no_mime(self):
        """With no MIME type, an image filename is enough."""
        from solace_agent_mesh.common.utils.mime_helpers import is_image_artifact

        verdict = is_image_artifact("photo.png", None)
        assert verdict is True

    def test_non_image_filename_when_no_mime(self):
        """With no MIME type, a non-image filename is rejected."""
        from solace_agent_mesh.common.utils.mime_helpers import is_image_artifact

        verdict = is_image_artifact("data.csv", None)
        assert verdict is False