# test_genai_semconv_converter.py
"""Tests for Bedrock autologging with the OTel GenAI semantic-convention export enabled.

Every test runs with ``MLFLOW_ENABLE_OTEL_GENAI_SEMCONV=true`` (see the autouse fixture
below). The end-to-end tests mock ``botocore``'s ``_make_api_call`` so no AWS request is
made, then inspect the attributes of the exported "chat" span.
"""

import base64
import json
from pathlib import Path
from unittest import mock

import boto3
import pytest

import mlflow
from mlflow.bedrock.genai_semconv_converter import _convert_image
from mlflow.tracing.constant import GenAiSemconvKey

from tests.tracing.helper import capture_otel_export, reset_autolog_state  # noqa: F401

_ANTHROPIC_MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"


@pytest.fixture(autouse=True)
def enable_genai_semconv(monkeypatch):
    """Set ``MLFLOW_ENABLE_OTEL_GENAI_SEMCONV=true`` for every test in this module."""
    monkeypatch.setenv("MLFLOW_ENABLE_OTEL_GENAI_SEMCONV", "true")


def _get_chat_span(exporter, processor):
    """Flush ``processor`` and return the first exported span whose operation name is "chat".

    Args:
        exporter: Object exposing ``get_finished_spans()``.
        processor: Object exposing ``force_flush(timeout_millis=...)``.

    Returns:
        The first finished span whose ``GenAiSemconvKey.OPERATION_NAME`` attribute equals
        ``"chat"``.

    Raises:
        AssertionError: If no such span was exported.
    """
    processor.force_flush(timeout_millis=5000)
    spans = exporter.get_finished_spans()
    chat_span = next(
        (s for s in spans if s.attributes.get(GenAiSemconvKey.OPERATION_NAME) == "chat"),
        None,
    )
    # Fail with a readable message instead of letting a bare StopIteration escape.
    assert chat_span is not None, (
        f"No span with {GenAiSemconvKey.OPERATION_NAME} == 'chat' found among "
        f"{len(spans)} exported span(s)"
    )
    return chat_span


def _call_converse(request, response):
    """Enable Bedrock autologging and issue one mocked ``converse`` call.

    Args:
        request: Keyword arguments passed to ``client.converse``.
        response: Value the patched ``BaseClient._make_api_call`` returns.
    """
    # autolog() is called before the client is created, matching the order the tests rely on.
    mlflow.bedrock.autolog()
    client = boto3.client("bedrock-runtime", region_name="us-west-2")
    with mock.patch("botocore.client.BaseClient._make_api_call", return_value=response):
        client.converse(**request)


# ── End-to-end autolog integration tests ─────────────────────────────────────


_CONVERSE_REQUEST = {
    "modelId": _ANTHROPIC_MODEL_ID,
    "messages": [{"role": "user", "content": [{"text": "Hi"}]}],
    "inferenceConfig": {"maxTokens": 300, "temperature": 0.1, "topP": 0.9},
}

_CONVERSE_RESPONSE = {
    "output": {
        "message": {"role": "assistant", "content": [{"text": "Hello! How can I help you today?"}]},
    },
    "stopReason": "end_turn",
    "usage": {"inputTokens": 8, "outputTokens": 12},
    "metrics": {"latencyMs": 551},
}

_CONVERSE_TOOL_CALLING_REQUEST = {
    "modelId": _ANTHROPIC_MODEL_ID,
    "messages": [
        {"role": "user", "content": [{"text": "What's the weather?"}]},
        {
            "role": "assistant",
            "content": [
                {"text": "Let me check."},
                {
                    "toolUse": {
                        "toolUseId": "tool_1",
                        "name": "get_weather",
                        "input": {"city": "SF"},
                    }
                },
            ],
        },
        {
            "role": "user",
            "content": [
                {"toolResult": {"toolUseId": "tool_1", "content": [{"json": {"temp": 72}}]}}
            ],
        },
    ],
    "toolConfig": {
        "tools": [
            {
                "toolSpec": {
                    "name": "get_weather",
                    "description": "Get weather",
                    "inputSchema": {
                        "json": {"type": "object", "properties": {"city": {"type": "string"}}}
                    },
                }
            }
        ]
    },
}

_CONVERSE_TOOL_CALLING_RESPONSE = {
    "output": {
        "message": {"role": "assistant", "content": [{"text": "It's 72°F in SF."}]},
    },
    "stopReason": "end_turn",
    "usage": {"inputTokens": 20, "outputTokens": 15},
}


@pytest.mark.usefixtures("reset_autolog_state")
def test_autolog_basic(capture_otel_export):
    """A plain text exchange populates request, message and usage attributes."""
    exporter, processor = capture_otel_export

    _call_converse(_CONVERSE_REQUEST, _CONVERSE_RESPONSE)

    chat_span = _get_chat_span(exporter, processor)
    assert chat_span.attributes[GenAiSemconvKey.OPERATION_NAME] == "chat"
    assert chat_span.attributes[GenAiSemconvKey.REQUEST_MODEL] == _ANTHROPIC_MODEL_ID
    assert chat_span.attributes[GenAiSemconvKey.REQUEST_TEMPERATURE] == 0.1
    assert chat_span.attributes[GenAiSemconvKey.REQUEST_MAX_TOKENS] == 300
    assert chat_span.attributes[GenAiSemconvKey.REQUEST_TOP_P] == 0.9

    input_msgs = json.loads(chat_span.attributes[GenAiSemconvKey.INPUT_MESSAGES])
    assert input_msgs[0]["role"] == "user"
    assert input_msgs[0]["parts"][0] == {"type": "text", "content": "Hi"}

    output_msgs = json.loads(chat_span.attributes[GenAiSemconvKey.OUTPUT_MESSAGES])
    assert len(output_msgs) == 1
    assert output_msgs[0]["role"] == "assistant"
    assert output_msgs[0]["parts"][0] == {
        "type": "text",
        "content": "Hello! How can I help you today?",
    }
    assert chat_span.attributes[GenAiSemconvKey.USAGE_INPUT_TOKENS] == 8
    assert chat_span.attributes[GenAiSemconvKey.USAGE_OUTPUT_TOKENS] == 12
    # No MLflow-prefixed attribute may remain on the exported span.
    assert not any(k.startswith("mlflow.") for k in chat_span.attributes)


@pytest.mark.usefixtures("reset_autolog_state")
def test_autolog_with_tool_calls(capture_otel_export):
    """Tool use, tool results and tool definitions are converted to semconv parts."""
    exporter, processor = capture_otel_export

    _call_converse(_CONVERSE_TOOL_CALLING_REQUEST, _CONVERSE_TOOL_CALLING_RESPONSE)

    chat_span = _get_chat_span(exporter, processor)

    input_msgs = json.loads(chat_span.attributes[GenAiSemconvKey.INPUT_MESSAGES])
    assert len(input_msgs) == 3
    assert input_msgs[0]["role"] == "user"
    assert input_msgs[0]["parts"][0]["content"] == "What's the weather?"

    assert input_msgs[1]["role"] == "assistant"
    assert input_msgs[1]["parts"][0] == {"type": "text", "content": "Let me check."}
    assert input_msgs[1]["parts"][1] == {
        "type": "tool_call",
        "id": "tool_1",
        "name": "get_weather",
        "arguments": {"city": "SF"},
    }

    # The user message carrying a toolResult is expected to surface with role "tool".
    assert input_msgs[2]["role"] == "tool"
    assert input_msgs[2]["parts"][0] == {
        "type": "tool_call_response",
        "id": "tool_1",
        "result": '{"temp": 72}',
    }

    tool_defs = json.loads(chat_span.attributes[GenAiSemconvKey.TOOL_DEFINITIONS])
    assert len(tool_defs) == 1
    assert tool_defs[0]["name"] == "get_weather"
    assert "function" not in tool_defs[0]

    assert not any(k.startswith("mlflow.") for k in chat_span.attributes)


@pytest.mark.usefixtures("reset_autolog_state")
def test_autolog_with_system_instructions(capture_otel_export):
    """The ``system`` request field is exported as the system-instructions attribute."""
    exporter, processor = capture_otel_export

    request = {**_CONVERSE_REQUEST, "system": [{"text": "You are a helpful assistant."}]}

    _call_converse(request, _CONVERSE_RESPONSE)

    chat_span = _get_chat_span(exporter, processor)
    system = json.loads(chat_span.attributes[GenAiSemconvKey.SYSTEM_INSTRUCTIONS])
    assert system == [{"type": "text", "content": "You are a helpful assistant."}]


@pytest.mark.usefixtures("reset_autolog_state")
def test_autolog_with_image(capture_otel_export):
    """An image content block is exported as a ``blob`` part with image metadata."""
    exporter, processor = capture_otel_export

    image_path = Path(__file__).parent.parent / "resources" / "images" / "test.png"
    image_bytes = image_path.read_bytes()

    request = {
        "modelId": _ANTHROPIC_MODEL_ID,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"text": "What text is in this image?"},
                    {"image": {"format": "png", "source": {"bytes": image_bytes}}},
                ],
            }
        ],
    }
    response = {
        "output": {"message": {"role": "assistant", "content": [{"text": "MLflow"}]}},
        "stopReason": "end_turn",
        "usage": {"inputTokens": 100, "outputTokens": 2},
    }

    _call_converse(request, response)

    chat_span = _get_chat_span(exporter, processor)
    input_msgs = json.loads(chat_span.attributes[GenAiSemconvKey.INPUT_MESSAGES])
    assert input_msgs[0]["parts"][0] == {"type": "text", "content": "What text is in this image?"}
    image_part = input_msgs[0]["parts"][1]
    assert image_part["type"] == "blob"
    assert image_part["modality"] == "image"
    assert image_part["mime_type"] == "image/png"
    # Only non-emptiness is checked here; the exact encoding is covered by test_convert_image.
    assert image_part["content"]


# ── Multimodal content conversion tests ──────────────────────────────────────


@pytest.mark.parametrize(
    ("image", "expected"),
    [
        # Raw bytes
        (
            {"format": "jpeg", "source": {"bytes": b"\xff\xd8"}},
            {
                "type": "blob",
                "modality": "image",
                "mime_type": "image/jpeg",
                "content": base64.b64encode(b"\xff\xd8").decode("utf-8"),
            },
        ),
        # Base64 string passthrough
        (
            {"format": "png", "source": {"bytes": "abc123"}},
            {"type": "blob", "modality": "image", "mime_type": "image/png", "content": "abc123"},
        ),
    ],
)
def test_convert_image(image, expected):
    """``_convert_image`` base64-encodes raw bytes and passes string payloads through."""
    assert _convert_image(image) == expected