# test_genai_semconv_converter.py
"""Tests for MLflow's OpenAI GenAI semantic-convention span converter.

Verifies that ``mlflow.openai.autolog`` (with ``MLFLOW_ENABLE_OTEL_GENAI_SEMCONV``
enabled) emits OpenTelemetry spans whose attributes follow the GenAI semantic
conventions (``gen_ai.*`` keys) for both the Chat Completions and Responses
APIs — including tool calls, streaming, system instructions, and multimodal
content conversion via the private ``_convert_content`` / ``_convert_message``
helpers.
"""

import importlib.metadata
import json

import openai
import pytest
from packaging.version import Version

import mlflow
from mlflow.openai.genai_semconv_converter import _convert_content, _convert_message

from tests.tracing.helper import capture_otel_export, reset_autolog_state  # noqa: F401

MODEL = "gpt-4o-mini"

_openai_version = Version(importlib.metadata.version("openai"))
# The Responses API first appeared in openai 1.66.0; skip those
# parametrizations on older SDK installations.
requires_responses_api = pytest.mark.skipif(
    _openai_version < Version("1.66.0"),
    reason="OpenAI < 1.66.0 does not support the Responses API",
)

# Tool definition in the Chat Completions format (nested under "function").
MOCK_CHAT_TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get weather",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
            },
        },
    }
]

# Tool definition in the flat Responses API format.
# NOTE(review): this constant is currently unused — the "responses" branch of
# test_autolog_with_tool_calls passes MOCK_CHAT_TOOLS (and later asserts on
# the "get_weather" name). Confirm whether that call was meant to use
# MOCK_RESPONSES_TOOLS instead.
MOCK_RESPONSES_TOOLS = [
    {
        "type": "function",
        "name": "get_current_weather",
        "description": "Get the current weather",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {"type": "string"},
                "unit": {"type": "string"},
            },
        },
    }
]


@pytest.fixture(autouse=True)
def enable_genai_semconv(monkeypatch):
    """Force the GenAI semconv export path on for every test in this module."""
    monkeypatch.setenv("MLFLOW_ENABLE_OTEL_GENAI_SEMCONV", "true")


@pytest.fixture
def client(mock_openai, monkeypatch):
    """OpenAI client pointed at the mock server provided by ``mock_openai``."""
    monkeypatch.setenv("OPENAI_API_KEY", "test")
    return openai.OpenAI(api_key="test", base_url=mock_openai)


def _get_chat_span(exporter, processor):
    """Flush pending spans and return the exported span whose operation is "chat"."""
    processor.force_flush(timeout_millis=5000)
    spans = exporter.get_finished_spans()
    return next(s for s in spans if s.attributes.get("gen_ai.operation.name") == "chat")


@pytest.mark.parametrize(
    "api",
    ["chat_completions", pytest.param("responses", marks=requires_responses_api)],
)
@pytest.mark.usefixtures("reset_autolog_state")
def test_autolog_basic(client, capture_otel_export, api):
    """A plain request produces a chat span with gen_ai request/response attributes."""
    exporter, processor = capture_otel_export

    mlflow.openai.autolog()
    if api == "chat_completions":
        client.chat.completions.create(
            messages=[{"role": "user", "content": "Hi"}],
            model=MODEL,
            temperature=0.5,
            top_p=0.9,
            max_tokens=100,
            stop=["\n", "END"],
        )
    else:
        client.responses.create(input="Hi", model=MODEL, temperature=0.5)

    chat_span = _get_chat_span(exporter, processor)
    assert chat_span.attributes["gen_ai.operation.name"] == "chat"
    assert chat_span.attributes["gen_ai.request.model"] == MODEL
    assert chat_span.attributes["gen_ai.request.temperature"] == 0.5

    # top_p / max_tokens / stop are only sent on the Chat Completions branch.
    if api == "chat_completions":
        assert chat_span.attributes["gen_ai.request.top_p"] == 0.9
        assert chat_span.attributes["gen_ai.request.max_tokens"] == 100
        assert list(chat_span.attributes["gen_ai.request.stop_sequences"]) == ["\n", "END"]

    input_msgs = json.loads(chat_span.attributes["gen_ai.input.messages"])
    assert input_msgs[0]["role"] == "user"
    assert input_msgs[0]["parts"][0]["type"] == "text"
    assert input_msgs[0]["parts"][0]["content"] == "Hi"

    output_msgs = json.loads(chat_span.attributes["gen_ai.output.messages"])
    assert len(output_msgs) == 1
    assert output_msgs[0]["role"] == "assistant"

    assert chat_span.attributes["gen_ai.response.model"] == MODEL
    # Semconv spans must not leak MLflow-internal attributes.
    assert not any(k.startswith("mlflow.") for k in chat_span.attributes)


@pytest.mark.parametrize(
    "api",
    ["chat_completions", pytest.param("responses", marks=requires_responses_api)],
)
@pytest.mark.usefixtures("reset_autolog_state")
def test_autolog_with_tool_calls(client, capture_otel_export, api):
    """Tool definitions, tool calls, and tool results map to semconv parts."""
    exporter, processor = capture_otel_export

    mlflow.openai.autolog()
    if api == "chat_completions":
        client.chat.completions.create(
            messages=[
                {"role": "system", "content": "Be helpful"},
                {"role": "user", "content": "What's the weather in SF?"},
                {
                    "role": "assistant",
                    "tool_calls": [
                        {
                            "id": "call_123",
                            "type": "function",
                            "function": {"name": "get_weather", "arguments": '{"city": "SF"}'},
                        }
                    ],
                },
                {"role": "tool", "tool_call_id": "call_123", "content": "Sunny"},
            ],
            model=MODEL,
            tools=MOCK_CHAT_TOOLS,
        )
    else:
        client.responses.create(
            input=[
                {"role": "user", "content": "What's the weather in SF?"},
                {
                    "type": "function_call",
                    "id": "fc_1",
                    "call_id": "call_123",
                    "name": "get_weather",
                    "arguments": '{"city": "SF"}',
                    "status": "completed",
                },
                {
                    "type": "function_call_output",
                    "call_id": "call_123",
                    "output": "Sunny",
                },
            ],
            model=MODEL,
            # NOTE(review): chat-format tools on the Responses API — see the
            # comment on MOCK_RESPONSES_TOOLS above.
            tools=MOCK_CHAT_TOOLS,
            instructions="Be helpful",
        )

    chat_span = _get_chat_span(exporter, processor)
    assert chat_span.attributes["gen_ai.operation.name"] == "chat"
    assert chat_span.attributes["gen_ai.request.model"] == MODEL

    # The converter flattens the chat-style {"function": {...}} nesting.
    tool_defs = json.loads(chat_span.attributes["gen_ai.tool.definitions"])
    assert "function" not in tool_defs[0]
    assert tool_defs[0]["name"] == "get_weather"

    input_msgs = json.loads(chat_span.attributes["gen_ai.input.messages"])
    assert input_msgs[0]["role"] == "user"
    assert input_msgs[0]["parts"][0]["type"] == "text"
    assert input_msgs[0]["parts"][0]["content"] == "What's the weather in SF?"
    assert input_msgs[1]["role"] == "assistant"
    assert input_msgs[1]["parts"][0]["type"] == "tool_call"
    assert input_msgs[1]["parts"][0]["id"] == "call_123"
    assert input_msgs[1]["parts"][0]["name"] == "get_weather"
    # Arguments are parsed from the JSON string into a dict.
    assert input_msgs[1]["parts"][0]["arguments"] == {"city": "SF"}
    assert input_msgs[2]["role"] == "tool"
    assert input_msgs[2]["parts"][0]["type"] == "tool_call_response"
    assert input_msgs[2]["parts"][0]["id"] == "call_123"
    assert input_msgs[2]["parts"][0]["result"] == "Sunny"
    # Both the system message (chat) and `instructions` (responses) end up here.
    system_instructions = json.loads(chat_span.attributes["gen_ai.system_instructions"])
    assert system_instructions == [{"type": "text", "content": "Be helpful"}]

    output_msgs = json.loads(chat_span.attributes["gen_ai.output.messages"])
    assert len(output_msgs) == 1
    assert output_msgs[0]["role"] == "assistant"

    assert chat_span.attributes["gen_ai.response.model"] == MODEL
    assert not any(k.startswith("mlflow.") for k in chat_span.attributes)


@pytest.mark.parametrize(
    "api",
    ["chat_completions", pytest.param("responses", marks=requires_responses_api)],
)
@pytest.mark.usefixtures("reset_autolog_state")
def test_autolog_streaming(client, capture_otel_export, api):
    """Streaming requests still yield a complete semconv chat span after consumption."""
    exporter, processor = capture_otel_export

    mlflow.openai.autolog()
    if api == "chat_completions":
        stream = client.chat.completions.create(
            messages=[{"role": "user", "content": "Hi"}],
            model=MODEL,
            stream=True,
        )
        # The span is only finalized once the stream is fully consumed.
        for _ in stream:
            pass
    else:
        stream = client.responses.create(input="Hi", model=MODEL, stream=True)
        for _ in stream:
            pass

    chat_span = _get_chat_span(exporter, processor)
    assert chat_span.attributes["gen_ai.operation.name"] == "chat"
    assert chat_span.attributes["gen_ai.request.model"] == MODEL

    input_msgs = json.loads(chat_span.attributes["gen_ai.input.messages"])
    assert input_msgs[0]["role"] == "user"
    assert input_msgs[0]["parts"][0]["type"] == "text"
    assert input_msgs[0]["parts"][0]["content"] == "Hi"

    output_msgs = json.loads(chat_span.attributes["gen_ai.output.messages"])
    assert len(output_msgs) == 1
    assert output_msgs[0]["role"] == "assistant"

    assert chat_span.attributes["gen_ai.response.model"] == MODEL
    assert not any(k.startswith("mlflow.") for k in chat_span.attributes)


@pytest.mark.parametrize(
    ("content_item", "expected"),
    [
        # Chat API: image_url with HTTP URL → UriPart
        (
            {"type": "image_url", "image_url": {"url": "https://example.com/img.png"}},
            {"type": "uri", "modality": "image", "uri": "https://example.com/img.png"},
        ),
        # Chat API: image_url with data URI → BlobPart
        (
            {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,abc123"}},
            {"type": "blob", "modality": "image", "mime_type": "image/jpeg", "content": "abc123"},
        ),
        # Responses API: input_image with HTTP URL → UriPart
        (
            {"type": "input_image", "image_url": "https://example.com/img.png"},
            {"type": "uri", "modality": "image", "uri": "https://example.com/img.png"},
        ),
        # Responses API: input_image with data URI → BlobPart
        (
            {"type": "input_image", "image_url": "data:image/png;base64,xyz789"},
            {"type": "blob", "modality": "image", "mime_type": "image/png", "content": "xyz789"},
        ),
        # Chat API: input_audio → BlobPart
        (
            {"type": "input_audio", "input_audio": {"data": "audiodata", "format": "wav"}},
            {"type": "blob", "modality": "audio", "mime_type": "audio/wav", "content": "audiodata"},
        ),
        # Responses API: input_text → TextPart
        (
            {"type": "input_text", "text": "hello"},
            {"type": "text", "content": "hello"},
        ),
    ],
)
def test_convert_content_multimodal(content_item, expected):
    """Each multimodal content item maps to its semconv part representation."""
    result = _convert_content([content_item])
    assert result == [expected]


def test_convert_message_audio_transcript_fallback():
    """With no text content, the audio transcript is used as the text part."""
    msg = {
        "role": "assistant",
        "content": None,
        "audio": {
            "id": "audio_abc123",
            "data": "SGVsbG8=",
            "expires_at": 9999999999,
            "transcript": "Yes, I am.",
        },
    }
    result = _convert_message(msg)
    assert result == {
        "role": "assistant",
        "parts": [{"type": "text", "content": "Yes, I am."}],
    }


def test_convert_message_audio_no_override():
    """Existing text content takes precedence over the audio transcript."""
    msg = {
        "role": "assistant",
        "content": "I have text.",
        "audio": {
            "id": "audio_abc123",
            "data": "SGVsbG8=",
            "expires_at": 9999999999,
            "transcript": "Different transcript.",
        },
    }
    result = _convert_message(msg)
    assert result == {
        "role": "assistant",
        "parts": [{"type": "text", "content": "I have text."}],
    }


def test_convert_message_no_audio_no_content():
    """A message with neither content nor audio converts to an empty parts list."""
    msg = {"role": "assistant", "content": None}
    result = _convert_message(msg)
    assert result == {"role": "assistant", "parts": []}