# test_genai_semconv_converter.py
"""Tests for MLflow's OpenAI GenAI semantic-convention span converter.

Verifies that ``mlflow.openai.autolog`` (with ``MLFLOW_ENABLE_OTEL_GENAI_SEMCONV``
enabled) emits OpenTelemetry spans whose attributes follow the GenAI semantic
conventions (``gen_ai.*`` keys) for both the Chat Completions and Responses
APIs — including tool calls, streaming, system instructions, and multimodal
content conversion via the private ``_convert_content`` / ``_convert_message``
helpers.
"""

import importlib.metadata
import json

import openai
import pytest
from packaging.version import Version

import mlflow
from mlflow.openai.genai_semconv_converter import _convert_content, _convert_message

from tests.tracing.helper import capture_otel_export, reset_autolog_state  # noqa: F401

MODEL = "gpt-4o-mini"

_openai_version = Version(importlib.metadata.version("openai"))
# The Responses API first appeared in openai 1.66.0; skip those
# parametrizations on older SDK installations.
requires_responses_api = pytest.mark.skipif(
    _openai_version < Version("1.66.0"),
    reason="OpenAI < 1.66.0 does not support the Responses API",
)

# Tool definition in the Chat Completions format (nested under "function").
MOCK_CHAT_TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get weather",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
            },
        },
    }
]

# Tool definition in the flat Responses API format.
# NOTE(review): this constant is currently unused — the "responses" branch of
# test_autolog_with_tool_calls passes MOCK_CHAT_TOOLS (and later asserts on
# the "get_weather" name). Confirm whether that call was meant to use
# MOCK_RESPONSES_TOOLS instead.
MOCK_RESPONSES_TOOLS = [
    {
        "type": "function",
        "name": "get_current_weather",
        "description": "Get the current weather",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {"type": "string"},
                "unit": {"type": "string"},
            },
        },
    }
]


@pytest.fixture(autouse=True)
def enable_genai_semconv(monkeypatch):
    """Force the GenAI semconv export path on for every test in this module."""
    monkeypatch.setenv("MLFLOW_ENABLE_OTEL_GENAI_SEMCONV", "true")


@pytest.fixture
def client(mock_openai, monkeypatch):
    """OpenAI client pointed at the mock server provided by ``mock_openai``."""
    monkeypatch.setenv("OPENAI_API_KEY", "test")
    return openai.OpenAI(api_key="test", base_url=mock_openai)


def _get_chat_span(exporter, processor):
    """Flush pending spans and return the exported span whose operation is "chat"."""
    processor.force_flush(timeout_millis=5000)
    spans = exporter.get_finished_spans()
    return next(s for s in spans if s.attributes.get("gen_ai.operation.name") == "chat")


@pytest.mark.parametrize(
    "api",
    ["chat_completions", pytest.param("responses", marks=requires_responses_api)],
)
@pytest.mark.usefixtures("reset_autolog_state")
def test_autolog_basic(client, capture_otel_export, api):
    """A plain request produces a chat span with gen_ai request/response attributes."""
    exporter, processor = capture_otel_export

    mlflow.openai.autolog()
    if api == "chat_completions":
        client.chat.completions.create(
            messages=[{"role": "user", "content": "Hi"}],
            model=MODEL,
            temperature=0.5,
            top_p=0.9,
            max_tokens=100,
            stop=["\n", "END"],
        )
    else:
        client.responses.create(input="Hi", model=MODEL, temperature=0.5)

    chat_span = _get_chat_span(exporter, processor)
    assert chat_span.attributes["gen_ai.operation.name"] == "chat"
    assert chat_span.attributes["gen_ai.request.model"] == MODEL
    assert chat_span.attributes["gen_ai.request.temperature"] == 0.5

    # top_p / max_tokens / stop are only sent on the Chat Completions branch.
    if api == "chat_completions":
        assert chat_span.attributes["gen_ai.request.top_p"] == 0.9
        assert chat_span.attributes["gen_ai.request.max_tokens"] == 100
        assert list(chat_span.attributes["gen_ai.request.stop_sequences"]) == ["\n", "END"]

    input_msgs = json.loads(chat_span.attributes["gen_ai.input.messages"])
    assert input_msgs[0]["role"] == "user"
    assert input_msgs[0]["parts"][0]["type"] == "text"
    assert input_msgs[0]["parts"][0]["content"] == "Hi"

    output_msgs = json.loads(chat_span.attributes["gen_ai.output.messages"])
    assert len(output_msgs) == 1
    assert output_msgs[0]["role"] == "assistant"

    assert chat_span.attributes["gen_ai.response.model"] == MODEL
    # Semconv spans must not leak MLflow-internal attributes.
    assert not any(k.startswith("mlflow.") for k in chat_span.attributes)


@pytest.mark.parametrize(
    "api",
    ["chat_completions", pytest.param("responses", marks=requires_responses_api)],
)
@pytest.mark.usefixtures("reset_autolog_state")
def test_autolog_with_tool_calls(client, capture_otel_export, api):
    """Tool definitions, tool calls, and tool results map to semconv parts."""
    exporter, processor = capture_otel_export

    mlflow.openai.autolog()
    if api == "chat_completions":
        client.chat.completions.create(
            messages=[
                {"role": "system", "content": "Be helpful"},
                {"role": "user", "content": "What's the weather in SF?"},
                {
                    "role": "assistant",
                    "tool_calls": [
                        {
                            "id": "call_123",
                            "type": "function",
                            "function": {"name": "get_weather", "arguments": '{"city": "SF"}'},
                        }
                    ],
                },
                {"role": "tool", "tool_call_id": "call_123", "content": "Sunny"},
            ],
            model=MODEL,
            tools=MOCK_CHAT_TOOLS,
        )
    else:
        client.responses.create(
            input=[
                {"role": "user", "content": "What's the weather in SF?"},
                {
                    "type": "function_call",
                    "id": "fc_1",
                    "call_id": "call_123",
                    "name": "get_weather",
                    "arguments": '{"city": "SF"}',
                    "status": "completed",
                },
                {
                    "type": "function_call_output",
                    "call_id": "call_123",
                    "output": "Sunny",
                },
            ],
            model=MODEL,
            # NOTE(review): chat-format tools on the Responses API — see the
            # comment on MOCK_RESPONSES_TOOLS above.
            tools=MOCK_CHAT_TOOLS,
            instructions="Be helpful",
        )

    chat_span = _get_chat_span(exporter, processor)
    assert chat_span.attributes["gen_ai.operation.name"] == "chat"
    assert chat_span.attributes["gen_ai.request.model"] == MODEL

    # The converter flattens the chat-style {"function": {...}} nesting.
    tool_defs = json.loads(chat_span.attributes["gen_ai.tool.definitions"])
    assert "function" not in tool_defs[0]
    assert tool_defs[0]["name"] == "get_weather"

    input_msgs = json.loads(chat_span.attributes["gen_ai.input.messages"])
    assert input_msgs[0]["role"] == "user"
    assert input_msgs[0]["parts"][0]["type"] == "text"
    assert input_msgs[0]["parts"][0]["content"] == "What's the weather in SF?"
    assert input_msgs[1]["role"] == "assistant"
    assert input_msgs[1]["parts"][0]["type"] == "tool_call"
    assert input_msgs[1]["parts"][0]["id"] == "call_123"
    assert input_msgs[1]["parts"][0]["name"] == "get_weather"
    # Arguments are parsed from the JSON string into a dict.
    assert input_msgs[1]["parts"][0]["arguments"] == {"city": "SF"}
    assert input_msgs[2]["role"] == "tool"
    assert input_msgs[2]["parts"][0]["type"] == "tool_call_response"
    assert input_msgs[2]["parts"][0]["id"] == "call_123"
    assert input_msgs[2]["parts"][0]["result"] == "Sunny"
    # Both the system message (chat) and `instructions` (responses) end up here.
    system_instructions = json.loads(chat_span.attributes["gen_ai.system_instructions"])
    assert system_instructions == [{"type": "text", "content": "Be helpful"}]

    output_msgs = json.loads(chat_span.attributes["gen_ai.output.messages"])
    assert len(output_msgs) == 1
    assert output_msgs[0]["role"] == "assistant"

    assert chat_span.attributes["gen_ai.response.model"] == MODEL
    assert not any(k.startswith("mlflow.") for k in chat_span.attributes)


@pytest.mark.parametrize(
    "api",
    ["chat_completions", pytest.param("responses", marks=requires_responses_api)],
)
@pytest.mark.usefixtures("reset_autolog_state")
def test_autolog_streaming(client, capture_otel_export, api):
    """Streaming requests still yield a complete semconv chat span after consumption."""
    exporter, processor = capture_otel_export

    mlflow.openai.autolog()
    if api == "chat_completions":
        stream = client.chat.completions.create(
            messages=[{"role": "user", "content": "Hi"}],
            model=MODEL,
            stream=True,
        )
        # The span is only finalized once the stream is fully consumed.
        for _ in stream:
            pass
    else:
        stream = client.responses.create(input="Hi", model=MODEL, stream=True)
        for _ in stream:
            pass

    chat_span = _get_chat_span(exporter, processor)
    assert chat_span.attributes["gen_ai.operation.name"] == "chat"
    assert chat_span.attributes["gen_ai.request.model"] == MODEL

    input_msgs = json.loads(chat_span.attributes["gen_ai.input.messages"])
    assert input_msgs[0]["role"] == "user"
    assert input_msgs[0]["parts"][0]["type"] == "text"
    assert input_msgs[0]["parts"][0]["content"] == "Hi"

    output_msgs = json.loads(chat_span.attributes["gen_ai.output.messages"])
    assert len(output_msgs) == 1
    assert output_msgs[0]["role"] == "assistant"

    assert chat_span.attributes["gen_ai.response.model"] == MODEL
    assert not any(k.startswith("mlflow.") for k in chat_span.attributes)


@pytest.mark.parametrize(
    ("content_item", "expected"),
    [
        # Chat API: image_url with HTTP URL → UriPart
        (
            {"type": "image_url", "image_url": {"url": "https://example.com/img.png"}},
            {"type": "uri", "modality": "image", "uri": "https://example.com/img.png"},
        ),
        # Chat API: image_url with data URI → BlobPart
        (
            {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,abc123"}},
            {"type": "blob", "modality": "image", "mime_type": "image/jpeg", "content": "abc123"},
        ),
        # Responses API: input_image with HTTP URL → UriPart
        (
            {"type": "input_image", "image_url": "https://example.com/img.png"},
            {"type": "uri", "modality": "image", "uri": "https://example.com/img.png"},
        ),
        # Responses API: input_image with data URI → BlobPart
        (
            {"type": "input_image", "image_url": "data:image/png;base64,xyz789"},
            {"type": "blob", "modality": "image", "mime_type": "image/png", "content": "xyz789"},
        ),
        # Chat API: input_audio → BlobPart
        (
            {"type": "input_audio", "input_audio": {"data": "audiodata", "format": "wav"}},
            {"type": "blob", "modality": "audio", "mime_type": "audio/wav", "content": "audiodata"},
        ),
        # Responses API: input_text → TextPart
        (
            {"type": "input_text", "text": "hello"},
            {"type": "text", "content": "hello"},
        ),
    ],
)
def test_convert_content_multimodal(content_item, expected):
    """Each multimodal content item maps to its semconv part representation."""
    result = _convert_content([content_item])
    assert result == [expected]


def test_convert_message_audio_transcript_fallback():
    """With no text content, the audio transcript is used as the text part."""
    msg = {
        "role": "assistant",
        "content": None,
        "audio": {
            "id": "audio_abc123",
            "data": "SGVsbG8=",
            "expires_at": 9999999999,
            "transcript": "Yes, I am.",
        },
    }
    result = _convert_message(msg)
    assert result == {
        "role": "assistant",
        "parts": [{"type": "text", "content": "Yes, I am."}],
    }


def test_convert_message_audio_no_override():
    """Existing text content takes precedence over the audio transcript."""
    msg = {
        "role": "assistant",
        "content": "I have text.",
        "audio": {
            "id": "audio_abc123",
            "data": "SGVsbG8=",
            "expires_at": 9999999999,
            "transcript": "Different transcript.",
        },
    }
    result = _convert_message(msg)
    assert result == {
        "role": "assistant",
        "parts": [{"type": "text", "content": "I have text."}],
    }


def test_convert_message_no_audio_no_content():
    """A message with neither content nor audio converts to an empty parts list."""
    msg = {"role": "assistant", "content": None}
    result = _convert_message(msg)
    assert result == {"role": "assistant", "parts": []}