# tests/bedrock/test_genai_semconv_converter.py
  1  import base64
  2  import json
  3  from pathlib import Path
  4  from unittest import mock
  5  
  6  import boto3
  7  import pytest
  8  
  9  import mlflow
 10  from mlflow.bedrock.genai_semconv_converter import _convert_image
 11  from mlflow.tracing.constant import GenAiSemconvKey
 12  
 13  from tests.tracing.helper import capture_otel_export, reset_autolog_state  # noqa: F401
 14  
 15  _ANTHROPIC_MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"
 16  
 17  
 18  @pytest.fixture(autouse=True)
 19  def enable_genai_semconv(monkeypatch):
 20      monkeypatch.setenv("MLFLOW_ENABLE_OTEL_GENAI_SEMCONV", "true")
 21      return
 22  
 23  
 24  def _get_chat_span(exporter, processor):
 25      processor.force_flush(timeout_millis=5000)
 26      spans = exporter.get_finished_spans()
 27      return next(s for s in spans if s.attributes.get(GenAiSemconvKey.OPERATION_NAME) == "chat")
 28  
 29  
 30  # ── End-to-end autolog integration tests ─────────────────────────────────────
 31  
 32  
# Minimal Converse API request used by the plain-chat tests: one user turn
# plus an inferenceConfig whose values the tests assert on directly.
_CONVERSE_REQUEST = {
    "modelId": _ANTHROPIC_MODEL_ID,
    "messages": [{"role": "user", "content": [{"text": "Hi"}]}],
    "inferenceConfig": {"maxTokens": 300, "temperature": 0.1, "topP": 0.9},
}

# Canned successful Converse response; token counts (8/12) are asserted
# against the usage.* span attributes.
_CONVERSE_RESPONSE = {
    "output": {
        "message": {"role": "assistant", "content": [{"text": "Hello! How can I help you today?"}]},
    },
    "stopReason": "end_turn",
    "usage": {"inputTokens": 8, "outputTokens": 12},
    "metrics": {"latencyMs": 551},
}

# Three-turn tool-calling conversation: user question, assistant toolUse
# (id "tool_1"), then the toolResult echoed back by the user role.  Also
# carries the toolConfig so tool definitions get exported.
_CONVERSE_TOOL_CALLING_REQUEST = {
    "modelId": _ANTHROPIC_MODEL_ID,
    "messages": [
        {"role": "user", "content": [{"text": "What's the weather?"}]},
        {
            "role": "assistant",
            "content": [
                {"text": "Let me check."},
                {
                    "toolUse": {
                        "toolUseId": "tool_1",
                        "name": "get_weather",
                        "input": {"city": "SF"},
                    }
                },
            ],
        },
        {
            "role": "user",
            "content": [
                {"toolResult": {"toolUseId": "tool_1", "content": [{"json": {"temp": 72}}]}}
            ],
        },
    ],
    "toolConfig": {
        "tools": [
            {
                "toolSpec": {
                    "name": "get_weather",
                    "description": "Get weather",
                    "inputSchema": {
                        "json": {"type": "object", "properties": {"city": {"type": "string"}}}
                    },
                }
            }
        ]
    },
}

# Final assistant answer after the tool round-trip above.
_CONVERSE_TOOL_CALLING_RESPONSE = {
    "output": {
        "message": {"role": "assistant", "content": [{"text": "It's 72°F in SF."}]},
    },
    "stopReason": "end_turn",
    "usage": {"inputTokens": 20, "outputTokens": 15},
}
 94  
 95  
 96  @pytest.mark.usefixtures("reset_autolog_state")
 97  def test_autolog_basic(capture_otel_export):
 98      exporter, processor = capture_otel_export
 99  
100      mlflow.bedrock.autolog()
101      client = boto3.client("bedrock-runtime", region_name="us-west-2")
102  
103      with mock.patch("botocore.client.BaseClient._make_api_call", return_value=_CONVERSE_RESPONSE):
104          client.converse(**_CONVERSE_REQUEST)
105  
106      chat_span = _get_chat_span(exporter, processor)
107      assert chat_span.attributes[GenAiSemconvKey.OPERATION_NAME] == "chat"
108      assert chat_span.attributes[GenAiSemconvKey.REQUEST_MODEL] == _ANTHROPIC_MODEL_ID
109      assert chat_span.attributes[GenAiSemconvKey.REQUEST_TEMPERATURE] == 0.1
110      assert chat_span.attributes[GenAiSemconvKey.REQUEST_MAX_TOKENS] == 300
111      assert chat_span.attributes[GenAiSemconvKey.REQUEST_TOP_P] == 0.9
112  
113      input_msgs = json.loads(chat_span.attributes[GenAiSemconvKey.INPUT_MESSAGES])
114      assert input_msgs[0]["role"] == "user"
115      assert input_msgs[0]["parts"][0] == {"type": "text", "content": "Hi"}
116  
117      output_msgs = json.loads(chat_span.attributes[GenAiSemconvKey.OUTPUT_MESSAGES])
118      assert len(output_msgs) == 1
119      assert output_msgs[0]["role"] == "assistant"
120      assert output_msgs[0]["parts"][0] == {
121          "type": "text",
122          "content": "Hello! How can I help you today?",
123      }
124      assert chat_span.attributes[GenAiSemconvKey.USAGE_INPUT_TOKENS] == 8
125      assert chat_span.attributes[GenAiSemconvKey.USAGE_OUTPUT_TOKENS] == 12
126      assert not any(k.startswith("mlflow.") for k in chat_span.attributes)
127  
128  
@pytest.mark.usefixtures("reset_autolog_state")
def test_autolog_with_tool_calls(capture_otel_export):
    """Tool-use turns become tool_call / tool_call_response parts, and tool
    specs are exported as flat definitions (no OpenAI "function" wrapper)."""
    exporter, processor = capture_otel_export

    mlflow.bedrock.autolog()
    client = boto3.client("bedrock-runtime", region_name="us-west-2")

    patched = mock.patch(
        "botocore.client.BaseClient._make_api_call",
        return_value=_CONVERSE_TOOL_CALLING_RESPONSE,
    )
    with patched:
        client.converse(**_CONVERSE_TOOL_CALLING_REQUEST)

    span = _get_chat_span(exporter, processor)

    messages = json.loads(span.attributes[GenAiSemconvKey.INPUT_MESSAGES])
    assert len(messages) == 3
    user_msg, assistant_msg, tool_msg = messages

    assert user_msg["role"] == "user"
    assert user_msg["parts"][0]["content"] == "What's the weather?"

    assert assistant_msg["role"] == "assistant"
    assert assistant_msg["parts"][0] == {"type": "text", "content": "Let me check."}
    expected_call = {
        "type": "tool_call",
        "id": "tool_1",
        "name": "get_weather",
        "arguments": {"city": "SF"},
    }
    assert assistant_msg["parts"][1] == expected_call

    # The toolResult turn is normalized to role "tool" with a JSON string result.
    assert tool_msg["role"] == "tool"
    assert tool_msg["parts"][0] == {
        "type": "tool_call_response",
        "id": "tool_1",
        "result": '{"temp": 72}',
    }

    definitions = json.loads(span.attributes[GenAiSemconvKey.TOOL_DEFINITIONS])
    assert len(definitions) == 1
    assert definitions[0]["name"] == "get_weather"
    assert "function" not in definitions[0]

    assert all(not key.startswith("mlflow.") for key in span.attributes)
171  
172  
@pytest.mark.usefixtures("reset_autolog_state")
def test_autolog_with_system_instructions(capture_otel_export):
    """System prompts land in the dedicated system-instructions attribute."""
    exporter, processor = capture_otel_export

    mlflow.bedrock.autolog()
    client = boto3.client("bedrock-runtime", region_name="us-west-2")

    # Extend the base request with a system prompt without mutating the shared dict.
    request = dict(_CONVERSE_REQUEST)
    request["system"] = [{"text": "You are a helpful assistant."}]

    with mock.patch("botocore.client.BaseClient._make_api_call", return_value=_CONVERSE_RESPONSE):
        client.converse(**request)

    span = _get_chat_span(exporter, processor)
    instructions = json.loads(span.attributes[GenAiSemconvKey.SYSTEM_INSTRUCTIONS])
    assert instructions == [{"type": "text", "content": "You are a helpful assistant."}]
188  
189  
@pytest.mark.usefixtures("reset_autolog_state")
def test_autolog_with_image(capture_otel_export):
    """Image content blocks are exported as base64 "blob" parts next to text."""
    exporter, processor = capture_otel_export

    mlflow.bedrock.autolog()
    client = boto3.client("bedrock-runtime", region_name="us-west-2")

    image_path = Path(__file__).parent.parent / "resources" / "images" / "test.png"
    image_bytes = image_path.read_bytes()

    user_content = [
        {"text": "What text is in this image?"},
        {"image": {"format": "png", "source": {"bytes": image_bytes}}},
    ]
    request = {
        "modelId": _ANTHROPIC_MODEL_ID,
        "messages": [{"role": "user", "content": user_content}],
    }
    response = {
        "output": {"message": {"role": "assistant", "content": [{"text": "MLflow"}]}},
        "stopReason": "end_turn",
        "usage": {"inputTokens": 100, "outputTokens": 2},
    }

    with mock.patch("botocore.client.BaseClient._make_api_call", return_value=response):
        client.converse(**request)

    span = _get_chat_span(exporter, processor)
    messages = json.loads(span.attributes[GenAiSemconvKey.INPUT_MESSAGES])
    parts = messages[0]["parts"]
    assert parts[0] == {"type": "text", "content": "What text is in this image?"}
    blob_part = parts[1]
    assert blob_part["type"] == "blob"
    assert blob_part["modality"] == "image"
    assert blob_part["mime_type"] == "image/png"
    # Content is the base64 payload; just check it is non-empty.
    assert blob_part["content"]
230  
231  
232  # ── Multimodal content conversion tests ──────────────────────────────────────
233  
234  
@pytest.mark.parametrize(
    ("image", "expected"),
    [
        # Raw bytes are base64-encoded into the blob content.
        (
            {"format": "jpeg", "source": {"bytes": b"\xff\xd8"}},
            {
                "content": base64.b64encode(b"\xff\xd8").decode("utf-8"),
                "mime_type": "image/jpeg",
                "modality": "image",
                "type": "blob",
            },
        ),
        # An already-encoded base64 string is passed through untouched.
        (
            {"format": "png", "source": {"bytes": "abc123"}},
            {"content": "abc123", "mime_type": "image/png", "modality": "image", "type": "blob"},
        ),
    ],
)
def test_convert_image(image, expected):
    """_convert_image maps a Bedrock image block to a semconv blob part."""
    converted = _convert_image(image)
    assert converted == expected