test_openai_model_export.py
import importlib
import json
from unittest import mock

import numpy as np
import openai
import pandas as pd
import pytest
import yaml
from pyspark.sql import SparkSession

import mlflow
import mlflow.pyfunc.scoring_server as pyfunc_scoring_server
from mlflow.models.signature import ModelSignature
from mlflow.models.utils import load_serving_example
from mlflow.types.schema import ColSpec, ParamSchema, ParamSpec, Schema, TensorSpec

from tests.helper_functions import pyfunc_serve_and_score_model
from tests.openai.conftest import is_v1


@pytest.fixture(scope="module")
def spark():
    """Yield a module-scoped ``local[*]`` SparkSession, used as a context manager."""
    with SparkSession.builder.master("local[*]").getOrCreate() as s:
        yield s


def chat_completions():
    """Return ``openai.chat.completions`` if ``is_v1`` else ``openai.ChatCompletion``."""
    return openai.chat.completions if is_v1 else openai.ChatCompletion


def completions():
    """Return ``openai.completions`` if ``is_v1`` else ``openai.Completion``."""
    return openai.completions if is_v1 else openai.Completion


def embeddings():
    """Return ``openai.embeddings`` if ``is_v1`` else ``openai.Embedding``."""
    return openai.embeddings if is_v1 else openai.Embedding


@pytest.fixture(autouse=True)
def set_envs(monkeypatch, mock_openai):
    """Point every test at the ``mock_openai`` fixture value via environment variables.

    Sets ``MLFLOW_TESTING``, ``OPENAI_API_KEY`` and ``OPENAI_API_BASE``; the
    ``monkeypatch`` fixture restores them after each test.
    """
    monkeypatch.setenv("MLFLOW_TESTING", "true")
    monkeypatch.setenv("OPENAI_API_KEY", "test")
    monkeypatch.setenv("OPENAI_API_BASE", mock_openai)
    if is_v1:
        openai.base_url = mock_openai
    else:
        # NOTE(review): presumably the v0 SDK reads its configuration at import
        # time, hence the reload after patching the environment -- confirm.
        importlib.reload(openai)


def test_log_model():
    """Log a chat model in a run and check the reloaded dict keeps every saved field."""
    with mlflow.start_run():
        model_info = mlflow.openai.log_model(
            "gpt-4o-mini",
            "chat.completions",
            name="model",
            temperature=0.9,
            messages=[{"role": "system", "content": "You are an MLflow expert."}],
        )

    loaded_model = mlflow.openai.load_model(model_info.model_uri)
    assert loaded_model["model"] == "gpt-4o-mini"
    assert loaded_model["task"] == "chat.completions"
    assert loaded_model["temperature"] == 0.9
    assert loaded_model["messages"] == [{"role": "system", "content": "You are an MLflow expert."}]


def test_chat_single_variable(tmp_path):
    """A single ``{x}`` placeholder is filled from a DataFrame, dicts, and plain strings.

    Predictions are JSON strings; once decoded they must equal the formatted messages.
    """
    mlflow.openai.save_model(
        model="gpt-4o-mini",
        task=chat_completions(),
        path=tmp_path,
        messages=[{"role": "user", "content": "{x}"}],
    )

    model = mlflow.pyfunc.load_model(tmp_path)
    data = pd.DataFrame({
        "x": [
            "a",
            "b",
        ]
    })
    expected_output = [
        [{"content": "a", "role": "user"}],
        [{"content": "b", "role": "user"}],
    ]
    assert list(map(json.loads, model.predict(data))) == expected_output

    data = [
        {"x": "a"},
        {"x": "b"},
    ]
    assert list(map(json.loads, model.predict(data))) == expected_output

    data = [
        "a",
        "b",
    ]
    assert list(map(json.loads, model.predict(data))) == expected_output


def test_completion_single_variable(tmp_path):
    """A one-placeholder prompt is filled from a DataFrame, dicts, and plain strings.

    The input column is named ``x`` while the placeholder is ``{text}``; the test
    expects the value to be substituted regardless of that name mismatch.
    """
    mlflow.openai.save_model(
        model="text-davinci-003",
        task=completions(),
        path=tmp_path,
        prompt="Say {text}",
    )

    model = mlflow.pyfunc.load_model(tmp_path)
    data = pd.DataFrame({
        "x": [
            "this is a test",
            "this is another test",
        ]
    })
    expected_output = ["Say this is a test", "Say this is another test"]
    assert model.predict(data) == expected_output

    data = [
        {"x": "this is a test"},
        {"x": "this is another test"},
    ]
    assert model.predict(data) == expected_output

    data = [
        "this is a test",
        "this is another test",
    ]
    assert model.predict(data) == expected_output


def test_chat_multiple_variables(tmp_path):
    """Two placeholders in one message give inputs ``x`` and ``y``, both substituted per row."""
    mlflow.openai.save_model(
        model="gpt-4o-mini",
        task=chat_completions(),
        path=tmp_path,
        messages=[{"role": "user", "content": "{x} {y}"}],
    )
    model = mlflow.models.Model.load(tmp_path)
    assert model.signature.inputs.to_dict() == [
        {"name": "x", "type": "string", "required": True},
        {"name": "y", "type": "string", "required": True},
    ]
    assert model.signature.outputs.to_dict() == [
        {"type": "string", "required": True},
    ]

    model = mlflow.pyfunc.load_model(tmp_path)
    data = pd.DataFrame({
        "x": [
            "a",
            "b",
        ],
        "y": [
            "c",
            "d",
        ],
    })
    expected_output = [
        [{"content": "a c", "role": "user"}],
        [{"content": "b d", "role": "user"}],
    ]
    assert list(map(json.loads, model.predict(data))) == expected_output

    data = [
        {"x": "a", "y": "c"},
        {"x": "b", "y": "d"},
    ]
    assert list(map(json.loads, model.predict(data))) == expected_output


def test_chat_role_content(tmp_path):
    """Placeholders in both ``role`` and ``content`` become signature inputs and are filled."""
    mlflow.openai.save_model(
        model="gpt-4o-mini",
        task=chat_completions(),
        path=tmp_path,
        messages=[{"role": "{role}", "content": "{content}"}],
    )
    model = mlflow.models.Model.load(tmp_path)
    assert model.signature.inputs.to_dict() == [
        {"name": "content", "type": "string", "required": True},
        {"name": "role", "type": "string", "required": True},
    ]
    assert model.signature.outputs.to_dict() == [
        {"type": "string", "required": True},
    ]

    model = mlflow.pyfunc.load_model(tmp_path)
    data = pd.DataFrame({
        "role": [
            "system",
            "user",
        ],
        "content": [
            "c",
            "d",
        ],
    })
    expected_output = [
        [{"content": "c", "role": "system"}],
        [{"content": "d", "role": "user"}],
    ]
    assert list(map(json.loads, model.predict(data))) == expected_output


def test_completion_multiple_variables(tmp_path):
    """A prompt with ``{x}`` and ``{y}`` yields two string inputs, both substituted per row."""
    mlflow.openai.save_model(
        model="text-davinci-003",
        task=completions(),
        path=tmp_path,
        prompt="Say {x} and {y}",
    )
    model = mlflow.models.Model.load(tmp_path)
    assert model.signature.inputs.to_dict() == [
        {"name": "x", "type": "string", "required": True},
        {"name": "y", "type": "string", "required": True},
    ]
    assert model.signature.outputs.to_dict() == [
        {"type": "string", "required": True},
    ]

    model = mlflow.pyfunc.load_model(tmp_path)
    data = pd.DataFrame({
        "x": [
            "a",
            "b",
        ],
        "y": [
            "c",
            "d",
        ],
    })
    expected_output = ["Say a and c", "Say b and d"]
    assert model.predict(data) == expected_output

    data = [
        {"x": "a", "y": "c"},
        {"x": "b", "y": "d"},
    ]
    assert model.predict(data) == expected_output


def test_chat_multiple_messages(tmp_path):
    """Placeholders spread over two messages are collected into one signature and filled."""
    mlflow.openai.save_model(
        model="gpt-4o-mini",
        task=chat_completions(),
        path=tmp_path,
        messages=[
            {"role": "user", "content": "{x}"},
            {"role": "user", "content": "{y}"},
        ],
    )
    model = mlflow.models.Model.load(tmp_path)
    assert model.signature.inputs.to_dict() == [
        {"name": "x", "type": "string", "required": True},
        {"name": "y", "type": "string", "required": True},
    ]
    assert model.signature.outputs.to_dict() == [
        {"type": "string", "required": True},
    ]

    model = mlflow.pyfunc.load_model(tmp_path)
    data = pd.DataFrame({
        "x": [
            "a",
            "b",
        ],
        "y": [
            "c",
            "d",
        ],
    })
    expected_output = [
        [{"content": "a", "role": "user"}, {"content": "c", "role": "user"}],
        [{"content": "b", "role": "user"}, {"content": "d", "role": "user"}],
    ]
    assert list(map(json.loads, model.predict(data))) == expected_output

    data = [
        {"x": "a", "y": "c"},
        {"x": "b", "y": "d"},
    ]
    assert list(map(json.loads, model.predict(data))) == expected_output


def test_chat_no_variables(tmp_path):
    """With no placeholders, the input is appended as an extra user message.

    The signature then has a single unnamed string input.
    """
    mlflow.openai.save_model(
        model="gpt-4o-mini",
        task=chat_completions(),
        path=tmp_path,
        messages=[{"role": "user", "content": "a"}],
    )
    model = mlflow.models.Model.load(tmp_path)
    assert model.signature.inputs.to_dict() == [
        {"type": "string", "required": True},
    ]
    assert model.signature.outputs.to_dict() == [
        {"type": "string", "required": True},
    ]

    model = mlflow.pyfunc.load_model(tmp_path)
    data = pd.DataFrame({
        "content": ["b", "c"],
    })
    expected_output = [
        [{"content": "a", "role": "user"}, {"content": "b", "role": "user"}],
        [{"content": "a", "role": "user"}, {"content": "c", "role": "user"}],
    ]
    assert list(map(json.loads, model.predict(data))) == expected_output

    data = [
        {"content": "b"},
        {"content": "c"},
    ]
    assert list(map(json.loads, model.predict(data))) == expected_output

    data = [
        "b",
        "c",
    ]
    assert list(map(json.loads, model.predict(data))) == expected_output


def test_completion_no_variable(tmp_path):
    """Without a prompt template, each input is expected back unchanged as the prediction."""
    mlflow.openai.save_model(
        model="text-davinci-003",
        task=completions(),
        path=tmp_path,
    )

    model = mlflow.pyfunc.load_model(tmp_path)
    data = pd.DataFrame({
        "x": [
            "this is a test",
            "this is another test",
        ]
    })
    expected_output = ["this is a test", "this is another test"]
    assert model.predict(data) == expected_output

    data = [
        {"x": "this is a test"},
        {"x": "this is another test"},
    ]
    assert model.predict(data) == expected_output

    data = [
        "this is a test",
        "this is another test",
    ]
    assert model.predict(data) == expected_output


def test_chat_no_messages(tmp_path):
    """Without a ``messages`` template, each input becomes a single user message."""
    mlflow.openai.save_model(
        model="gpt-4o-mini",
        task=chat_completions(),
        path=tmp_path,
    )
    model = mlflow.models.Model.load(tmp_path)
    assert model.signature.inputs.to_dict() == [
        {"type": "string", "required": True},
    ]
    assert model.signature.outputs.to_dict() == [
        {"type": "string", "required": True},
    ]

    model = mlflow.pyfunc.load_model(tmp_path)
    data = pd.DataFrame({
        "content": ["b", "c"],
    })
    expected_output = [
        [{"content": "b", "role": "user"}],
        [{"content": "c", "role": "user"}],
    ]
    assert list(map(json.loads, model.predict(data))) == expected_output

    data = [
        {"content": "b"},
        {"content": "c"},
    ]
    assert list(map(json.loads, model.predict(data))) == expected_output

    data = [
        "b",
        "c",
    ]
    assert list(map(json.loads, model.predict(data))) == expected_output


@pytest.mark.parametrize(
    "messages",
    [
        ["a", "b"],
        [{"k": "v"}],
    ],
)
def test_invalid_messages(tmp_path, messages):
    """Saving with malformed ``messages`` raises ``MlflowException`` naming the expected shape."""
    with pytest.raises(
        mlflow.MlflowException,
        match="it must be a list of dictionaries with keys 'role' and 'content'",
    ):
        mlflow.openai.save_model(
            model="gpt-4o-mini",
            task=chat_completions(),
            path=tmp_path,
            messages=messages,
        )


def test_task_argument_accepts_class(tmp_path):
    """Passing the SDK object as ``task`` is stored as the string ``"chat.completions"``."""
    mlflow.openai.save_model(model="gpt-4o-mini", task=chat_completions(), path=tmp_path)
    loaded_model = mlflow.openai.load_model(tmp_path)
    assert loaded_model["task"] == "chat.completions"


@pytest.mark.skipif(is_v1, reason="Requires OpenAI SDK v0")
def test_model_argument_accepts_retrieved_model(tmp_path):
    """An object from ``openai.Model.retrieve`` is accepted as ``model`` and saved by its id."""
    model = openai.Model.retrieve("gpt-4o-mini")
    mlflow.openai.save_model(model=model, task=chat_completions(), path=tmp_path)
    loaded_model = mlflow.openai.load_model(tmp_path)
    assert loaded_model["model"] == "gpt-4o-mini"


def test_save_model_with_secret_scope(tmp_path, monkeypatch):
    """With ``MLFLOW_OPENAI_SECRET_SCOPE`` set, saving warns and writes scoped credential keys.

    The Databricks runtime check is patched to return True and the secret-scope
    access check is patched out. The saved ``openai.yaml`` must map each OpenAI
    setting to ``"<scope>:<lowercased setting name>"``.
    """
    scope = "test"
    monkeypatch.setenv("MLFLOW_OPENAI_SECRET_SCOPE", scope)
    with (
        mock.patch("mlflow.openai.model.is_in_databricks_runtime", return_value=True),
        mock.patch("mlflow.openai.model.check_databricks_secret_scope_access"),
    ):
        with pytest.warns(FutureWarning, match="MLFLOW_OPENAI_SECRET_SCOPE.+deprecated"):
            mlflow.openai.save_model(model="gpt-4o-mini", task="chat.completions", path=tmp_path)
    with tmp_path.joinpath("openai.yaml").open() as f:
        creds = yaml.safe_load(f)
        assert creds == {
            "OPENAI_API_TYPE": f"{scope}:openai_api_type",
            "OPENAI_API_KEY": f"{scope}:openai_api_key",
            "OPENAI_API_KEY_PATH": f"{scope}:openai_api_key_path",
            "OPENAI_API_BASE": f"{scope}:openai_api_base",
            "OPENAI_BASE_URL": f"{scope}:openai_base_url",
            "OPENAI_ORGANIZATION": f"{scope}:openai_organization",
            "OPENAI_API_VERSION": f"{scope}:openai_api_version",
            "OPENAI_DEPLOYMENT_NAME": f"{scope}:openai_deployment_name",
            "OPENAI_ENGINE": f"{scope}:openai_engine",
        }


def test_spark_udf_chat(tmp_path, spark):
    """A saved chat model works as a Spark UDF producing JSON-encoded message lists."""
    mlflow.openai.save_model(
        model="gpt-4o-mini",
        task="chat.completions",
        path=tmp_path,
        messages=[
            {"role": "user", "content": "{x} {y}"},
        ],
    )
    udf = mlflow.pyfunc.spark_udf(spark, tmp_path, result_type="string")
    df = spark.createDataFrame(
        [
            ("a", "b"),
            ("c", "d"),
        ],
        ["x", "y"],
    )
    # The UDF is called without column arguments.
    # NOTE(review): presumably the input columns are resolved from the model
    # signature (`x`, `y`) -- confirm against `mlflow.pyfunc.spark_udf`.
    df = df.withColumn("z", udf())
    pdf = df.toPandas()
    assert list(map(json.loads, pdf["z"])) == [
        [{"content": "a b", "role": "user"}],
        [{"content": "c d", "role": "user"}],
    ]


class ChatCompletionModel(mlflow.pyfunc.PythonModel):
    """Python model that sends a fixed "What is MLflow?" chat request on every predict."""

    def predict(self, context, model_input, params=None):
        """Return the content of the first choice of a fixed chat-completion request.

        ``context``, ``model_input`` and ``params`` are accepted but not used.
        """
        # `chat_completions` is a helper *function* that returns the SDK object,
        # so it has to be called before `.create` can be looked up on the result.
        completion = chat_completions().create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": "What is MLflow?"}],
        )
        return completion.choices[0].message.content


def test_embeddings(tmp_path):
    """An embeddings model has a string-in / float64-tensor-out signature.

    Each prediction must have length 1536, for a 2-row and a 100-row input.
    """
    mlflow.openai.save_model(
        model="text-embedding-ada-002",
        task=embeddings(),
        path=tmp_path,
    )

    model = mlflow.models.Model.load(tmp_path)
    assert model.signature.inputs.to_dict() == [{"type": "string", "required": True}]
    assert model.signature.outputs.to_dict() == [
        {"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": (-1,)}}
    ]

    model = mlflow.pyfunc.load_model(tmp_path)
    data = pd.DataFrame({"text": ["a", "b"]})
    preds = model.predict(data)
    assert list(map(len, preds)) == [1536, 1536]

    data = pd.DataFrame({"text": ["a"] * 100})
    preds = model.predict(data)
    assert list(map(len, preds)) == [1536] * 100


def test_embeddings_batch_size_azure(tmp_path, monkeypatch):
    """With ``OPENAI_API_TYPE=azure`` and an engine set, the loaded batch size is 16."""
    monkeypatch.setenv("OPENAI_API_TYPE", "azure")
    monkeypatch.setenv("OPENAI_ENGINE", "test_engine")
    mlflow.openai.save_model(
        model="text-embedding-ada-002",
        task=embeddings(),
        path=tmp_path,
    )
    model = mlflow.pyfunc.load_model(tmp_path)

    assert model._model_impl.api_config.batch_size == 16


def test_embeddings_pyfunc_server_and_score():
    """Serving a logged embeddings model returns the same predictions as in-process pyfunc."""
    df = pd.DataFrame({"text": ["a", "b"]})
    with mlflow.start_run():
        model_info = mlflow.openai.log_model(
            "text-embedding-ada-002",
            embeddings(),
            name="model",
            input_example=df,
        )
    inference_payload = load_serving_example(model_info.model_uri)
    resp = pyfunc_serve_and_score_model(
        model_info.model_uri,
        data=inference_payload,
        content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON,
        extra_args=["--env-manager", "local"],
    )
    expected = mlflow.pyfunc.load_model(model_info.model_uri).predict(df)
    actual = pd.DataFrame(data=json.loads(resp.content.decode("utf-8")))
    pd.testing.assert_frame_equal(actual, pd.DataFrame({"predictions": expected}))


def test_spark_udf_embeddings(tmp_path, spark):
    """An embeddings model works as a Spark UDF returning 1536-element arrays."""
    mlflow.openai.save_model(
        model="text-embedding-ada-002",
        task=embeddings(),
        path=tmp_path,
    )
    udf = mlflow.pyfunc.spark_udf(spark, tmp_path, result_type="array<double>")
    df = spark.createDataFrame(
        [
            ("a",),
            ("b",),
        ],
        ["x"],
    )
    df = df.withColumn("z", udf("x")).toPandas()
    assert list(map(len, df["z"])) == [1536, 1536]


def test_inference_params(tmp_path):
    """A ``batch_size`` param declared in the signature is saved and can be overridden.

    The saved signature must contain exactly one ``batch_size`` param with default 16;
    predicting with ``params={"batch_size": 5}`` must still yield 1536-long embeddings.
    """
    mlflow.openai.save_model(
        model="text-embedding-ada-002",
        task=embeddings(),
        path=tmp_path,
        signature=ModelSignature(
            inputs=Schema([ColSpec(type="string", name=None)]),
            outputs=Schema([TensorSpec(type=np.dtype("float64"), shape=(-1,))]),
            params=ParamSchema([ParamSpec(name="batch_size", dtype="long", default=16)]),
        ),
    )

    model_info = mlflow.models.Model.load(tmp_path)
    assert (
        len([p for p in model_info.signature.params if p.name == "batch_size" and p.default == 16])
        == 1
    )

    model = mlflow.pyfunc.load_model(tmp_path)
    data = pd.DataFrame({"text": ["a", "b"]})
    preds = model.predict(data, params={"batch_size": 5})
    assert list(map(len, preds)) == [1536, 1536]


def test_inference_params_overlap(tmp_path):
    """Saving fails when ``prefix`` is both a model kwarg and a signature param."""
    with pytest.raises(mlflow.MlflowException, match=r"any of \['prefix'\] as parameters"):
        mlflow.openai.save_model(
            model="text-davinci-003",
            task=completions(),
            path=tmp_path,
            prefix="Classify the following text's sentiment:",
            signature=ModelSignature(
                inputs=Schema([ColSpec(type="string", name=None)]),
                outputs=Schema([ColSpec(type="string", name=None)]),
                params=ParamSchema([ParamSpec(name="prefix", default=None, dtype="string")]),
            ),
        )


def test_multimodal_messages(tmp_path):
    """Placeholders nested in multimodal content parts are detected and substituted."""
    # Test multimodal content with variable placeholders
    mlflow.openai.save_model(
        model="gpt-4o-mini",
        task=chat_completions(),
        path=tmp_path,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "{system_prompt}"},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "data:image/jpeg;base64,{image_base64}",
                            "detail": "low",
                        },
                    },
                ],
            }
        ],
    )

    model = mlflow.models.Model.load(tmp_path)
    assert model.signature.inputs.to_dict() == [
        {"name": "image_base64", "type": "string", "required": True},
        {"name": "system_prompt", "type": "string", "required": True},
    ]
    assert model.signature.outputs.to_dict() == [
        {"type": "string", "required": True},
    ]

    model = mlflow.pyfunc.load_model(tmp_path)
    data = pd.DataFrame({
        "system_prompt": ["Analyze this image"],
        "image_base64": [
            "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg=="
        ],
    })

    expected_output = [
        [
            {
                "content": [
                    {"type": "text", "text": "Analyze this image"},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": (
                                "data:image/jpeg;base64,"
                                "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg=="
                            ),
                            "detail": "low",
                        },
                    },
                ],
                "role": "user",
            }
        ]
    ]

    assert list(map(json.loads, model.predict(data))) == expected_output


def test_multimodal_messages_no_variables(tmp_path):
    """A multimodal template without placeholders gets the input appended as a user message."""
    mlflow.openai.save_model(
        model="gpt-4o-mini",
        task=chat_completions(),
        path=tmp_path,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What's in this image?"},
                    {
                        "type": "image_url",
                        "image_url": {"url": "data:image/jpeg;base64,abc123", "detail": "low"},
                    },
                ],
            }
        ],
    )

    model = mlflow.models.Model.load(tmp_path)
    # Should add default content variable since no variables found
    assert model.signature.inputs.to_dict() == [
        {"type": "string", "required": True},
    ]

    model = mlflow.pyfunc.load_model(tmp_path)
    data = pd.DataFrame({"content": ["Additional context"]})

    expected_output = [
        [
            {
                "content": [
                    {"type": "text", "text": "What's in this image?"},
                    {
                        "type": "image_url",
                        "image_url": {"url": "data:image/jpeg;base64,abc123", "detail": "low"},
                    },
                ],
                "role": "user",
            },
            {"content": "Additional context", "role": "user"},
        ]
    ]

    assert list(map(json.loads, model.predict(data))) == expected_output