# tests/openai/test_openai_model_export.py
  1  import importlib
  2  import json
  3  from unittest import mock
  4  
  5  import numpy as np
  6  import openai
  7  import pandas as pd
  8  import pytest
  9  import yaml
 10  from pyspark.sql import SparkSession
 11  
 12  import mlflow
 13  import mlflow.pyfunc.scoring_server as pyfunc_scoring_server
 14  from mlflow.models.signature import ModelSignature
 15  from mlflow.models.utils import load_serving_example
 16  from mlflow.types.schema import ColSpec, ParamSchema, ParamSpec, Schema, TensorSpec
 17  
 18  from tests.helper_functions import pyfunc_serve_and_score_model
 19  from tests.openai.conftest import is_v1
 20  
 21  
 22  @pytest.fixture(scope="module")
 23  def spark():
 24      with SparkSession.builder.master("local[*]").getOrCreate() as s:
 25          yield s
 26  
 27  
 28  def chat_completions():
 29      return openai.chat.completions if is_v1 else openai.ChatCompletion
 30  
 31  
 32  def completions():
 33      return openai.completions if is_v1 else openai.Completion
 34  
 35  
 36  def embeddings():
 37      return openai.embeddings if is_v1 else openai.Embedding
 38  
 39  
 40  @pytest.fixture(autouse=True)
 41  def set_envs(monkeypatch, mock_openai):
 42      monkeypatch.setenv("MLFLOW_TESTING", "true")
 43      monkeypatch.setenv("OPENAI_API_KEY", "test")
 44      monkeypatch.setenv("OPENAI_API_BASE", mock_openai)
 45      if is_v1:
 46          openai.base_url = mock_openai
 47      else:
 48          importlib.reload(openai)
 49  
 50  
 51  def test_log_model():
 52      with mlflow.start_run():
 53          model_info = mlflow.openai.log_model(
 54              "gpt-4o-mini",
 55              "chat.completions",
 56              name="model",
 57              temperature=0.9,
 58              messages=[{"role": "system", "content": "You are an MLflow expert."}],
 59          )
 60  
 61      loaded_model = mlflow.openai.load_model(model_info.model_uri)
 62      assert loaded_model["model"] == "gpt-4o-mini"
 63      assert loaded_model["task"] == "chat.completions"
 64      assert loaded_model["temperature"] == 0.9
 65      assert loaded_model["messages"] == [{"role": "system", "content": "You are an MLflow expert."}]
 66  
 67  
 68  def test_chat_single_variable(tmp_path):
 69      mlflow.openai.save_model(
 70          model="gpt-4o-mini",
 71          task=chat_completions(),
 72          path=tmp_path,
 73          messages=[{"role": "user", "content": "{x}"}],
 74      )
 75  
 76      model = mlflow.pyfunc.load_model(tmp_path)
 77      data = pd.DataFrame({
 78          "x": [
 79              "a",
 80              "b",
 81          ]
 82      })
 83      expected_output = [
 84          [{"content": "a", "role": "user"}],
 85          [{"content": "b", "role": "user"}],
 86      ]
 87      assert list(map(json.loads, model.predict(data))) == expected_output
 88  
 89      data = [
 90          {"x": "a"},
 91          {"x": "b"},
 92      ]
 93      assert list(map(json.loads, model.predict(data))) == expected_output
 94  
 95      data = [
 96          "a",
 97          "b",
 98      ]
 99      assert list(map(json.loads, model.predict(data))) == expected_output
100  
101  
def test_completion_single_variable(tmp_path):
    """A single prompt variable is filled from any of the supported input shapes."""
    mlflow.openai.save_model(
        model="text-davinci-003",
        task=completions(),
        path=tmp_path,
        prompt="Say {text}",
    )

    model = mlflow.pyfunc.load_model(tmp_path)
    expected_output = ["Say this is a test", "Say this is another test"]
    for data in (
        pd.DataFrame({"x": ["this is a test", "this is another test"]}),
        [{"x": "this is a test"}, {"x": "this is another test"}],
        ["this is a test", "this is another test"],
    ):
        assert model.predict(data) == expected_output
131  
132  
def test_chat_multiple_variables(tmp_path):
    """Multiple variables in one message map to named required string inputs."""
    mlflow.openai.save_model(
        model="gpt-4o-mini",
        task=chat_completions(),
        path=tmp_path,
        messages=[{"role": "user", "content": "{x} {y}"}],
    )
    model_conf = mlflow.models.Model.load(tmp_path)
    assert model_conf.signature.inputs.to_dict() == [
        {"name": "x", "type": "string", "required": True},
        {"name": "y", "type": "string", "required": True},
    ]
    assert model_conf.signature.outputs.to_dict() == [{"type": "string", "required": True}]

    model = mlflow.pyfunc.load_model(tmp_path)
    expected_output = [
        [{"content": "a c", "role": "user"}],
        [{"content": "b d", "role": "user"}],
    ]
    for data in (
        pd.DataFrame({"x": ["a", "b"], "y": ["c", "d"]}),
        [{"x": "a", "y": "c"}, {"x": "b", "y": "d"}],
    ):
        assert [json.loads(p) for p in model.predict(data)] == expected_output
171  
172  
def test_chat_role_content(tmp_path):
    """Both role and content may be templated; signature lists variables alphabetically."""
    mlflow.openai.save_model(
        model="gpt-4o-mini",
        task=chat_completions(),
        path=tmp_path,
        messages=[{"role": "{role}", "content": "{content}"}],
    )
    model_conf = mlflow.models.Model.load(tmp_path)
    assert model_conf.signature.inputs.to_dict() == [
        {"name": "content", "type": "string", "required": True},
        {"name": "role", "type": "string", "required": True},
    ]
    assert model_conf.signature.outputs.to_dict() == [{"type": "string", "required": True}]

    model = mlflow.pyfunc.load_model(tmp_path)
    data = pd.DataFrame({"role": ["system", "user"], "content": ["c", "d"]})
    expected_output = [
        [{"content": "c", "role": "system"}],
        [{"content": "d", "role": "user"}],
    ]
    assert [json.loads(p) for p in model.predict(data)] == expected_output
205  
206  
def test_completion_multiple_variables(tmp_path):
    """Two prompt variables appear as named inputs and render per row."""
    mlflow.openai.save_model(
        model="text-davinci-003",
        task=completions(),
        path=tmp_path,
        prompt="Say {x} and {y}",
    )
    model_conf = mlflow.models.Model.load(tmp_path)
    assert model_conf.signature.inputs.to_dict() == [
        {"name": "x", "type": "string", "required": True},
        {"name": "y", "type": "string", "required": True},
    ]
    assert model_conf.signature.outputs.to_dict() == [{"type": "string", "required": True}]

    model = mlflow.pyfunc.load_model(tmp_path)
    expected_output = ["Say a and c", "Say b and d"]
    for data in (
        pd.DataFrame({"x": ["a", "b"], "y": ["c", "d"]}),
        [{"x": "a", "y": "c"}, {"x": "b", "y": "d"}],
    ):
        assert model.predict(data) == expected_output
242  
243  
def test_chat_multiple_messages(tmp_path):
    """Each templated message contributes its own variable to the signature."""
    mlflow.openai.save_model(
        model="gpt-4o-mini",
        task=chat_completions(),
        path=tmp_path,
        messages=[
            {"role": "user", "content": "{x}"},
            {"role": "user", "content": "{y}"},
        ],
    )
    model_conf = mlflow.models.Model.load(tmp_path)
    assert model_conf.signature.inputs.to_dict() == [
        {"name": "x", "type": "string", "required": True},
        {"name": "y", "type": "string", "required": True},
    ]
    assert model_conf.signature.outputs.to_dict() == [{"type": "string", "required": True}]

    model = mlflow.pyfunc.load_model(tmp_path)
    expected_output = [
        [{"content": "a", "role": "user"}, {"content": "c", "role": "user"}],
        [{"content": "b", "role": "user"}, {"content": "d", "role": "user"}],
    ]
    for data in (
        pd.DataFrame({"x": ["a", "b"], "y": ["c", "d"]}),
        [{"x": "a", "y": "c"}, {"x": "b", "y": "d"}],
    ):
        assert [json.loads(p) for p in model.predict(data)] == expected_output
285  
286  
def test_chat_no_variables(tmp_path):
    """With no template variables, the input becomes an extra appended user message."""
    mlflow.openai.save_model(
        model="gpt-4o-mini",
        task=chat_completions(),
        path=tmp_path,
        messages=[{"role": "user", "content": "a"}],
    )
    model_conf = mlflow.models.Model.load(tmp_path)
    assert model_conf.signature.inputs.to_dict() == [{"type": "string", "required": True}]
    assert model_conf.signature.outputs.to_dict() == [{"type": "string", "required": True}]

    model = mlflow.pyfunc.load_model(tmp_path)
    expected_output = [
        [{"content": "a", "role": "user"}, {"content": "b", "role": "user"}],
        [{"content": "a", "role": "user"}, {"content": "c", "role": "user"}],
    ]
    for data in (
        pd.DataFrame({"content": ["b", "c"]}),
        [{"content": "b"}, {"content": "c"}],
        ["b", "c"],
    ):
        assert [json.loads(p) for p in model.predict(data)] == expected_output
323  
324  
def test_completion_no_variable(tmp_path):
    """Without a prompt template, completions echo the raw input text."""
    mlflow.openai.save_model(
        model="text-davinci-003",
        task=completions(),
        path=tmp_path,
    )

    model = mlflow.pyfunc.load_model(tmp_path)
    expected_output = ["this is a test", "this is another test"]
    for data in (
        pd.DataFrame({"x": ["this is a test", "this is another test"]}),
        [{"x": "this is a test"}, {"x": "this is another test"}],
        ["this is a test", "this is another test"],
    ):
        assert model.predict(data) == expected_output
353  
354  
def test_chat_no_messages(tmp_path):
    """Without any template messages, the input alone becomes the user message."""
    mlflow.openai.save_model(
        model="gpt-4o-mini",
        task=chat_completions(),
        path=tmp_path,
    )
    model_conf = mlflow.models.Model.load(tmp_path)
    assert model_conf.signature.inputs.to_dict() == [{"type": "string", "required": True}]
    assert model_conf.signature.outputs.to_dict() == [{"type": "string", "required": True}]

    model = mlflow.pyfunc.load_model(tmp_path)
    expected_output = [
        [{"content": "b", "role": "user"}],
        [{"content": "c", "role": "user"}],
    ]
    for data in (
        pd.DataFrame({"content": ["b", "c"]}),
        [{"content": "b"}, {"content": "c"}],
        ["b", "c"],
    ):
        assert [json.loads(p) for p in model.predict(data)] == expected_output
390  
391  
@pytest.mark.parametrize(
    "messages",
    [
        ["a", "b"],
        [{"k": "v"}],
    ],
)
def test_invalid_messages(tmp_path, messages):
    """Malformed message lists are rejected at save time with a clear error."""
    expected_match = "it must be a list of dictionaries with keys 'role' and 'content'"
    with pytest.raises(mlflow.MlflowException, match=expected_match):
        mlflow.openai.save_model(
            model="gpt-4o-mini",
            task=chat_completions(),
            path=tmp_path,
            messages=messages,
        )
410  
411  
def test_task_argument_accepts_class(tmp_path):
    """Passing the SDK task object itself is normalized to its string name."""
    mlflow.openai.save_model(model="gpt-4o-mini", task=chat_completions(), path=tmp_path)
    assert mlflow.openai.load_model(tmp_path)["task"] == "chat.completions"
416  
417  
@pytest.mark.skipif(is_v1, reason="Requires OpenAI SDK v0")
def test_model_argument_accepts_retrieved_model(tmp_path):
    """A Model object retrieved via the v0 SDK is normalized to its id string."""
    retrieved = openai.Model.retrieve("gpt-4o-mini")
    mlflow.openai.save_model(model=retrieved, task=chat_completions(), path=tmp_path)
    assert mlflow.openai.load_model(tmp_path)["model"] == "gpt-4o-mini"
424  
425  
def test_save_model_with_secret_scope(tmp_path, monkeypatch):
    """Saving under MLFLOW_OPENAI_SECRET_SCOPE warns and records scoped secret keys."""
    scope = "test"
    monkeypatch.setenv("MLFLOW_OPENAI_SECRET_SCOPE", scope)
    with (
        mock.patch("mlflow.openai.model.is_in_databricks_runtime", return_value=True),
        mock.patch("mlflow.openai.model.check_databricks_secret_scope_access"),
    ):
        with pytest.warns(FutureWarning, match="MLFLOW_OPENAI_SECRET_SCOPE.+deprecated"):
            mlflow.openai.save_model(model="gpt-4o-mini", task="chat.completions", path=tmp_path)

    with tmp_path.joinpath("openai.yaml").open() as f:
        creds = yaml.safe_load(f)
    # Each credential key maps to "<scope>:<lowercased key name>".
    env_keys = [
        "OPENAI_API_TYPE",
        "OPENAI_API_KEY",
        "OPENAI_API_KEY_PATH",
        "OPENAI_API_BASE",
        "OPENAI_BASE_URL",
        "OPENAI_ORGANIZATION",
        "OPENAI_API_VERSION",
        "OPENAI_DEPLOYMENT_NAME",
        "OPENAI_ENGINE",
    ]
    assert creds == {key: f"{scope}:{key.lower()}" for key in env_keys}
448  
449  
def test_spark_udf_chat(tmp_path, spark):
    """The chat model works as a Spark UDF, rendering the template per row."""
    mlflow.openai.save_model(
        model="gpt-4o-mini",
        task="chat.completions",
        path=tmp_path,
        messages=[
            {"role": "user", "content": "{x} {y}"},
        ],
    )
    udf = mlflow.pyfunc.spark_udf(spark, tmp_path, result_type="string")
    rows = [("a", "b"), ("c", "d")]
    sdf = spark.createDataFrame(rows, ["x", "y"]).withColumn("z", udf())
    pdf = sdf.toPandas()
    assert [json.loads(z) for z in pdf["z"]] == [
        [{"content": "a b", "role": "user"}],
        [{"content": "c d", "role": "user"}],
    ]
473  
474  
class ChatCompletionModel(mlflow.pyfunc.PythonModel):
    """Minimal pyfunc model that issues a fixed chat-completion request.

    Ignores ``model_input`` and ``params``; always asks the same question and
    returns the first choice's message content.
    """

    def predict(self, context, model_input, params=None):
        # BUG FIX: `chat_completions` is the module-level helper *function*;
        # it must be called to obtain the API object before invoking .create().
        completion = chat_completions().create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": "What is MLflow?"}],
        )
        return completion.choices[0].message.content
482  
483  
def test_embeddings(tmp_path):
    """Embedding models emit 1536-dim float vectors, one per input row."""
    mlflow.openai.save_model(
        model="text-embedding-ada-002",
        task=embeddings(),
        path=tmp_path,
    )

    model_conf = mlflow.models.Model.load(tmp_path)
    assert model_conf.signature.inputs.to_dict() == [{"type": "string", "required": True}]
    assert model_conf.signature.outputs.to_dict() == [
        {"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": (-1,)}}
    ]

    model = mlflow.pyfunc.load_model(tmp_path)
    # Both a small and a larger-than-one-batch input produce one vector per row.
    for texts in (["a", "b"], ["a"] * 100):
        preds = model.predict(pd.DataFrame({"text": texts}))
        assert [len(p) for p in preds] == [1536] * len(texts)
505  
506  
def test_embeddings_batch_size_azure(tmp_path, monkeypatch):
    """The Azure API type caps the embeddings batch size at 16."""
    monkeypatch.setenv("OPENAI_API_TYPE", "azure")
    monkeypatch.setenv("OPENAI_ENGINE", "test_engine")
    mlflow.openai.save_model(
        model="text-embedding-ada-002",
        task=embeddings(),
        path=tmp_path,
    )
    loaded = mlflow.pyfunc.load_model(tmp_path)
    assert loaded._model_impl.api_config.batch_size == 16
518  
519  
def test_embeddings_pyfunc_server_and_score():
    """Serving the embeddings model returns the same predictions as local pyfunc."""
    df = pd.DataFrame({"text": ["a", "b"]})
    with mlflow.start_run():
        model_info = mlflow.openai.log_model(
            "text-embedding-ada-002",
            embeddings(),
            name="model",
            input_example=df,
        )

    payload = load_serving_example(model_info.model_uri)
    resp = pyfunc_serve_and_score_model(
        model_info.model_uri,
        data=payload,
        content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON,
        extra_args=["--env-manager", "local"],
    )
    expected = mlflow.pyfunc.load_model(model_info.model_uri).predict(df)
    served = pd.DataFrame(data=json.loads(resp.content.decode("utf-8")))
    pd.testing.assert_frame_equal(served, pd.DataFrame({"predictions": expected}))
539  
540  
def test_spark_udf_embeddings(tmp_path, spark):
    """The embeddings model works as a Spark UDF returning arrays of doubles."""
    mlflow.openai.save_model(
        model="text-embedding-ada-002",
        task=embeddings(),
        path=tmp_path,
    )
    udf = mlflow.pyfunc.spark_udf(spark, tmp_path, result_type="array<double>")
    sdf = spark.createDataFrame([("a",), ("b",)], ["x"])
    pdf = sdf.withColumn("z", udf("x")).toPandas()
    assert [len(vec) for vec in pdf["z"]] == [1536, 1536]
557  
558  
def test_inference_params(tmp_path):
    """A declared params schema is persisted and accepted at predict time."""
    mlflow.openai.save_model(
        model="text-embedding-ada-002",
        task=embeddings(),
        path=tmp_path,
        signature=ModelSignature(
            inputs=Schema([ColSpec(type="string", name=None)]),
            outputs=Schema([TensorSpec(type=np.dtype("float64"), shape=(-1,))]),
            params=ParamSchema([ParamSpec(name="batch_size", dtype="long", default=16)]),
        ),
    )

    model_info = mlflow.models.Model.load(tmp_path)
    matching = [
        p for p in model_info.signature.params if p.name == "batch_size" and p.default == 16
    ]
    assert len(matching) == 1

    model = mlflow.pyfunc.load_model(tmp_path)
    preds = model.predict(pd.DataFrame({"text": ["a", "b"]}), params={"batch_size": 5})
    assert [len(p) for p in preds] == [1536, 1536]
581  
582  
def test_inference_params_overlap(tmp_path):
    """Task kwargs that collide with declared signature params are rejected."""
    signature = ModelSignature(
        inputs=Schema([ColSpec(type="string", name=None)]),
        outputs=Schema([ColSpec(type="string", name=None)]),
        params=ParamSchema([ParamSpec(name="prefix", default=None, dtype="string")]),
    )
    with pytest.raises(mlflow.MlflowException, match=r"any of \['prefix'\] as parameters"):
        mlflow.openai.save_model(
            model="text-davinci-003",
            task=completions(),
            path=tmp_path,
            prefix="Classify the following text's sentiment:",
            signature=signature,
        )
596  
597  
def test_multimodal_messages(tmp_path):
    """Variables inside multimodal (text + image) content parts are templated."""
    mlflow.openai.save_model(
        model="gpt-4o-mini",
        task=chat_completions(),
        path=tmp_path,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "{system_prompt}"},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "data:image/jpeg;base64,{image_base64}",
                            "detail": "low",
                        },
                    },
                ],
            }
        ],
    )

    model_conf = mlflow.models.Model.load(tmp_path)
    # Variables are discovered across all content parts, sorted by name.
    assert model_conf.signature.inputs.to_dict() == [
        {"name": "image_base64", "type": "string", "required": True},
        {"name": "system_prompt", "type": "string", "required": True},
    ]
    assert model_conf.signature.outputs.to_dict() == [{"type": "string", "required": True}]

    model = mlflow.pyfunc.load_model(tmp_path)
    b64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg=="
    data = pd.DataFrame({"system_prompt": ["Analyze this image"], "image_base64": [b64]})

    expected_output = [
        [
            {
                "content": [
                    {"type": "text", "text": "Analyze this image"},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "data:image/jpeg;base64," + b64,
                            "detail": "low",
                        },
                    },
                ],
                "role": "user",
            }
        ]
    ]

    assert [json.loads(p) for p in model.predict(data)] == expected_output
660  
661  
def test_multimodal_messages_no_variables(tmp_path):
    """Multimodal messages without variables fall back to a single default input."""
    mlflow.openai.save_model(
        model="gpt-4o-mini",
        task=chat_completions(),
        path=tmp_path,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What's in this image?"},
                    {
                        "type": "image_url",
                        "image_url": {"url": "data:image/jpeg;base64,abc123", "detail": "low"},
                    },
                ],
            }
        ],
    )

    model_conf = mlflow.models.Model.load(tmp_path)
    # No variables found, so the default unnamed content input is used.
    assert model_conf.signature.inputs.to_dict() == [{"type": "string", "required": True}]

    model = mlflow.pyfunc.load_model(tmp_path)
    data = pd.DataFrame({"content": ["Additional context"]})

    # The static multimodal message is kept, and the input becomes an extra
    # plain user message appended after it.
    expected_output = [
        [
            {
                "content": [
                    {"type": "text", "text": "What's in this image?"},
                    {
                        "type": "image_url",
                        "image_url": {"url": "data:image/jpeg;base64,abc123", "detail": "low"},
                    },
                ],
                "role": "user",
            },
            {"content": "Additional context", "role": "user"},
        ]
    ]

    assert [json.loads(p) for p in model.predict(data)] == expected_output