/ examples / openai / embeddings.py
embeddings.py
 1  import os
 2  
 3  import numpy as np
 4  import openai
 5  
 6  import mlflow
 7  from mlflow.models.signature import ModelSignature
 8  from mlflow.types.schema import ColSpec, ParamSchema, ParamSpec, Schema, TensorSpec
 9  
# Fail fast when no OpenAI API key is configured in the environment.
# An explicit raise is used rather than ``assert`` because assert statements
# are removed when Python runs with ``-O``, which would silently skip the check.
if "OPENAI_API_KEY" not in os.environ:
    raise RuntimeError(" OPENAI_API_KEY environment variable must be set")
11  
12  
# Example: log an OpenAI embeddings model with MLflow, load it back through the
# pyfunc flavor, and print the prediction for two input strings.
section_banner = """
# ******************************************************************************
# Text embeddings
# ******************************************************************************
"""
print(section_banner)

with mlflow.start_run():
    # Keep the returned model info; its ``model_uri`` is used to reload the model.
    model_info = mlflow.openai.log_model(
        name="model",
        task=openai.embeddings,
        model="text-embedding-ada-002",
    )

model = mlflow.pyfunc.load_model(model_info.model_uri)
sample_texts = ["hello", "world"]
print(model.predict(sample_texts))
30  
31  
# Example: log the same embeddings model with an explicit signature that
# declares a ``batch_size`` inference parameter, then pass that parameter at
# predict time.
print(
    """
# ******************************************************************************
# Text embeddings with batch_size parameter
# ******************************************************************************
"""
)

with mlflow.start_run():
    # Capture the returned model info so the model loaded below is the one
    # logged in this run (with the ``batch_size`` signature), rather than
    # whatever an earlier ``model_info`` variable happens to point at.
    model_info = mlflow.openai.log_model(
        model="text-embedding-ada-002",
        task=openai.embeddings,
        name="model",
        signature=ModelSignature(
            # A single unnamed string column as input.
            inputs=Schema([ColSpec(type="string", name=None)]),
            # float64 tensor output with a variable-length dimension.
            outputs=Schema([TensorSpec(type=np.dtype("float64"), shape=(-1,))]),
            # ``batch_size`` is declared as a long parameter defaulting to 1024.
            params=ParamSchema([ParamSpec(name="batch_size", dtype="long", default=1024)]),
        ),
    )

model = mlflow.pyfunc.load_model(model_info.model_uri)
# Pass batch_size=16 for this call instead of the declared default of 1024.
print(model.predict(["hello", "world"], params={"batch_size": 16}))