embeddings.py
"""Example: log OpenAI text-embedding models with MLflow and query them.

The script runs two independent sections:

1. Log ``text-embedding-ada-002`` with the default signature, load it back
   as a pyfunc model, and embed two strings.
2. Log the same model with an explicit signature that declares a
   ``batch_size`` inference parameter, load that model back, and embed two
   strings while overriding ``batch_size`` at predict time.

Requires the ``OPENAI_API_KEY`` environment variable to be set.
"""

import os

import numpy as np
import openai

import mlflow
from mlflow.models.signature import ModelSignature
from mlflow.types.schema import ColSpec, ParamSchema, ParamSpec, Schema, TensorSpec

# Fail fast, before any run is started, if the API key is missing.
assert "OPENAI_API_KEY" in os.environ, " OPENAI_API_KEY environment variable must be set"


print(
    """
# ******************************************************************************
# Text embeddings
# ******************************************************************************
"""
)

with mlflow.start_run():
    model_info = mlflow.openai.log_model(
        model="text-embedding-ada-002",
        task=openai.embeddings,
        name="model",
    )

model = mlflow.pyfunc.load_model(model_info.model_uri)
print(model.predict(["hello", "world"]))


print(
    """
# ******************************************************************************
# Text embeddings with batch_size parameter
# ******************************************************************************
"""
)

with mlflow.start_run():
    # Capture the returned model info so the load below targets THIS model
    # (the one whose signature declares `batch_size`) rather than silently
    # reusing the `model_info` left over from the first section.
    model_info = mlflow.openai.log_model(
        model="text-embedding-ada-002",
        task=openai.embeddings,
        name="model",
        signature=ModelSignature(
            # One unnamed string column in, one variable-length float64 tensor out.
            inputs=Schema([ColSpec(type="string", name=None)]),
            outputs=Schema([TensorSpec(type=np.dtype("float64"), shape=(-1,))]),
            # Declare `batch_size` (default 1024) so it can be passed via `params`.
            params=ParamSchema([ParamSpec(name="batch_size", dtype="long", default=1024)]),
        ),
    )

model = mlflow.pyfunc.load_model(model_info.model_uri)
# Override the declared default for this call only.
print(model.predict(["hello", "world"], params={"batch_size": 16}))