example.py
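# A minimal client script for querying an MLflow Deployments Server routing to
# Gemini. It assumes the server is already running locally on port 7000 with
# "chat", "completions", and "embeddings" endpoints configured, e.g. started with
# (the config file name here is illustrative):
#
#   mlflow deployments start-server --config-path config.yaml --port 7000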
from mlflow.deployments import get_deploy_client


def main():
    client = get_deploy_client("http://localhost:7000")

    print(f"Gemini endpoints: {client.list_endpoints()}\n")
    print(f"Gemini completions endpoint info: {client.get_endpoint(endpoint='completions')}\n")

    # Chat example
    response_chat = client.predict(
        endpoint="chat",
        inputs={
            "messages": [
                {
                    "role": "system",
                    "content": "You are a talented European rapper with a background in US history",
                },
                {
                    "role": "user",
                    "content": "Please recite the preamble to the US Constitution as if it were "
                    "written today by a rapper from Reykjavík",
                },
            ],
            "temperature": 0.1,
            "top_p": 1,
            "n": 3,
            "max_tokens": 1000,
            "top_k": 40,
        },
    )
    print(f"Gemini response for chat: {response_chat}")

    # Embeddings request
    response_embeddings = client.predict(
        endpoint="embeddings",
        inputs={
            "input": [
                "Describe the main differences between renewable and nonrenewable energy sources."
            ]
        },
    )
    print(f"Gemini response for embeddings: {response_embeddings}\n")

    # Completions request
    response_completions = client.predict(
        endpoint="completions",
        inputs={
            "prompt": "Describe the main differences between renewable and nonrenewable energy sources.",
            "temperature": 0.1,
            "stop": ["."],
            "n": 3,
            "max_tokens": 100,
            "top_k": 40,
            "top_p": 0.5,
        },
    )
    print(f"Gemini response for completions: {response_completions}")


if __name__ == "__main__":
    main()