# summarization.py
"""Compare two summarization prompts with LangChain and MLflow evaluation.

The script logs one LangChain ``LLMChain`` per prompt template to MLflow, evaluates each on
``summarization_example_data.csv``, prints the combined evaluation table, and finally scores
the most recently logged model on a new article.
"""

import os

import pandas as pd
from langchain.chains import LLMChain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate

import mlflow

# Fail fast with an explicit exception: unlike ``assert``, this check is not stripped when the
# interpreter runs with ``-O``.
if "OPENAI_API_KEY" not in os.environ:
    raise RuntimeError("Please set the OPENAI_API_KEY environment variable to run this example.")


def build_and_evaluate_model_with_prompt(prompt_template: str) -> None:
    """Log a summarization chain built from ``prompt_template`` and evaluate it in one run.

    A new MLflow run is started, the template is recorded as the ``prompt_template`` param,
    the chain is logged under the name ``model``, and the logged model is evaluated as a
    ``text-summarization`` model against the ``highlights`` column of
    ``summarization_example_data.csv``.

    Args:
        prompt_template: Prompt text containing an ``{article}`` placeholder, the only input
            variable the chain is built with.
    """
    # The context manager ends the run even if logging or evaluation raises, so a failed
    # attempt does not leave an active run behind for the next call.
    with mlflow.start_run():
        mlflow.log_param("prompt_template", prompt_template)
        # Create a news summarization model using prompt engineering with LangChain. Log the
        # model to MLflow Tracking
        llm = OpenAI(temperature=0.9)
        prompt = PromptTemplate(input_variables=["article"], template=prompt_template)
        chain = LLMChain(llm=llm, prompt=prompt)
        logged_model = mlflow.langchain.log_model(chain, name="model")

        # Evaluate the model on a small sample dataset
        sample_data = pd.read_csv("summarization_example_data.csv")
        mlflow.evaluate(
            model=logged_model.model_uri,
            model_type="text-summarization",
            data=sample_data,
            targets="highlights",
        )


prompt_template_1 = (
    "Write a summary of the following article that is between triple backticks: ```{article}```"
)
print(f"Building and evaluating model with prompt: '{prompt_template_1}'")
build_and_evaluate_model_with_prompt(prompt_template_1)

prompt_template_2 = (
    "Write a summary of the following article that is between triple backticks. Be concise. Make"
    " sure the summary includes important nouns and dates and keywords in the original text."
    " Just return the summary. Do not include any text other than the summary: ```{article}```"
)
print(f"Building and evaluating model with prompt: '{prompt_template_2}'")
build_and_evaluate_model_with_prompt(prompt_template_2)

# Load the evaluation results
results: pd.DataFrame = mlflow.load_table(
    "eval_results_table.json", extra_columns=["run_id", "params.prompt_template"]
)
# Sort by article id so rows for the same article end up next to each other.
results_grouped_by_article = results.sort_values(by="id")
print("Evaluation results:")
print(results_grouped_by_article[["run_id", "params.prompt_template", "article", "outputs"]])

# Score the most recently logged model (the prompt_template_2 run) on a new article
new_article = """
Adnan Januzaj swapped the lush turf of Old Trafford for the green baize at Sheffield when he
turned up at the snooker World Championships on Wednesday. The Manchester United winger, who has
endured a frustrating season under Louis van Gaal, had turned out for the Under 21 side at Fulham
on Tuesday night amid reports he could be farmed out on loan next season. But Januzaj may want to
consider trying his hand at another sport after displaying his silky skillls on a mini pool table.
Adnan Januzaj (left) cheered on\xa0Shaun Murphy (right) at the World Championship in Sheffield.
Januzaj shows off his potting skills on a mini pool table at the Crucible on Wednesday.
The 20-year-old Belgium international was at the Crucible to cheer on his friend Shaun Murphy in
his quarter-final against Anthony McGill. The 2005 winner moved a step closer to an elusive second
title in Sheffield with a 13-8 victory, sealed with a 67 break. Three centuries in the match, and
the way he accelerated away from 6-6, showed Murphy is a man to fear, and next for him will be
Neil Robertson or Barry Hawkins. Januzaj turned out for Under 21s in the 4-1 victory at Fulham on
Tuesday night.
"""

print(
    f"Scoring the model with prompt '{prompt_template_2}' on the article '{new_article[:70] + '...'}'"
)
# The model is looked up through the last active run rather than chosen by metric; this is the
# prompt_template_2 run because it was started last.
best_model = mlflow.pyfunc.load_model(f"runs:/{mlflow.last_active_run().info.run_id}/model")
summary = best_model.predict({"article": new_article})
print(f"Summary: {summary}")