Cradicle Explorer

/ examples / llms / summarization / summarization.py
summarization.py
 1  import os
 2  
 3  import pandas as pd
 4  from langchain.chains import LLMChain
 5  from langchain.llms import OpenAI
 6  from langchain.prompts import PromptTemplate
 7  
 8  import mlflow
 9  
# Fail fast with an explicit exception instead of `assert`: assert statements are removed
# when Python runs with -O, which would silently skip this check and let the script fail
# later with a less helpful error.
if "OPENAI_API_KEY" not in os.environ:
    raise RuntimeError(
        "Please set the OPENAI_API_KEY environment variable to run this example."
    )
13  
14  
def build_and_evaluate_model_with_prompt(prompt_template):
    """Build, log, and evaluate a summarization chain for one prompt template.

    A new MLflow run is started, the template is recorded as the ``prompt_template``
    parameter, a LangChain ``LLMChain`` backed by OpenAI is logged under the name
    ``"model"``, and the logged model is evaluated as a ``"text-summarization"`` model
    on the rows of ``summarization_example_data.csv`` (read from the current working
    directory).

    Args:
        prompt_template: Prompt text for the chain. It is used with
            ``input_variables=["article"]``, so it must contain an ``{article}``
            placeholder.

    Returns:
        None. Results are recorded in the MLflow run.
    """
    # Use the run as a context manager so it is always ended, even when logging or
    # evaluation raises. With a bare start_run()/end_run() pair an exception would leave
    # the run active, and the next start_run() in the same process would then fail.
    with mlflow.start_run():
        mlflow.log_param("prompt_template", prompt_template)

        # Create a news summarization model using prompt engineering with LangChain. Log
        # the model to MLflow Tracking
        llm = OpenAI(temperature=0.9)
        prompt = PromptTemplate(input_variables=["article"], template=prompt_template)
        chain = LLMChain(llm=llm, prompt=prompt)
        logged_model = mlflow.langchain.log_model(chain, name="model")

        # Evaluate the model on a small sample dataset; the "highlights" column is passed
        # as the evaluation targets (presumably the reference summaries).
        sample_data = pd.read_csv("summarization_example_data.csv")
        mlflow.evaluate(
            model=logged_model.model_uri,
            model_type="text-summarization",
            data=sample_data,
            targets="highlights",
        )
34  
35  
# Candidate prompts: a minimal instruction, and a stricter variant that also asks for a
# concise, keyword-preserving summary with no extra text.
prompt_template_1 = (
    "Write a summary of the following article that is between triple backticks: ```{article}```"
)
prompt_template_2 = (
    "Write a summary of the following article that is between triple backticks. Be concise. Make"
    " sure the summary includes important nouns and dates and keywords in the original text."
    " Just return the summary. Do not include any text other than the summary: ```{article}```"
)

# Build and evaluate one model per template, in order, so the run for prompt_template_2
# is the last one executed.
for candidate_template in (prompt_template_1, prompt_template_2):
    print(f"Building and evaluating model with prompt: '{candidate_template}'")
    build_and_evaluate_model_with_prompt(candidate_template)
49  
# Load the "eval_results_table.json" table and attach the run ID and the logged
# prompt template to every row, so rows from different runs can be told apart.
results: pd.DataFrame = mlflow.load_table(
    "eval_results_table.json",
    extra_columns=["run_id", "params.prompt_template"],
)

# Sorting by "id" puts rows that share an id next to each other (presumably one id per
# article, so the outputs of both prompts for an article end up adjacent).
results_grouped_by_article = results.sort_values("id")

display_columns = ["run_id", "params.prompt_template", "article", "outputs"]
print("Evaluation results:")
print(results_grouped_by_article[display_columns])
57  
# Score the best model on a new article
new_article = """
Adnan Januzaj swapped the lush turf of Old Trafford for the green baize at Sheffield when he
turned up at the snooker World Championships on Wednesday. The Manchester United winger, who has
endured a frustrating season under Louis van Gaal, had turned out for the Under 21 side at Fulham
on Tuesday night amid reports he could be farmed out on loan next season. But Januzaj may want to
consider trying his hand at another sport after displaying his silky skillls on a mini pool table.
Adnan Januzaj (left) cheered on\xa0Shaun Murphy (right) at the World Championship in Sheffield.
Januzaj shows off his potting skills on a mini pool table at the Crucible on Wednesday.
The 20-year-old Belgium international was at the Crucible to cheer on his friend Shaun Murphy in
his quarter-final against Anthony McGill. The 2005 winner moved a step closer to an elusive second
title in Sheffield with a 13-8 victory, sealed with a 67 break. Three centuries in the match, and
the way he accelerated away from 6-6, showed Murphy is a man to fear, and next for him will be
Neil Robertson or Barry Hawkins. Januzaj turned out for Under 21s in the 4-1 victory at Fulham on
Tuesday night.
"""

# Only the first 70 characters of the article are echoed to keep the log line short.
article_preview = new_article[:70] + "..."
print(f"Scoring the model with prompt '{prompt_template_2}' on the article '{article_preview}'")

# The model is loaded from the most recent run, under the "model" artifact name.
last_run_id = mlflow.last_active_run().info.run_id
best_model = mlflow.pyfunc.load_model(f"runs:/{last_run_id}/model")

summary = best_model.predict({"article": new_article})
print(f"Summary: {summary}")