prompt_optimization_bookings_example.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "ff47ac12",
   "metadata": {},
   "source": [
    "# Prompt Optimization with Evidently: Bookings Query Classifier\n",
    "\n",
    "In this tutorial, we'll optimize a prompt for classifying different types of customer service queries (like Booking, Payment, or Technical issues) using an LLM classifier.\n",
    "\n",
    "## What you'll learn:\n",
    "- How to load a dataset for LLM classification\n",
    "- How to define a multiclass classification prompt\n",
    "- How to run prompt optimization with Evidently\n",
    "- How to retrieve the best-performing prompt\n",
    "\n",
    "## Prerequisites\n",
    "- The judge defined below calls OpenAI, so credentials must be available to the kernel (typically through the `OPENAI_API_KEY` environment variable; confirm against the Evidently documentation for your version). Never hardcode the key in the notebook.\n",
    "- The dataset is read from a path relative to the notebook's working directory; adjust `DATA_PATH` in the configuration cell if your layout differs."
   ]
  },
  {
   "cell_type": "code",
   "id": "97a9b37c",
   "metadata": {},
   "source": [
    "# Install packages if needed (%pip installs into this kernel's environment)\n",
    "# %pip install evidently openai pandas"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "id": "f416c601",
   "metadata": {},
   "source": [
    "import pandas as pd\n",
    "\n",
    "from evidently import Dataset, DataDefinition, LLMClassification\n",
    "from evidently.descriptors import LLMEval\n",
    "from evidently.llm.templates import MulticlassClassificationPromptTemplate\n",
    "from evidently.llm.optimization import PromptOptimizer"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "id": "3b9d5e7a",
   "metadata": {},
   "source": [
    "# Settings a reader is most likely to tune, collected in one place.\n",
    "DATA_PATH = \"../datasets/bookings.csv\"  # relative to the notebook's working directory\n",
    "JUDGE_PROVIDER = \"openai\"\n",
    "JUDGE_MODEL = \"gpt-4o-mini\"\n",
    "REPETITIONS = 5  # passed as `repetitions` to the optimizer run"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "id": "f746075f",
   "metadata": {},
   "source": [
    "## Load Your Dataset\n",
    "\n",
    "The cells below use a `query` column as the input text and a `label` column as the expected category."
   ]
  },
  {
   "cell_type": "code",
   "id": "12c9627b",
   "metadata": {},
   "source": [
    "data = pd.read_csv(DATA_PATH)\n",
    "data.head()"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "id": "0ed36538",
   "metadata": {},
   "source": [
    "## Define Data Structure for Evidently\n",
    "\n",
    "Declare `query` as the text column and `label` as the categorical column, and mark them as the classification input and target."
   ]
  },
  {
   "cell_type": "code",
   "id": "fbd8d078",
   "metadata": {},
   "source": [
    "data_definition = DataDefinition(\n",
    "    text_columns=[\"query\"],\n",
    "    categorical_columns=[\"label\"],\n",
    "    llm=LLMClassification(input=\"query\", target=\"label\"),\n",
    ")"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "id": "326db04f",
   "metadata": {},
   "source": [
    "dataset = Dataset.from_pandas(data, data_definition=data_definition)"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "id": "dc5d1102",
   "metadata": {},
   "source": [
    "## Define a Multiclass Prompt and LLM Judge\n",
    "\n",
    "We start from a weak prompt (`bad_prompt`) and one short description per category. The judge applies the resulting template to the `query` column."
   ]
  },
  {
   "cell_type": "code",
   "id": "98bb2304",
   "metadata": {},
   "source": [
    "# Weak starting prompt that the optimizer is asked to improve.\n",
    "bad_prompt = \"Classify inqueries by categories\"\n",
    "\n",
    "template = MulticlassClassificationPromptTemplate(\n",
    "    pre_messages=[(\"system\", \"You are classifying user queries.\")],\n",
    "    criteria=bad_prompt,\n",
    "    category_criteria={\n",
    "        \"Booking\": \"bookings\",\n",
    "        \"Technical\": \"technical questions\",\n",
    "        \"Policy\": \"questions about policies\",\n",
    "        \"Payment\": \"payment questions\",\n",
    "        \"Escalation\": \"escalation requests\",\n",
    "    },\n",
    "    uncertainty=\"unknown\",\n",
    "    include_reasoning=True,\n",
    ")\n",
    "\n",
    "judge = LLMEval(\n",
    "    alias=\"bookings\",\n",
    "    provider=JUDGE_PROVIDER,\n",
    "    model=JUDGE_MODEL,\n",
    "    column_name=\"query\",\n",
    "    template=template,\n",
    ")"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "id": "ee25c3d8",
   "metadata": {},
   "source": [
    "## Run the Prompt Optimizer\n",
    "\n",
    "`arun` is awaited directly, which works here because Jupyter supports top-level `await`. Outside a notebook, use the synchronous `optimizer.run(...)` shown in the comment instead.\n",
    "\n",
    "This step calls the LLM provider, so it may take a while and may incur API costs."
   ]
  },
  {
   "cell_type": "code",
   "id": "f03ef9f6",
   "metadata": {},
   "source": [
    "optimizer = PromptOptimizer(\"bookings_example\", strategy=\"feedback\", verbose=True)\n",
    "optimizer.set_input_dataset(dataset)\n",
    "await optimizer.arun(judge, \"accuracy\", repetitions=REPETITIONS)\n",
    "# Synchronous alternative (e.g. in a plain Python script):\n",
    "# optimizer.run(judge, \"accuracy\")"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "id": "24e61f14",
   "metadata": {},
   "source": [
    "## View the Best Optimized Prompt\n",
    "\n",
    "Print the best prompt the optimizer found, then the statistics it reports for the run."
   ]
  },
  {
   "cell_type": "code",
   "id": "cef78310",
   "metadata": {},
   "source": [
    "print(optimizer.best_prompt())"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "id": "bd9287a0fb70811a",
   "metadata": {},
   "source": [
    "optimizer.print_stats()"
   ],
   "outputs": [],
   "execution_count": null
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}