agent_eval_recipe.html
<head>
  <!--
    Agent-evaluation recipe widget.

    Shows a one-line notice with a disclosure button. Expanding it reveals two
    tabbed Python snippets (synthetic evaluation data vs. a user-supplied
    dataset). Opening the panel or switching tabs selects the visible snippet
    so it can be copied with right click → copy.

    NOTE(review): this looks like an embeddable fragment (no doctype, <html>,
    <title> or charset) rendered as notebook output; confirm how it is embedded
    before adding a full document skeleton.
  -->
  <link
    rel="stylesheet"
    href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/xcode.min.css"
  >
  <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
  <script>
    // Syntax highlighting is a progressive enhancement: if the CDN script did
    // not load, skip it instead of throwing a ReferenceError.
    if (window.hljs) {
      hljs.highlightAll();
    }
  </script>
  <style>
    body {
      margin: 0;
      font-family: -apple-system, BlinkMacSystemFont, Segoe UI, Roboto,
        Helvetica Neue, Arial, Noto Sans, sans-serif, Apple Color Emoji,
        Segoe UI Emoji, Segoe UI Symbol, Noto Color Emoji;
      -webkit-tap-highlight-color: rgba(0, 0, 0, 0);
      font-weight: 400;
      font-size: 13px;
      line-height: 18px;
      color: rgb(17, 23, 28);
    }
    code {
      line-height: 18px;
      font-size: 11px;
      /* !important so the highlight.js theme background does not win. */
      background: rgb(250, 250, 250) !important;
    }
    /* Snippets are hidden unless they belong to the selected tab. */
    pre {
      background: rgb(250, 250, 250);
      margin: 0;
      display: none;
    }
    pre.active {
      display: unset;
    }
    /* Text-style button: transparent until hovered or pressed. */
    button {
      white-space: nowrap;
      text-align: center;
      position: relative;
      cursor: pointer;
      background: rgba(34, 114, 180, 0) !important;
      color: rgb(34, 114, 180) !important;
      border-color: rgba(34, 114, 180, 0) !important;
      padding: 4px 6px !important;
      text-decoration: none !important;
      line-height: 20px !important;
      box-shadow: none !important;
      height: 32px !important;
      display: inline-flex !important;
      -webkit-box-align: center !important;
      align-items: center !important;
      -webkit-box-pack: center !important;
      justify-content: center !important;
      vertical-align: middle !important;
    }
    p {
      margin: 0;
      padding: 0;
    }
    button:hover {
      background: rgba(34, 114, 180, 0.08) !important;
      color: rgb(14, 83, 139) !important;
    }
    button:active {
      background: rgba(34, 114, 180, 0.16) !important;
      color: rgb(4, 53, 93) !important;
    }
    h1 {
      margin-top: 4px;
      font-size: 22px;
    }
    .info {
      font-size: 12px;
      font-weight: 500;
      line-height: 16px;
      color: rgb(95, 114, 129);
    }
    .tabs {
      margin-top: 10px;
      border-bottom: 1px solid rgb(209, 217, 225) !important;
      display: flex;
      line-height: 24px;
    }
    .tab {
      font-size: 13px;
      font-weight: 600 !important;
      cursor: pointer;
      margin: 0 24px 0 2px;
      padding-left: 2px;
    }
    .tab:hover {
      color: rgb(14, 83, 139) !important;
    }
    /* The selected tab is marked with a thick underline. */
    .tab.active {
      border-bottom: 3px solid rgb(34, 114, 180) !important;
    }
    .link {
      margin-left: 12px;
      display: inline-block;
      text-decoration: none;
      color: rgb(34, 114, 180) !important;
      font-size: 13px;
      font-weight: 400;
    }
    .link:hover {
      color: rgb(14, 83, 139) !important;
    }
    .link-content {
      display: flex;
      gap: 6px;
      align-items: center;
    }
    /* Flips the disclosure caret while the panel is open. */
    .caret-up {
      transform: rotate(180deg);
    }
  </style>
</head>
<body>
  <div style="display: flex; align-items: center">
    The logged model is compatible with the Mosaic AI Agent Framework.
    <!-- Disclosure button; aria-expanded is kept in sync by toggleCode(). -->
    <button
      id="code-toggle"
      type="button"
      aria-expanded="false"
      aria-controls="code"
      onclick="toggleCode()"
    >
      See how to evaluate the model
      <span
        role="img"
        id="caret"
        aria-hidden="true"
        class="anticon css-6xix1i"
        style="font-size: 14px"
        ><svg
          xmlns="http://www.w3.org/2000/svg"
          width="1em"
          height="1em"
          fill="none"
          viewBox="0 0 16 16"
          aria-hidden="true"
          focusable="false"
        >
          <path
            fill="currentColor"
            fill-rule="evenodd"
            d="M8 8.917 10.947 6 12 7.042 8 11 4 7.042 5.053 6z"
            clip-rule="evenodd"
          ></path>
        </svg>
      </span>
    </button>
  </div>
  <!-- Collapsible panel; starts hidden and is shown by toggleCode(). -->
  <div id="code" style="display: none">
    <h1>
      Agent evaluation
      <!--
        rel="noopener" keeps the new tab from reaching window.opener. The
        aria-label gives the short visible text context for assistive tech.
      -->
      <a
        class="link"
        href="https://docs.databricks.com/en/generative-ai/agent-evaluation/synthesize-evaluation-set.html?utm_source=mlflow.log_model&amp;utm_medium=notebook"
        target="_blank"
        rel="noopener"
        aria-label="Learn more about agent evaluation (opens in a new tab)"
      >
        <span class="link-content">
          Learn more
          <span role="img" aria-hidden="true" class="anticon css-6xix1i"
            ><svg
              xmlns="http://www.w3.org/2000/svg"
              width="1em"
              height="1em"
              fill="none"
              viewBox="0 0 16 16"
              aria-hidden="true"
              focusable="false"
            >
              <path
                fill="currentColor"
                d="M10 1h5v5h-1.5V3.56L8.53 8.53 7.47 7.47l4.97-4.97H10z"
              ></path>
              <path
                fill="currentColor"
                d="M1 2.75A.75.75 0 0 1 1.75 2H8v1.5H2.5v10h10V8H14v6.25a.75.75 0 0 1-.75.75H1.75a.75.75 0 0 1-.75-.75z"
              ></path></svg></span></span
      ></a>
    </h1>
    <p class="info">
      Copy the following code snippet in a notebook cell (right click → copy)
    </p>
    <!--
      Tab strip. Tab N controls the Nth <pre> in the document (see tabClicked).
      Only the selected tab is in the Tab order; arrow keys move between tabs.
    -->
    <div class="tabs" role="tablist" aria-label="Evaluation data source">
      <div
        class="tab active"
        id="agent-eval-tab-0"
        role="tab"
        tabindex="0"
        aria-selected="true"
        aria-controls="agent-eval-panel-0"
        onclick="tabClicked(0)"
        onkeydown="tabKeyDown(event, 0)"
      >Using synthetic data</div>
      <div
        class="tab"
        id="agent-eval-tab-1"
        role="tab"
        tabindex="-1"
        aria-selected="false"
        aria-controls="agent-eval-panel-1"
        onclick="tabClicked(1)"
        onkeydown="tabKeyDown(event, 1)"
      >Using your own dataset</div>
    </div>
    <!-- Fixed height so the layout does not jump when switching tabs. -->
    <div style="height: 472px">
      <!-- Whitespace inside <pre> is significant: keep snippet text flush left. -->
      <pre
        class="active"
        id="agent-eval-panel-0"
        role="tabpanel"
        aria-labelledby="agent-eval-tab-0"
        ><code class="language-python">%pip install -U databricks-agents
dbutils.library.restartPython()
## Run the above in a separate cell ##

from databricks.agents.evals import generate_evals_df
import mlflow

agent_description = "A chatbot that answers questions about Databricks."
question_guidelines = """
# User personas
- A developer new to the Databricks platform
# Example questions
- What API lets me parallelize operations over rows of a delta table?
"""
# TODO: Spark/Pandas DataFrame with "content" and "doc_uri" columns.
docs = spark.table("catalog.schema.my_table_of_docs")
evals = generate_evals_df(
    docs=docs,
    num_evals=25,
    agent_description=agent_description,
    question_guidelines=question_guidelines,
)
eval_result = mlflow.evaluate(data=evals, model="runs:/1/model", model_type="databricks-agent")
</code></pre>

      <pre
        id="agent-eval-panel-1"
        role="tabpanel"
        aria-labelledby="agent-eval-tab-1"
        ><code class="language-python">%pip install -U databricks-agents
dbutils.library.restartPython()
## Run the above in a separate cell ##

import pandas as pd
import mlflow

evals = [
    {
        "request": {
            "messages": [
                {"role": "user", "content": "How do I convert a Spark DataFrame to Pandas?"}
            ],
        },
        # Optional, needed for judging correctness.
        "expected_facts": [
            "To convert a Spark DataFrame to Pandas, you can use the toPandas() method."
        ],
    }
]
eval_result = mlflow.evaluate(
    data=pd.DataFrame.from_records(evals), model="runs:/1/model", model_type="databricks-agent"
)
</code></pre>
    </div>
  </div>
  <script>
    // Whether the #code panel is currently expanded.
    var codeShown = false;

    /**
     * Replace the current text selection with the full contents of `el`, so
     * the visible snippet can be copied with right click → copy.
     *
     * Does nothing when `el` is missing or when the browser exposes no
     * Selection object (window.getSelection() may return null, e.g. inside a
     * hidden iframe in some browsers).
     *
     * @param {Node|null} el Element whose contents should be selected.
     */
    function clip(el) {
      if (!el) {
        return;
      }
      var sel = window.getSelection();
      if (!sel) {
        return;
      }
      var range = document.createRange();
      range.selectNodeContents(el);
      sel.removeAllRanges();
      sel.addRange(range);
    }

    /**
     * Show or hide the #code panel. Invoked by the disclosure button.
     *
     * Keeps the button's aria-expanded and the caret orientation in sync with
     * `codeShown`, and selects the active snippet when the panel opens.
     */
    function toggleCode() {
      codeShown = !codeShown;
      document.getElementById("code").style.display = codeShown ? "block" : "none";

      var toggleButton = document.getElementById("code-toggle");
      if (toggleButton) {
        toggleButton.setAttribute("aria-expanded", String(codeShown));
      }
      // Derive the caret state from codeShown instead of flipping it blindly,
      // so the two can never drift apart.
      document.getElementById("caret").classList.toggle("caret-up", codeShown);

      if (codeShown) {
        clip(document.querySelector("pre.active"));
      }
    }

    /**
     * Activate the tab at `tabIndex` together with the <pre> at the same
     * document position, then select the newly visible snippet.
     *
     * @param {number} tabIndex Zero-based index of the tab to activate.
     */
    function tabClicked(tabIndex) {
      document.querySelectorAll(".tab").forEach((tab, index) => {
        var selected = index === tabIndex;
        tab.classList.toggle("active", selected);
        tab.setAttribute("aria-selected", String(selected));
        // Roving tabindex: only the selected tab is reachable with Tab.
        tab.setAttribute("tabindex", selected ? "0" : "-1");
      });
      document.querySelectorAll("pre").forEach((pre, index) => {
        pre.classList.toggle("active", index === tabIndex);
      });
      clip(document.querySelector("pre.active"));
    }

    /**
     * Keyboard support for the tab strip: Left/Right arrows move to the
     * neighbouring tab (wrapping around), Enter/Space activates the focused
     * tab. Other keys are left to the browser.
     *
     * @param {KeyboardEvent} event The keydown event from a tab.
     * @param {number} tabIndex Zero-based index of the tab that has focus.
     */
    function tabKeyDown(event, tabIndex) {
      var tabs = document.querySelectorAll(".tab");
      var target;
      if (event.key === "ArrowRight") {
        target = (tabIndex + 1) % tabs.length;
      } else if (event.key === "ArrowLeft") {
        target = (tabIndex - 1 + tabs.length) % tabs.length;
      } else if (event.key === "Enter" || event.key === " ") {
        target = tabIndex;
      } else {
        return;
      }
      // Prevent page scrolling on Space / arrow keys.
      event.preventDefault();
      // Move focus first so the selection made by tabClicked() is applied last.
      tabs[target].focus();
      tabClicked(target);
    }
  </script>
</body>