/ tests / models / resources / agent_eval_recipe.html
agent_eval_recipe.html
  1  <head>
  2    <link
  3      rel="stylesheet"
  4      href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/xcode.min.css"
  5    />
  6    <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
  7    <script>
  8      hljs.highlightAll();
  9    </script>
 10    <style>
 11      body {
 12        margin: 0;
 13        font-family: -apple-system, BlinkMacSystemFont, Segoe UI, Roboto,
 14          Helvetica Neue, Arial, Noto Sans, sans-serif, Apple Color Emoji,
 15          Segoe UI Emoji, Segoe UI Symbol, Noto Color Emoji;
 16        -webkit-tap-highlight-color: rgba(0, 0, 0, 0);
 17        margin: 0;
 18        font-weight: 400;
 19        font-size: 13px;
 20        line-height: 18px;
 21        color: rgb(17, 23, 28);
 22      }
 23      code {
 24        line-height: 18px;
 25        font-size: 11px;
 26        background: rgb(250, 250, 250) !important;
 27      }
 28      pre {
 29        background: rgb(250, 250, 250);
 30        margin: 0;
 31        display: none;
 32      }
 33      pre.active {
 34        display: unset;
 35      }
 36      button {
 37        white-space: nowrap;
 38        text-align: center;
 39        position: relative;
 40        cursor: pointer;
 41        background: rgba(34, 114, 180, 0) !important;
 42        color: rgb(34, 114, 180) !important;
 43        border-color: rgba(34, 114, 180, 0) !important;
 44        padding: 4px 6px !important;
 45        text-decoration: none !important;
 46        line-height: 20px !important;
 47        box-shadow: none !important;
 48        height: 32px !important;
 49        display: inline-flex !important;
 50        -webkit-box-align: center !important;
 51        align-items: center !important;
 52        -webkit-box-pack: center !important;
 53        justify-content: center !important;
 54        vertical-align: middle !important;
 55      }
 56      p {
 57        margin: 0;
 58        padding: 0;
 59      }
 60      button:hover {
 61        background: rgba(34, 114, 180, 0.08) !important;
 62        color: rgb(14, 83, 139) !important;
 63      }
 64      button:active {
 65        background: rgba(34, 114, 180, 0.16) !important;
 66        color: rgb(4, 53, 93) !important;
 67      }
 68      h1 {
 69        margin-top: 4px;
 70        font-size: 22px;
 71      }
 72      .info {
 73        font-size: 12px;
 74        font-weight: 500;
 75        line-height: 16px;
 76        color: rgb(95, 114, 129);
 77      }
 78      .tabs {
 79        margin-top: 10px;
 80        border-bottom: 1px solid rgb(209, 217, 225) !important;
 81        display: flex;
 82        line-height: 24px;
 83      }
 84      .tab {
 85        font-size: 13px;
 86        font-weight: 600 !important;
 87        cursor: pointer;
 88        margin: 0 24px 0 2px;
 89        padding-left: 2px;
 90      }
 91      .tab:hover {
 92        color: rgb(14, 83, 139) !important;
 93      }
 94      .tab.active {
 95        border-bottom: 3px solid rgb(34, 114, 180) !important;
 96      }
 97      .link {
 98        margin-left: 12px;
 99        display: inline-block;
100        text-decoration: none;
101        color: rgb(34, 114, 180) !important;
102        font-size: 13px;
103        font-weight: 400;
104      }
105      .link:hover {
106        color: rgb(14, 83, 139) !important;
107      }
108      .link-content {
109        display: flex;
110        gap: 6px;
111        align-items: center;
112      }
113      .caret-up {
114        transform: rotate(180deg);
115      }
116    </style>
117  </head>
118  <body>
119    <div style="display: flex; align-items: center">
120      The logged model is compatible with the Mosaic AI Agent Framework.
121      <button onclick="toggleCode()">
122        See how to evaluate the model&nbsp;
123        <span
124          role="img"
125          id="caret"
126          aria-hidden="true"
127          class="anticon css-6xix1i"
128          style="font-size: 14px"
129          ><svg
130            xmlns="http://www.w3.org/2000/svg"
131            width="1em"
132            height="1em"
133            fill="none"
134            viewBox="0 0 16 16"
135            aria-hidden="true"
136            focusable="false"
137            class=""
138          >
139            <path
140              fill="currentColor"
141              fill-rule="evenodd"
142              d="M8 8.917 10.947 6 12 7.042 8 11 4 7.042 5.053 6z"
143              clip-rule="evenodd"
144            ></path>
145          </svg>
146        </span>
147      </button>
148    </div>
149    <div id="code" style="display: none">
150      <h1>
151        Agent evaluation
152        <a
153          class="link"
154          href="https://docs.databricks.com/en/generative-ai/agent-evaluation/synthesize-evaluation-set.html?utm_source=mlflow.log_model&utm_medium=notebook"
155          target="_blank"
156        >
157          <span class="link-content">
158            Learn more
159            <span role="img" aria-hidden="true" class="anticon css-6xix1i"
160              ><svg
161                xmlns="http://www.w3.org/2000/svg"
162                width="1em"
163                height="1em"
164                fill="none"
165                viewBox="0 0 16 16"
166                aria-hidden="true"
167                focusable="false"
168                class=""
169              >
170                <path
171                  fill="currentColor"
172                  d="M10 1h5v5h-1.5V3.56L8.53 8.53 7.47 7.47l4.97-4.97H10z"
173                ></path>
174                <path
175                  fill="currentColor"
176                  d="M1 2.75A.75.75 0 0 1 1.75 2H8v1.5H2.5v10h10V8H14v6.25a.75.75 0 0 1-.75.75H1.75a.75.75 0 0 1-.75-.75z"
177                ></path></svg></span></span
178        ></a>
179      </h1>
180      <p class="info">
181        Copy the following code snippet in a notebook cell (right click → copy)
182      </p>
183      <div class="tabs">
184        <div class="tab active" onclick="tabClicked(0)">Using synthetic data</div>
185        <div class="tab" onclick="tabClicked(1)">Using your own dataset</div>
186      </div>
187      <div style="height: 472px">
188        <pre
189          class="active"
190        ><code class="language-python">%pip install -U databricks-agents
191  dbutils.library.restartPython()
192  ## Run the above in a separate cell ##
193  
194  from databricks.agents.evals import generate_evals_df
195  import mlflow
196  
197  agent_description = &quot;A chatbot that answers questions about Databricks.&quot;
198  question_guidelines = &quot;&quot;&quot;
199  # User personas
200  - A developer new to the Databricks platform
201  # Example questions
202  - What API lets me parallelize operations over rows of a delta table?
203  &quot;&quot;&quot;
204  # TODO: Spark/Pandas DataFrame with &quot;content&quot; and &quot;doc_uri&quot; columns.
205  docs = spark.table(&quot;catalog.schema.my_table_of_docs&quot;)
206  evals = generate_evals_df(
207      docs=docs,
208      num_evals=25,
209      agent_description=agent_description,
210      question_guidelines=question_guidelines,
211  )
212  eval_result = mlflow.evaluate(data=evals, model=&quot;runs:/1/model&quot;, model_type=&quot;databricks-agent&quot;)
213  </code></pre>
214  
215        <pre><code class="language-python">%pip install -U databricks-agents
216  dbutils.library.restartPython()
217  ## Run the above in a separate cell ##
218  
219  import pandas as pd
220  import mlflow
221  
222  evals = [
223      {
224          &quot;request&quot;: {
225              &quot;messages&quot;: [
226                  {&quot;role&quot;: &quot;user&quot;, &quot;content&quot;: &quot;How do I convert a Spark DataFrame to Pandas?&quot;}
227              ],
228          },
229          # Optional, needed for judging correctness.
230          &quot;expected_facts&quot;: [
231              &quot;To convert a Spark DataFrame to Pandas, you can use the toPandas() method.&quot;
232          ],
233      }
234  ]
235  eval_result = mlflow.evaluate(
236      data=pd.DataFrame.from_records(evals), model=&quot;runs:/1/model&quot;, model_type=&quot;databricks-agent&quot;
237  )
238  </code></pre>
239      </div>
240    </div>
241    <script>
242      var codeShown = false;
243      function clip(el) {
244        var range = document.createRange();
245        range.selectNodeContents(el);
246        var sel = window.getSelection();
247        sel.removeAllRanges();
248        sel.addRange(range);
249      }
250  
251      function toggleCode() {
252        if (codeShown) {
253          document.getElementById("code").style.display = "none";
254          codeShown = false;
255        } else {
256          document.getElementById("code").style.display = "block";
257          clip(document.querySelector("pre.active"));
258          codeShown = true;
259        }
260        document.getElementById("caret").classList.toggle("caret-up");
261      }
262  
263      function tabClicked(tabIndex) {
264        document.querySelectorAll(".tab").forEach((tab, index) => {
265          if (index === tabIndex) {
266            tab.classList.add("active");
267          } else {
268            tab.classList.remove("active");
269          }
270        });
271        document.querySelectorAll("pre").forEach((pre, index) => {
272          if (index === tabIndex) {
273            pre.classList.add("active");
274          } else {
275            pre.classList.remove("active");
276          }
277        });
278        clip(document.querySelector("pre.active"));
279      }
280    </script>
281  </body>