__init__.py
import json
import posixpath
import warnings
from typing import Any, Iterator

from mlflow.deployments import BaseDeploymentClient
from mlflow.deployments.constants import (
    MLFLOW_DEPLOYMENT_CLIENT_REQUEST_RETRY_CODES,
)
from mlflow.environment_variables import (
    MLFLOW_DEPLOYMENT_PREDICT_TIMEOUT,
    MLFLOW_DEPLOYMENT_PREDICT_TOTAL_TIMEOUT,
    MLFLOW_HTTP_REQUEST_TIMEOUT,
)
from mlflow.exceptions import MlflowException
from mlflow.utils import AttrDict
from mlflow.utils.annotations import deprecated
from mlflow.utils.databricks_utils import get_databricks_host_creds
from mlflow.utils.rest_utils import (
    augmented_raise_for_status,
    http_request,
    validate_deployment_timeout_config,
)


class DatabricksEndpoint(AttrDict):
    """
    A dictionary-like object representing a Databricks serving endpoint.

    .. code-block:: python

        endpoint = DatabricksEndpoint(
            {
                "name": "chat",
                "creator": "alice@company.com",
                "creation_timestamp": 0,
                "last_updated_timestamp": 0,
                "state": {...},
                "config": {...},
                "tags": [...],
                "id": "88fd3f75a0d24b0380ddc40484d7a31b",
            }
        )
        assert endpoint.name == "chat"
    """


class DatabricksDeploymentClient(BaseDeploymentClient):
    """
    Client for interacting with Databricks serving endpoints.

    Example:

    First, set up credentials for authentication:

    .. code-block:: bash

        export DATABRICKS_HOST=...
        export DATABRICKS_TOKEN=...

    .. seealso::

        See https://docs.databricks.com/en/dev-tools/auth.html for other authentication methods.

    Then, create a deployment client and use it to interact with Databricks serving endpoints:

    .. code-block:: python

        from mlflow.deployments import get_deploy_client

        client = get_deploy_client("databricks")
        endpoints = client.list_endpoints()
        assert endpoints == [
            {
                "name": "chat",
                "creator": "alice@company.com",
                "creation_timestamp": 0,
                "last_updated_timestamp": 0,
                "state": {...},
                "config": {...},
                "tags": [...],
                "id": "88fd3f75a0d24b0380ddc40484d7a31b",
            },
        ]
    """

    def create_deployment(self, name, model_uri, flavor=None, config=None, endpoint=None):
        """
        .. warning::

            This method is not implemented for `DatabricksDeploymentClient`.
        """
        raise NotImplementedError

    def update_deployment(self, name, model_uri=None, flavor=None, config=None, endpoint=None):
        """
        .. warning::

            This method is not implemented for `DatabricksDeploymentClient`.
        """
        raise NotImplementedError

    def delete_deployment(self, name, config=None, endpoint=None):
        """
        .. warning::

            This method is not implemented for `DatabricksDeploymentClient`.
        """
        raise NotImplementedError

    def list_deployments(self, endpoint=None):
        """
        .. warning::

            This method is not implemented for `DatabricksDeploymentClient`.
        """
        raise NotImplementedError

    def get_deployment(self, name, endpoint=None):
        """
        .. warning::

            This method is not implemented for `DatabricksDeploymentClient`.
        """
        raise NotImplementedError

    def _issue_request(
        self,
        *,
        method: str,
        prefix: str,
        route: str | None,
        json_body: dict[str, Any] | None,
        timeout: int | None,
        retry_timeout_seconds: int | None,
        stream: bool = False,
    ):
        """
        Issue an HTTP request to a serving-endpoints route and return the raw response.

        Shared plumbing for :meth:`_call_endpoint` and :meth:`_call_endpoint_stream`:
        validates the timeout configuration, routes the payload as query params for GET
        and as a JSON body otherwise, and raises on HTTP error statuses.

        Args:
            method: HTTP method (GET, POST, etc.).
            prefix: API prefix path.
            route: Endpoint route, appended after ``serving-endpoints``.
            json_body: Request payload (query params for GET, JSON body otherwise).
            timeout: Maximum time (in seconds) for a single HTTP request; falls back to
                ``MLFLOW_HTTP_REQUEST_TIMEOUT`` when ``None``.
            retry_timeout_seconds: Maximum time (in seconds) for all retry attempts combined.
            stream: Whether to receive the response content as a stream.

        Returns:
            The ``requests.Response`` object (already checked for HTTP errors).
        """
        validate_deployment_timeout_config(timeout, retry_timeout_seconds)

        # GET requests carry the payload as query parameters; everything else as JSON.
        if method.lower() == "get":
            call_kwargs = {"params": json_body}
        else:
            call_kwargs = {"json": json_body}

        response = http_request(
            host_creds=get_databricks_host_creds(self.target_uri),
            endpoint=posixpath.join(prefix, "serving-endpoints", route or ""),
            method=method,
            timeout=MLFLOW_HTTP_REQUEST_TIMEOUT.get() if timeout is None else timeout,
            retry_timeout_seconds=retry_timeout_seconds,
            raise_on_status=False,
            retry_codes=MLFLOW_DEPLOYMENT_CLIENT_REQUEST_RETRY_CODES,
            extra_headers={"X-Databricks-Endpoints-API-Client": "Databricks Deployment Client"},
            stream=stream,
            **call_kwargs,
        )
        augmented_raise_for_status(response)
        return response

    def _call_endpoint(
        self,
        *,
        method: str,
        prefix: str = "/api/2.0",
        route: str | None = None,
        json_body: dict[str, Any] | None = None,
        timeout: int | None = None,
        retry_timeout_seconds: int | None = None,
    ) -> "DatabricksEndpoint":
        """
        Call a serving-endpoints API route and parse the JSON response.

        Args:
            method: HTTP method (GET, POST, etc.).
            prefix: API prefix path.
            route: Endpoint route.
            json_body: Request payload.
            timeout: Maximum time (in seconds) for a single HTTP request.
            retry_timeout_seconds: Maximum time (in seconds) for all retry attempts combined.

        Returns:
            A :py:class:`DatabricksEndpoint` wrapping the parsed JSON response.
        """
        response = self._issue_request(
            method=method,
            prefix=prefix,
            route=route,
            json_body=json_body,
            timeout=timeout,
            retry_timeout_seconds=retry_timeout_seconds,
        )
        return DatabricksEndpoint(response.json())

    def _call_endpoint_stream(
        self,
        *,
        method: str,
        prefix: str = "/api/2.0",
        route: str | None = None,
        json_body: dict[str, Any] | None = None,
        timeout: int | None = None,
        retry_timeout_seconds: int | None = None,
    ) -> Iterator[str]:
        """
        Call a serving-endpoints API route and yield the response line by line.

        Args:
            method: HTTP method (GET, POST, etc.).
            prefix: API prefix path.
            route: Endpoint route.
            json_body: Request payload.
            timeout: Maximum time (in seconds) for a single HTTP request.
            retry_timeout_seconds: Maximum time (in seconds) for all retry attempts combined.

        Returns:
            An iterator over the non-empty, stripped lines of the streaming response body.
        """
        response = self._issue_request(
            method=method,
            prefix=prefix,
            route=route,
            json_body=json_body,
            timeout=timeout,
            retry_timeout_seconds=retry_timeout_seconds,
            stream=True,  # Receive response content in streaming way.
        )

        # Streaming response content are composed of multiple lines.
        # Each line format depends on specific endpoint
        # Explicitly set the encoding to `utf-8` so the `decode_unicode` in the next line
        # will decode correctly
        response.encoding = "utf-8"
        return (
            line.strip()
            for line in response.iter_lines(decode_unicode=True)
            if line.strip()  # filter out keep-alive new lines
        )

    def predict(self, deployment_name=None, inputs=None, endpoint=None):
        """
        Query a serving endpoint with the provided model inputs.
        See https://docs.databricks.com/api/workspace/servingendpoints/query for request/response
        schema.

        Args:
            deployment_name: Unused.
            inputs: A dictionary containing the model inputs to query.
            endpoint: The name of the serving endpoint to query.

        Returns:
            A :py:class:`DatabricksEndpoint` object containing the query response.

        Example:

            .. code-block:: python

                from mlflow.deployments import get_deploy_client

                client = get_deploy_client("databricks")
                response = client.predict(
                    endpoint="chat",
                    inputs={
                        "messages": [
                            {"role": "user", "content": "Hello!"},
                        ],
                    },
                )
                assert response == {
                    "id": "chatcmpl-8OLm5kfqBAJD8CpsMANESWKpLSLXY",
                    "object": "chat.completion",
                    "created": 1700814265,
                    "model": "gpt-4-0613",
                    "choices": [
                        {
                            "index": 0,
                            "message": {
                                "role": "assistant",
                                "content": "Hello! How can I assist you today?",
                            },
                            "finish_reason": "stop",
                        }
                    ],
                    "usage": {
                        "prompt_tokens": 9,
                        "completion_tokens": 9,
                        "total_tokens": 18,
                    },
                }
        """
        return self._call_endpoint(
            method="POST",
            prefix="/",
            route=posixpath.join(endpoint, "invocations"),
            json_body=inputs,
            timeout=MLFLOW_DEPLOYMENT_PREDICT_TIMEOUT.get(),
            retry_timeout_seconds=MLFLOW_DEPLOYMENT_PREDICT_TOTAL_TIMEOUT.get(),
        )

    def predict_stream(
        self, deployment_name=None, inputs=None, endpoint=None
    ) -> Iterator[dict[str, Any]]:
        """
        Submit a query to a configured provider endpoint, and get streaming response

        Args:
            deployment_name: Unused.
            inputs: The inputs to the query, as a dictionary.
            endpoint: The name of the endpoint to query.

        Returns:
            An iterator of dictionary containing the response from the endpoint.

        Raises:
            MlflowException: If a streamed line does not follow the server-sent-events
                ``data: <value>`` format.

        Example:

            .. code-block:: python

                from mlflow.deployments import get_deploy_client

                client = get_deploy_client("databricks")
                chunk_iter = client.predict_stream(
                    endpoint="databricks-llama-2-70b-chat",
                    inputs={
                        "messages": [{"role": "user", "content": "Hello!"}],
                        "temperature": 0.0,
                        "n": 1,
                        "max_tokens": 500,
                    },
                )
                for chunk in chunk_iter:
                    print(chunk)
                    # Example:
                    # {
                    #     "id": "82a834f5-089d-4fc0-ad6c-db5c7d6a6129",
                    #     "object": "chat.completion.chunk",
                    #     "created": 1712133837,
                    #     "model": "llama-2-70b-chat-030424",
                    #     "choices": [
                    #         {
                    #             "index": 0, "delta": {"role": "assistant", "content": "Hello"},
                    #             "finish_reason": None,
                    #         }
                    #     ],
                    #     "usage": {"prompt_tokens": 11, "completion_tokens": 1, "total_tokens": 12},
                    # }
        """
        inputs = inputs or {}

        # Add stream=True param in request body to get streaming response
        # See https://docs.databricks.com/api/workspace/servingendpoints/query#stream
        chunk_line_iter = self._call_endpoint_stream(
            method="POST",
            prefix="/",
            route=posixpath.join(endpoint, "invocations"),
            json_body={**inputs, "stream": True},
            timeout=MLFLOW_DEPLOYMENT_PREDICT_TIMEOUT.get(),
            retry_timeout_seconds=MLFLOW_DEPLOYMENT_PREDICT_TOTAL_TIMEOUT.get(),
        )

        for line in chunk_line_iter:
            # Each streamed line is expected to be a server-sent event: "data: <json>".
            splits = line.split(":", 1)
            if len(splits) < 2:
                raise MlflowException(
                    f"Unknown response format: '{line}', "
                    "expected 'data: <value>' for streaming response."
                )
            key, value = splits
            if key != "data":
                raise MlflowException(
                    f"Unknown response format with key '{key}'. "
                    f"Expected 'data: <value>' for streaming response, got '{line}'."
                )

            value = value.strip()
            if value == "[DONE]":
                # Databricks endpoint streaming response ends with
                # a line of "data: [DONE]"
                return

            yield json.loads(value)

    def create_endpoint(self, name=None, config=None, route_optimized=None):
        """
        Create a new serving endpoint with the provided name and configuration.

        See https://docs.databricks.com/api/workspace/servingendpoints/create for request/response
        schema.

        Args:
            name: The name of the serving endpoint to create.

                .. warning::
                    Deprecated. Include `name` in `config` instead.

            config: A dictionary containing either the full API request payload
                or the configuration of the serving endpoint to create.
            route_optimized: A boolean which defines whether databricks serving endpoint
                is optimized for routing traffic. Only used in the deprecated approach.
                Defaults to ``None`` (treated as ``False`` in the legacy payload) so that
                an unset parameter is distinguishable from an explicit ``False``.

                .. warning::
                    Deprecated. Include `route_optimized` in `config` instead.

        Returns:
            A :py:class:`DatabricksEndpoint` object containing the request response.

        Raises:
            MlflowException: If `name` is missing from both the parameter and the config,
                or if conflicting `name`/`route_optimized` values are supplied.

        Example:

            .. code-block:: python

                from mlflow.deployments import get_deploy_client

                client = get_deploy_client("databricks")
                endpoint = client.create_endpoint(
                    config={
                        "name": "test",
                        "config": {
                            "served_entities": [
                                {
                                    "external_model": {
                                        "name": "gpt-4",
                                        "provider": "openai",
                                        "task": "llm/v1/chat",
                                        "openai_config": {
                                            "openai_api_key": "{{secrets/scope/key}}",
                                        },
                                    },
                                }
                            ],
                            "route_optimized": True,
                        },
                    },
                )
                assert endpoint == {
                    "name": "test",
                    "creator": "alice@company.com",
                    "creation_timestamp": 0,
                    "last_updated_timestamp": 0,
                    "state": {...},
                    "config": {...},
                    "tags": [...],
                    "id": "88fd3f75a0d24b0380ddc40484d7a31b",
                    "permission_level": "CAN_MANAGE",
                    "route_optimized": False,
                    "task": "llm/v1/chat",
                    "endpoint_type": "EXTERNAL_MODEL",
                    "creator_display_name": "Alice",
                    "creator_kind": "User",
                }
        """
        warnings_list = []

        if config and "config" in config:
            # Using new style: full API request payload
            payload = config.copy()

            # Validate name conflicts
            if "name" in payload:
                if name is not None:
                    if payload["name"] == name:
                        warnings_list.append(
                            "Passing 'name' as a parameter is deprecated. "
                            "Please specify 'name' only within the config dictionary."
                        )
                    else:
                        raise MlflowException(
                            f"Name mismatch. Found '{name}' as parameter and '{payload['name']}' "
                            "in config. Please specify 'name' only within the config dictionary "
                            "as this parameter is deprecated."
                        )
            else:
                if name is None:
                    raise MlflowException(
                        "The 'name' field is required. Please specify it within the config "
                        "dictionary."
                    )
                payload["name"] = name
                warnings_list.append(
                    "Passing 'name' as a parameter is deprecated. "
                    "Please specify 'name' within the config dictionary."
                )

            # Validate route_optimized conflicts. Only treat the deprecated parameter as
            # "supplied" when it is not None; previously a `False` default made a config-only
            # `route_optimized: True` spuriously raise a conflict error.
            if "route_optimized" in payload:
                if route_optimized is not None:
                    if payload["route_optimized"] != route_optimized:
                        raise MlflowException(
                            "Conflicting 'route_optimized' values found. "
                            "Please specify 'route_optimized' only within the config dictionary "
                            "as this parameter is deprecated."
                        )
                    warnings_list.append(
                        "Passing 'route_optimized' as a parameter is deprecated. "
                        "Please specify 'route_optimized' only within the config dictionary."
                    )
            else:
                if route_optimized:
                    payload["route_optimized"] = route_optimized
                    warnings_list.append(
                        "Passing 'route_optimized' as a parameter is deprecated. "
                        "Please specify 'route_optimized' within the config dictionary."
                    )
        else:
            # Handle legacy format (backwards compatibility)
            warnings_list.append(
                "Passing 'name', 'config', and 'route_optimized' as separate parameters is "
                "deprecated. Please pass the full API request payload as a single dictionary "
                "in the 'config' parameter."
            )
            config = config.copy() if config else {}  # avoid mutating config
            # Tags and rate limits are top-level fields in the API payload, so hoist them
            # out of the legacy endpoint-config dictionary.
            extras = {}
            for key in ("tags", "rate_limits"):
                if value := config.pop(key, None):
                    extras[key] = value
            # Coerce to bool so the payload keeps sending `False` when the deprecated
            # parameter was left unset (None).
            payload = {
                "name": name,
                "config": config,
                "route_optimized": bool(route_optimized),
                **extras,
            }

        if warnings_list:
            warnings.warn("\n".join(warnings_list), UserWarning)

        return self._call_endpoint(method="POST", json_body=payload)

    @deprecated(
        alternative=(
            "update_endpoint_config, update_endpoint_tags, update_endpoint_rate_limits, "
            "or update_endpoint_ai_gateway"
        )
    )
    def update_endpoint(self, endpoint, config=None):
        """
        Update a specified serving endpoint with the provided configuration.
        See https://docs.databricks.com/api/workspace/servingendpoints/updateconfig for
        request/response schema.

        Args:
            endpoint: The name of the serving endpoint to update.
            config: A dictionary containing the configuration of the serving endpoint to update.
                If it contains only a ``rate_limits`` key, the rate-limits API is called
                instead of the config API.

        Returns:
            A :py:class:`DatabricksEndpoint` object containing the request response.

        Example:

            .. code-block:: python

                from mlflow.deployments import get_deploy_client

                client = get_deploy_client("databricks")
                endpoint = client.update_endpoint(
                    endpoint="chat",
                    config={
                        "served_entities": [
                            {
                                "name": "test",
                                "external_model": {
                                    "name": "gpt-4",
                                    "provider": "openai",
                                    "task": "llm/v1/chat",
                                    "openai_config": {
                                        "openai_api_key": "{{secrets/scope/key}}",
                                    },
                                },
                            }
                        ],
                    },
                )

                rate_limits = client.update_endpoint(
                    endpoint="chat",
                    config={
                        "rate_limits": [
                            {
                                "key": "user",
                                "renewal_period": "minute",
                                "calls": 10,
                            }
                        ],
                    },
                )
                assert rate_limits == {
                    "rate_limits": [
                        {
                            "key": "user",
                            "renewal_period": "minute",
                            "calls": 10,
                        }
                    ],
                }
        """
        warnings.warn(
            "The `update_endpoint` method is deprecated. Use the specific update methods—"
            "`update_endpoint_config`, `update_endpoint_tags`, `update_endpoint_rate_limits`, "
            "`update_endpoint_ai_gateway`—instead.",
            UserWarning,
        )

        # Guard against `config=None` (allowed by the signature); `list(None)` would raise
        # a TypeError before reaching the API call.
        if config is not None and list(config) == ["rate_limits"]:
            return self._call_endpoint(
                method="PUT", route=posixpath.join(endpoint, "rate-limits"), json_body=config
            )
        else:
            return self._call_endpoint(
                method="PUT", route=posixpath.join(endpoint, "config"), json_body=config
            )

    def update_endpoint_config(self, endpoint, config):
        """
        Update the configuration of a specified serving endpoint. See
        https://docs.databricks.com/api/workspace/servingendpoints/updateconfig for
        request/response schema.

        Args:
            endpoint: The name of the serving endpoint to update.
            config: A dictionary containing the configuration of the serving endpoint to update.

        Returns:
            A :py:class:`DatabricksEndpoint` object containing the request response.

        Example:

            .. code-block:: python

                from mlflow.deployments import get_deploy_client

                client = get_deploy_client("databricks")
                updated_endpoint = client.update_endpoint_config(
                    endpoint="test",
                    config={
                        "served_entities": [
                            {
                                "name": "gpt-4o-mini",
                                "external_model": {
                                    "name": "gpt-4o-mini",
                                    "provider": "openai",
                                    "task": "llm/v1/chat",
                                    "openai_config": {
                                        "openai_api_key": "{{secrets/scope/key}}",
                                    },
                                },
                            }
                        ]
                    },
                )
        """
        return self._call_endpoint(
            method="PUT", route=posixpath.join(endpoint, "config"), json_body=config
        )

    def update_endpoint_tags(self, endpoint, config):
        """
        Update the tags of a specified serving endpoint. See
        https://docs.databricks.com/api/workspace/servingendpoints/patch for request/response
        schema.

        Args:
            endpoint: The name of the serving endpoint to update.
            config: A dictionary containing tags to add and/or remove.

        Returns:
            A :py:class:`DatabricksEndpoint` object containing the request response.

        Example:

            .. code-block:: python

                from mlflow.deployments import get_deploy_client

                client = get_deploy_client("databricks")
                updated_tags = client.update_endpoint_tags(
                    endpoint="test", config={"add_tags": [{"key": "project", "value": "test"}]}
                )
                assert updated_tags == {"tags": [{"key": "project", "value": "test"}]}
        """
        return self._call_endpoint(
            method="PATCH", route=posixpath.join(endpoint, "tags"), json_body=config
        )

    def update_endpoint_rate_limits(self, endpoint, config):
        """
        Update the rate limits of a specified serving endpoint.
        See https://docs.databricks.com/api/workspace/servingendpoints/put for request/response
        schema.

        Args:
            endpoint: The name of the serving endpoint to update.
            config: A dictionary containing the updated rate limit configuration.

        Returns:
            A :py:class:`DatabricksEndpoint` object containing the updated rate limits.

        Example:

            .. code-block:: python

                from mlflow.deployments import get_deploy_client

                client = get_deploy_client("databricks")
                name = "databricks-dbrx-instruct"
                rate_limits = {
                    "rate_limits": [{"calls": 10, "key": "endpoint", "renewal_period": "minute"}]
                }
                updated_rate_limits = client.update_endpoint_rate_limits(
                    endpoint=name, config=rate_limits
                )
                assert updated_rate_limits == {
                    "rate_limits": [{"calls": 10, "key": "endpoint", "renewal_period": "minute"}]
                }
        """
        return self._call_endpoint(
            method="PUT", route=posixpath.join(endpoint, "rate-limits"), json_body=config
        )

    def update_endpoint_ai_gateway(self, endpoint, config):
        """
        Update the AI Gateway configuration of a specified serving endpoint.

        Args:
            endpoint (str): The name of the serving endpoint to update.
            config (dict): A dictionary containing the AI Gateway configuration to update.

        Returns:
            dict: A dictionary containing the updated AI Gateway configuration.

        Example:

            .. code-block:: python

                from mlflow.deployments import get_deploy_client

                client = get_deploy_client("databricks")
                gateway_config = {
                    "usage_tracking_config": {"enabled": True},
                    "inference_table_config": {
                        "enabled": True,
                        "catalog_name": "my_catalog",
                        "schema_name": "my_schema",
                    },
                }
                updated_gateway = client.update_endpoint_ai_gateway(
                    endpoint="test", config=gateway_config
                )
        """
        return self._call_endpoint(
            method="PUT", route=posixpath.join(endpoint, "ai-gateway"), json_body=config
        )

    def delete_endpoint(self, endpoint):
        """
        Delete a specified serving endpoint.
        See https://docs.databricks.com/api/workspace/servingendpoints/delete for request/response
        schema.

        Args:
            endpoint: The name of the serving endpoint to delete.

        Returns:
            A DatabricksEndpoint object containing the request response.

        Example:

            .. code-block:: python

                from mlflow.deployments import get_deploy_client

                client = get_deploy_client("databricks")
                client.delete_endpoint(endpoint="chat")
        """
        return self._call_endpoint(method="DELETE", route=endpoint)

    def list_endpoints(self):
        """
        Retrieve all serving endpoints.

        See https://docs.databricks.com/api/workspace/servingendpoints/list for request/response
        schema.

        Returns:
            A list of :py:class:`DatabricksEndpoint` objects containing the request response.

        Example:

            .. code-block:: python

                from mlflow.deployments import get_deploy_client

                client = get_deploy_client("databricks")
                endpoints = client.list_endpoints()
                assert endpoints == [
                    {
                        "name": "chat",
                        "creator": "alice@company.com",
                        "creation_timestamp": 0,
                        "last_updated_timestamp": 0,
                        "state": {...},
                        "config": {...},
                        "tags": [...],
                        "id": "88fd3f75a0d24b0380ddc40484d7a31b",
                    },
                ]
        """
        return self._call_endpoint(method="GET").endpoints

    def get_endpoint(self, endpoint):
        """
        Get a specified serving endpoint.
        See https://docs.databricks.com/api/workspace/servingendpoints/get for request/response
        schema.

        Args:
            endpoint: The name of the serving endpoint to get.

        Returns:
            A DatabricksEndpoint object containing the request response.

        Example:

            .. code-block:: python

                from mlflow.deployments import get_deploy_client

                client = get_deploy_client("databricks")
                endpoint = client.get_endpoint(endpoint="chat")
                assert endpoint == {
                    "name": "chat",
                    "creator": "alice@company.com",
                    "creation_timestamp": 0,
                    "last_updated_timestamp": 0,
                    "state": {...},
                    "config": {...},
                    "tags": [...],
                    "id": "88fd3f75a0d24b0380ddc40484d7a31b",
                }
        """
        return self._call_endpoint(method="GET", route=endpoint)


def run_local(name, model_uri, flavor=None, config=None):
    # Required by the mlflow deployments plugin interface; not supported for this target.
    pass


def target_help():
    # Required by the mlflow deployments plugin interface; no extra help text.
    pass