__init__.py
import json
import posixpath
import warnings
from typing import Any, Iterator

from mlflow.deployments import BaseDeploymentClient
from mlflow.deployments.constants import (
    MLFLOW_DEPLOYMENT_CLIENT_REQUEST_RETRY_CODES,
)
from mlflow.environment_variables import (
    MLFLOW_DEPLOYMENT_PREDICT_TIMEOUT,
    MLFLOW_DEPLOYMENT_PREDICT_TOTAL_TIMEOUT,
    MLFLOW_HTTP_REQUEST_TIMEOUT,
)
from mlflow.exceptions import MlflowException
from mlflow.utils import AttrDict
from mlflow.utils.annotations import deprecated
from mlflow.utils.databricks_utils import get_databricks_host_creds
from mlflow.utils.rest_utils import (
    augmented_raise_for_status,
    http_request,
    validate_deployment_timeout_config,
)


class DatabricksEndpoint(AttrDict):
    """
    A dictionary-like object representing a Databricks serving endpoint.

    .. code-block:: python

        endpoint = DatabricksEndpoint(
            {
                "name": "chat",
                "creator": "alice@company.com",
                "creation_timestamp": 0,
                "last_updated_timestamp": 0,
                "state": {...},
                "config": {...},
                "tags": [...],
                "id": "88fd3f75a0d24b0380ddc40484d7a31b",
            }
        )
        assert endpoint.name == "chat"
    """


class DatabricksDeploymentClient(BaseDeploymentClient):
    """
    Client for interacting with Databricks serving endpoints.

    Example:

    First, set up credentials for authentication:

    .. code-block:: bash

        export DATABRICKS_HOST=...
        export DATABRICKS_TOKEN=...

    .. seealso::

        See https://docs.databricks.com/en/dev-tools/auth.html for other authentication methods.

    Then, create a deployment client and use it to interact with Databricks serving endpoints:

    .. code-block:: python

        from mlflow.deployments import get_deploy_client

        client = get_deploy_client("databricks")
        endpoints = client.list_endpoints()
        assert endpoints == [
            {
                "name": "chat",
                "creator": "alice@company.com",
                "creation_timestamp": 0,
                "last_updated_timestamp": 0,
                "state": {...},
                "config": {...},
                "tags": [...],
                "id": "88fd3f75a0d24b0380ddc40484d7a31b",
            },
        ]
    """

    def create_deployment(self, name, model_uri, flavor=None, config=None, endpoint=None):
        """
        .. warning::

            This method is not implemented for `DatabricksDeploymentClient`.
        """
        raise NotImplementedError

    def update_deployment(self, name, model_uri=None, flavor=None, config=None, endpoint=None):
        """
        .. warning::

            This method is not implemented for `DatabricksDeploymentClient`.
        """
        raise NotImplementedError

    def delete_deployment(self, name, config=None, endpoint=None):
        """
        .. warning::

            This method is not implemented for `DatabricksDeploymentClient`.
        """
        raise NotImplementedError

    def list_deployments(self, endpoint=None):
        """
        .. warning::

            This method is not implemented for `DatabricksDeploymentClient`.
        """
        raise NotImplementedError

    def get_deployment(self, name, endpoint=None):
        """
        .. warning::

            This method is not implemented for `DatabricksDeploymentClient`.
        """
        raise NotImplementedError

    def _issue_request(
        self,
        *,
        method: str,
        prefix: str,
        route: str | None,
        json_body: dict[str, Any] | None,
        timeout: int | None,
        retry_timeout_seconds: int | None,
        stream: bool = False,
    ):
        """
        Issue an HTTP request to a serving-endpoints route and return the raw response.

        Shared plumbing for :meth:`_call_endpoint` and :meth:`_call_endpoint_stream`:
        validates the timeout configuration, routes the payload as query params for GET
        and as a JSON body otherwise, and raises on HTTP error statuses.

        Args:
            method: HTTP method (GET, POST, etc.).
            prefix: API prefix path.
            route: Endpoint route, appended after ``serving-endpoints``.
            json_body: Request payload (query params for GET, JSON body otherwise).
            timeout: Maximum time (in seconds) for a single HTTP request; falls back to
                ``MLFLOW_HTTP_REQUEST_TIMEOUT`` when ``None``.
            retry_timeout_seconds: Maximum time (in seconds) for all retry attempts combined.
            stream: Whether to receive the response content as a stream.

        Returns:
            The ``requests.Response`` object (already checked for HTTP errors).
        """
        validate_deployment_timeout_config(timeout, retry_timeout_seconds)

        # GET requests carry the payload as query parameters; everything else as JSON.
        if method.lower() == "get":
            call_kwargs = {"params": json_body}
        else:
            call_kwargs = {"json": json_body}

        response = http_request(
            host_creds=get_databricks_host_creds(self.target_uri),
            endpoint=posixpath.join(prefix, "serving-endpoints", route or ""),
            method=method,
            timeout=MLFLOW_HTTP_REQUEST_TIMEOUT.get() if timeout is None else timeout,
            retry_timeout_seconds=retry_timeout_seconds,
            raise_on_status=False,
            retry_codes=MLFLOW_DEPLOYMENT_CLIENT_REQUEST_RETRY_CODES,
            extra_headers={"X-Databricks-Endpoints-API-Client": "Databricks Deployment Client"},
            stream=stream,
            **call_kwargs,
        )
        augmented_raise_for_status(response)
        return response

    def _call_endpoint(
        self,
        *,
        method: str,
        prefix: str = "/api/2.0",
        route: str | None = None,
        json_body: dict[str, Any] | None = None,
        timeout: int | None = None,
        retry_timeout_seconds: int | None = None,
    ) -> "DatabricksEndpoint":
        """
        Call a serving-endpoints API route and parse the JSON response.

        Args:
            method: HTTP method (GET, POST, etc.).
            prefix: API prefix path.
            route: Endpoint route.
            json_body: Request payload.
            timeout: Maximum time (in seconds) for a single HTTP request.
            retry_timeout_seconds: Maximum time (in seconds) for all retry attempts combined.

        Returns:
            A :py:class:`DatabricksEndpoint` wrapping the parsed JSON response.
        """
        response = self._issue_request(
            method=method,
            prefix=prefix,
            route=route,
            json_body=json_body,
            timeout=timeout,
            retry_timeout_seconds=retry_timeout_seconds,
        )
        return DatabricksEndpoint(response.json())

    def _call_endpoint_stream(
        self,
        *,
        method: str,
        prefix: str = "/api/2.0",
        route: str | None = None,
        json_body: dict[str, Any] | None = None,
        timeout: int | None = None,
        retry_timeout_seconds: int | None = None,
    ) -> Iterator[str]:
        """
        Call a serving-endpoints API route and yield the response line by line.

        Args:
            method: HTTP method (GET, POST, etc.).
            prefix: API prefix path.
            route: Endpoint route.
            json_body: Request payload.
            timeout: Maximum time (in seconds) for a single HTTP request.
            retry_timeout_seconds: Maximum time (in seconds) for all retry attempts combined.

        Returns:
            An iterator over the non-empty, stripped lines of the streaming response body.
        """
        response = self._issue_request(
            method=method,
            prefix=prefix,
            route=route,
            json_body=json_body,
            timeout=timeout,
            retry_timeout_seconds=retry_timeout_seconds,
            stream=True,  # Receive response content in streaming way.
        )

        # Streaming response content are composed of multiple lines.
        # Each line format depends on specific endpoint
        # Explicitly set the encoding to `utf-8` so the `decode_unicode` in the next line
        # will decode correctly
        response.encoding = "utf-8"
        return (
            line.strip()
            for line in response.iter_lines(decode_unicode=True)
            if line.strip()  # filter out keep-alive new lines
        )

    def predict(self, deployment_name=None, inputs=None, endpoint=None):
        """
        Query a serving endpoint with the provided model inputs.
        See https://docs.databricks.com/api/workspace/servingendpoints/query for request/response
        schema.

        Args:
            deployment_name: Unused.
            inputs: A dictionary containing the model inputs to query.
            endpoint: The name of the serving endpoint to query.

        Returns:
            A :py:class:`DatabricksEndpoint` object containing the query response.

        Example:

            .. code-block:: python

                from mlflow.deployments import get_deploy_client

                client = get_deploy_client("databricks")
                response = client.predict(
                    endpoint="chat",
                    inputs={
                        "messages": [
                            {"role": "user", "content": "Hello!"},
                        ],
                    },
                )
                assert response == {
                    "id": "chatcmpl-8OLm5kfqBAJD8CpsMANESWKpLSLXY",
                    "object": "chat.completion",
                    "created": 1700814265,
                    "model": "gpt-4-0613",
                    "choices": [
                        {
                            "index": 0,
                            "message": {
                                "role": "assistant",
                                "content": "Hello! How can I assist you today?",
                            },
                            "finish_reason": "stop",
                        }
                    ],
                    "usage": {
                        "prompt_tokens": 9,
                        "completion_tokens": 9,
                        "total_tokens": 18,
                    },
                }
        """
        return self._call_endpoint(
            method="POST",
            prefix="/",
            route=posixpath.join(endpoint, "invocations"),
            json_body=inputs,
            timeout=MLFLOW_DEPLOYMENT_PREDICT_TIMEOUT.get(),
            retry_timeout_seconds=MLFLOW_DEPLOYMENT_PREDICT_TOTAL_TIMEOUT.get(),
        )

    def predict_stream(
        self, deployment_name=None, inputs=None, endpoint=None
    ) -> Iterator[dict[str, Any]]:
        """
        Submit a query to a configured provider endpoint, and get streaming response

        Args:
            deployment_name: Unused.
            inputs: The inputs to the query, as a dictionary.
            endpoint: The name of the endpoint to query.

        Returns:
            An iterator of dictionary containing the response from the endpoint.

        Raises:
            MlflowException: If a streamed line does not follow the server-sent-events
                ``data: <value>`` format.

        Example:

            .. code-block:: python

                from mlflow.deployments import get_deploy_client

                client = get_deploy_client("databricks")
                chunk_iter = client.predict_stream(
                    endpoint="databricks-llama-2-70b-chat",
                    inputs={
                        "messages": [{"role": "user", "content": "Hello!"}],
                        "temperature": 0.0,
                        "n": 1,
                        "max_tokens": 500,
                    },
                )
                for chunk in chunk_iter:
                    print(chunk)
                    # Example:
                    # {
                    #     "id": "82a834f5-089d-4fc0-ad6c-db5c7d6a6129",
                    #     "object": "chat.completion.chunk",
                    #     "created": 1712133837,
                    #     "model": "llama-2-70b-chat-030424",
                    #     "choices": [
                    #         {
                    #             "index": 0, "delta": {"role": "assistant", "content": "Hello"},
                    #             "finish_reason": None,
                    #         }
                    #     ],
                    #     "usage": {"prompt_tokens": 11, "completion_tokens": 1, "total_tokens": 12},
                    # }
        """
        inputs = inputs or {}

        # Add stream=True param in request body to get streaming response
        # See https://docs.databricks.com/api/workspace/servingendpoints/query#stream
        chunk_line_iter = self._call_endpoint_stream(
            method="POST",
            prefix="/",
            route=posixpath.join(endpoint, "invocations"),
            json_body={**inputs, "stream": True},
            timeout=MLFLOW_DEPLOYMENT_PREDICT_TIMEOUT.get(),
            retry_timeout_seconds=MLFLOW_DEPLOYMENT_PREDICT_TOTAL_TIMEOUT.get(),
        )

        for line in chunk_line_iter:
            # Each streamed line is expected to be a server-sent event: "data: <json>".
            splits = line.split(":", 1)
            if len(splits) < 2:
                raise MlflowException(
                    f"Unknown response format: '{line}', "
                    "expected 'data: <value>' for streaming response."
                )
            key, value = splits
            if key != "data":
                raise MlflowException(
                    f"Unknown response format with key '{key}'. "
                    f"Expected 'data: <value>' for streaming response, got '{line}'."
                )

            value = value.strip()
            if value == "[DONE]":
                # Databricks endpoint streaming response ends with
                # a line of "data: [DONE]"
                return

            yield json.loads(value)

    def create_endpoint(self, name=None, config=None, route_optimized=None):
        """
        Create a new serving endpoint with the provided name and configuration.

        See https://docs.databricks.com/api/workspace/servingendpoints/create for request/response
        schema.

        Args:
            name: The name of the serving endpoint to create.

                .. warning::
                    Deprecated. Include `name` in `config` instead.

            config: A dictionary containing either the full API request payload
                or the configuration of the serving endpoint to create.
            route_optimized: A boolean which defines whether databricks serving endpoint
                is optimized for routing traffic. Only used in the deprecated approach.
                Defaults to ``None`` (treated as ``False`` in the legacy payload) so that
                an unset parameter is distinguishable from an explicit ``False``.

                .. warning::
                    Deprecated. Include `route_optimized` in `config` instead.

        Returns:
            A :py:class:`DatabricksEndpoint` object containing the request response.

        Raises:
            MlflowException: If `name` is missing from both the parameter and the config,
                or if conflicting `name`/`route_optimized` values are supplied.

        Example:

            .. code-block:: python

                from mlflow.deployments import get_deploy_client

                client = get_deploy_client("databricks")
                endpoint = client.create_endpoint(
                    config={
                        "name": "test",
                        "config": {
                            "served_entities": [
                                {
                                    "external_model": {
                                        "name": "gpt-4",
                                        "provider": "openai",
                                        "task": "llm/v1/chat",
                                        "openai_config": {
                                            "openai_api_key": "{{secrets/scope/key}}",
                                        },
                                    },
                                }
                            ],
                            "route_optimized": True,
                        },
                    },
                )
                assert endpoint == {
                    "name": "test",
                    "creator": "alice@company.com",
                    "creation_timestamp": 0,
                    "last_updated_timestamp": 0,
                    "state": {...},
                    "config": {...},
                    "tags": [...],
                    "id": "88fd3f75a0d24b0380ddc40484d7a31b",
                    "permission_level": "CAN_MANAGE",
                    "route_optimized": False,
                    "task": "llm/v1/chat",
                    "endpoint_type": "EXTERNAL_MODEL",
                    "creator_display_name": "Alice",
                    "creator_kind": "User",
                }
        """
        warnings_list = []

        if config and "config" in config:
            # Using new style: full API request payload
            payload = config.copy()

            # Validate name conflicts
            if "name" in payload:
                if name is not None:
                    if payload["name"] == name:
                        warnings_list.append(
                            "Passing 'name' as a parameter is deprecated. "
                            "Please specify 'name' only within the config dictionary."
                        )
                    else:
                        raise MlflowException(
                            f"Name mismatch. Found '{name}' as parameter and '{payload['name']}' "
                            "in config. Please specify 'name' only within the config dictionary "
                            "as this parameter is deprecated."
                        )
            else:
                if name is None:
                    raise MlflowException(
                        "The 'name' field is required. Please specify it within the config "
                        "dictionary."
                    )
                payload["name"] = name
                warnings_list.append(
                    "Passing 'name' as a parameter is deprecated. "
                    "Please specify 'name' within the config dictionary."
                )

            # Validate route_optimized conflicts. Only treat the deprecated parameter as
            # "supplied" when it is not None; previously a `False` default made a config-only
            # `route_optimized: True` spuriously raise a conflict error.
            if "route_optimized" in payload:
                if route_optimized is not None:
                    if payload["route_optimized"] != route_optimized:
                        raise MlflowException(
                            "Conflicting 'route_optimized' values found. "
                            "Please specify 'route_optimized' only within the config dictionary "
                            "as this parameter is deprecated."
                        )
                    warnings_list.append(
                        "Passing 'route_optimized' as a parameter is deprecated. "
                        "Please specify 'route_optimized' only within the config dictionary."
                    )
            else:
                if route_optimized:
                    payload["route_optimized"] = route_optimized
                    warnings_list.append(
                        "Passing 'route_optimized' as a parameter is deprecated. "
                        "Please specify 'route_optimized' within the config dictionary."
                    )
        else:
            # Handle legacy format (backwards compatibility)
            warnings_list.append(
                "Passing 'name', 'config', and 'route_optimized' as separate parameters is "
                "deprecated. Please pass the full API request payload as a single dictionary "
                "in the 'config' parameter."
            )
            config = config.copy() if config else {}  # avoid mutating config
            # Tags and rate limits are top-level fields in the API payload, so hoist them
            # out of the legacy endpoint-config dictionary.
            extras = {}
            for key in ("tags", "rate_limits"):
                if value := config.pop(key, None):
                    extras[key] = value
            # Coerce to bool so the payload keeps sending `False` when the deprecated
            # parameter was left unset (None).
            payload = {
                "name": name,
                "config": config,
                "route_optimized": bool(route_optimized),
                **extras,
            }

        if warnings_list:
            warnings.warn("\n".join(warnings_list), UserWarning)

        return self._call_endpoint(method="POST", json_body=payload)

    @deprecated(
        alternative=(
            "update_endpoint_config, update_endpoint_tags, update_endpoint_rate_limits, "
            "or update_endpoint_ai_gateway"
        )
    )
    def update_endpoint(self, endpoint, config=None):
        """
        Update a specified serving endpoint with the provided configuration.
        See https://docs.databricks.com/api/workspace/servingendpoints/updateconfig for
        request/response schema.

        Args:
            endpoint: The name of the serving endpoint to update.
            config: A dictionary containing the configuration of the serving endpoint to update.
                If it contains only a ``rate_limits`` key, the rate-limits API is called
                instead of the config API.

        Returns:
            A :py:class:`DatabricksEndpoint` object containing the request response.

        Example:

            .. code-block:: python

                from mlflow.deployments import get_deploy_client

                client = get_deploy_client("databricks")
                endpoint = client.update_endpoint(
                    endpoint="chat",
                    config={
                        "served_entities": [
                            {
                                "name": "test",
                                "external_model": {
                                    "name": "gpt-4",
                                    "provider": "openai",
                                    "task": "llm/v1/chat",
                                    "openai_config": {
                                        "openai_api_key": "{{secrets/scope/key}}",
                                    },
                                },
                            }
                        ],
                    },
                )

                rate_limits = client.update_endpoint(
                    endpoint="chat",
                    config={
                        "rate_limits": [
                            {
                                "key": "user",
                                "renewal_period": "minute",
                                "calls": 10,
                            }
                        ],
                    },
                )
                assert rate_limits == {
                    "rate_limits": [
                        {
                            "key": "user",
                            "renewal_period": "minute",
                            "calls": 10,
                        }
                    ],
                }
        """
        warnings.warn(
            "The `update_endpoint` method is deprecated. Use the specific update methods—"
            "`update_endpoint_config`, `update_endpoint_tags`, `update_endpoint_rate_limits`, "
            "`update_endpoint_ai_gateway`—instead.",
            UserWarning,
        )

        # Guard against `config=None` (allowed by the signature); `list(None)` would raise
        # a TypeError before reaching the API call.
        if config is not None and list(config) == ["rate_limits"]:
            return self._call_endpoint(
                method="PUT", route=posixpath.join(endpoint, "rate-limits"), json_body=config
            )
        else:
            return self._call_endpoint(
                method="PUT", route=posixpath.join(endpoint, "config"), json_body=config
            )

    def update_endpoint_config(self, endpoint, config):
        """
        Update the configuration of a specified serving endpoint. See
        https://docs.databricks.com/api/workspace/servingendpoints/updateconfig for
        request/response schema.

        Args:
            endpoint: The name of the serving endpoint to update.
            config: A dictionary containing the configuration of the serving endpoint to update.

        Returns:
            A :py:class:`DatabricksEndpoint` object containing the request response.

        Example:

            .. code-block:: python

                from mlflow.deployments import get_deploy_client

                client = get_deploy_client("databricks")
                updated_endpoint = client.update_endpoint_config(
                    endpoint="test",
                    config={
                        "served_entities": [
                            {
                                "name": "gpt-4o-mini",
                                "external_model": {
                                    "name": "gpt-4o-mini",
                                    "provider": "openai",
                                    "task": "llm/v1/chat",
                                    "openai_config": {
                                        "openai_api_key": "{{secrets/scope/key}}",
                                    },
                                },
                            }
                        ]
                    },
                )
        """
        return self._call_endpoint(
            method="PUT", route=posixpath.join(endpoint, "config"), json_body=config
        )

    def update_endpoint_tags(self, endpoint, config):
        """
        Update the tags of a specified serving endpoint. See
        https://docs.databricks.com/api/workspace/servingendpoints/patch for request/response
        schema.

        Args:
            endpoint: The name of the serving endpoint to update.
            config: A dictionary containing tags to add and/or remove.

        Returns:
            A :py:class:`DatabricksEndpoint` object containing the request response.

        Example:

            .. code-block:: python

                from mlflow.deployments import get_deploy_client

                client = get_deploy_client("databricks")
                updated_tags = client.update_endpoint_tags(
                    endpoint="test", config={"add_tags": [{"key": "project", "value": "test"}]}
                )
                assert updated_tags == {"tags": [{"key": "project", "value": "test"}]}
        """
        return self._call_endpoint(
            method="PATCH", route=posixpath.join(endpoint, "tags"), json_body=config
        )

    def update_endpoint_rate_limits(self, endpoint, config):
        """
        Update the rate limits of a specified serving endpoint.
        See https://docs.databricks.com/api/workspace/servingendpoints/put for request/response
        schema.

        Args:
            endpoint: The name of the serving endpoint to update.
            config: A dictionary containing the updated rate limit configuration.

        Returns:
            A :py:class:`DatabricksEndpoint` object containing the updated rate limits.

        Example:

            .. code-block:: python

                from mlflow.deployments import get_deploy_client

                client = get_deploy_client("databricks")
                name = "databricks-dbrx-instruct"
                rate_limits = {
                    "rate_limits": [{"calls": 10, "key": "endpoint", "renewal_period": "minute"}]
                }
                updated_rate_limits = client.update_endpoint_rate_limits(
                    endpoint=name, config=rate_limits
                )
                assert updated_rate_limits == {
                    "rate_limits": [{"calls": 10, "key": "endpoint", "renewal_period": "minute"}]
                }
        """
        return self._call_endpoint(
            method="PUT", route=posixpath.join(endpoint, "rate-limits"), json_body=config
        )

    def update_endpoint_ai_gateway(self, endpoint, config):
        """
        Update the AI Gateway configuration of a specified serving endpoint.

        Args:
            endpoint (str): The name of the serving endpoint to update.
            config (dict): A dictionary containing the AI Gateway configuration to update.

        Returns:
            dict: A dictionary containing the updated AI Gateway configuration.

        Example:

            .. code-block:: python

                from mlflow.deployments import get_deploy_client

                client = get_deploy_client("databricks")
                gateway_config = {
                    "usage_tracking_config": {"enabled": True},
                    "inference_table_config": {
                        "enabled": True,
                        "catalog_name": "my_catalog",
                        "schema_name": "my_schema",
                    },
                }
                updated_gateway = client.update_endpoint_ai_gateway(
                    endpoint="test", config=gateway_config
                )
        """
        return self._call_endpoint(
            method="PUT", route=posixpath.join(endpoint, "ai-gateway"), json_body=config
        )

    def delete_endpoint(self, endpoint):
        """
        Delete a specified serving endpoint.
        See https://docs.databricks.com/api/workspace/servingendpoints/delete for request/response
        schema.

        Args:
            endpoint: The name of the serving endpoint to delete.

        Returns:
            A DatabricksEndpoint object containing the request response.

        Example:

            .. code-block:: python

                from mlflow.deployments import get_deploy_client

                client = get_deploy_client("databricks")
                client.delete_endpoint(endpoint="chat")
        """
        return self._call_endpoint(method="DELETE", route=endpoint)

    def list_endpoints(self):
        """
        Retrieve all serving endpoints.

        See https://docs.databricks.com/api/workspace/servingendpoints/list for request/response
        schema.

        Returns:
            A list of :py:class:`DatabricksEndpoint` objects containing the request response.

        Example:

            .. code-block:: python

                from mlflow.deployments import get_deploy_client

                client = get_deploy_client("databricks")
                endpoints = client.list_endpoints()
                assert endpoints == [
                    {
                        "name": "chat",
                        "creator": "alice@company.com",
                        "creation_timestamp": 0,
                        "last_updated_timestamp": 0,
                        "state": {...},
                        "config": {...},
                        "tags": [...],
                        "id": "88fd3f75a0d24b0380ddc40484d7a31b",
                    },
                ]
        """
        return self._call_endpoint(method="GET").endpoints

    def get_endpoint(self, endpoint):
        """
        Get a specified serving endpoint.
        See https://docs.databricks.com/api/workspace/servingendpoints/get for request/response
        schema.

        Args:
            endpoint: The name of the serving endpoint to get.

        Returns:
            A DatabricksEndpoint object containing the request response.

        Example:

            .. code-block:: python

                from mlflow.deployments import get_deploy_client

                client = get_deploy_client("databricks")
                endpoint = client.get_endpoint(endpoint="chat")
                assert endpoint == {
                    "name": "chat",
                    "creator": "alice@company.com",
                    "creation_timestamp": 0,
                    "last_updated_timestamp": 0,
                    "state": {...},
                    "config": {...},
                    "tags": [...],
                    "id": "88fd3f75a0d24b0380ddc40484d7a31b",
                }
        """
        return self._call_endpoint(method="GET", route=endpoint)


def run_local(name, model_uri, flavor=None, config=None):
    # Required by the mlflow deployments plugin interface; not supported for this target.
    pass


def target_help():
    # Required by the mlflow deployments plugin interface; no extra help text.
    pass