schemas.py
import json
import platform
import sys
from dataclasses import dataclass
from enum import Enum
from typing import Any

from mlflow.version import IS_MLFLOW_SKINNY, IS_TRACING_SDK_ONLY, VERSION


class Status(str, Enum):
    """Status values that can be attached to a ``Record``."""

    UNKNOWN = "unknown"
    SUCCESS = "success"
    FAILURE = "failure"


@dataclass
class Record:
    """A single event entry, convertible to a plain dict via ``to_dict``."""

    event_name: str
    timestamp_ns: int
    params: dict[str, Any] | None = None
    status: Status = Status.UNKNOWN
    duration_ms: int | None = None
    # The installation and session IDs normally come from the telemetry client;
    # callers may override them through these fields (e.g. for UI telemetry
    # records).
    installation_id: str | None = None
    session_id: str | None = None
    server_installation_id: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return this record as a dict.

        ``params`` is emitted as a JSON string (or ``None`` when it is unset
        or empty). The three optional ID fields are only included when they
        hold a truthy value.
        """
        # Params are dumped to a string so the ETL pipeline can parse them
        # easily.
        serialized_params = None
        if self.params:
            serialized_params = json.dumps(self.params)

        payload: dict[str, Any] = {
            "timestamp_ns": self.timestamp_ns,
            "event_name": self.event_name,
            "params": serialized_params,
            "status": self.status.value,
            "duration_ms": self.duration_ms,
        }

        # Optional overrides are appended in a fixed order, skipping unset ones.
        for key in ("installation_id", "session_id", "server_installation_id"):
            if value := getattr(self, key):
                payload[key] = value
        return payload


class Environment(str, Enum):
    """Names of the execution environments that can be reported."""

    KAGGLE = "kaggle"
    COLAB = "colab"
    AZURE_ML = "azure_ml"
    SAGEMAKER_STUDIO = "sagemaker_studio"
    SAGEMAKER_NOTEBOOK = "sagemaker_notebook"
    DOCKER = "docker"


# The env vars below were identified by manually inspecting the environment
# variables present in each listed environment, while avoiding any variables
# that could contain PII.
ENV_VAR_TO_ENVIRONMENT_MAP = {
    # Undocumented env var in Kaggle notebooks
    # https://www.kaggle.com/discussions/general/147433
    "KAGGLE_KERNEL_RUN_TYPE": Environment.KAGGLE,
    # Undocumented env var in Colab notebooks
    "COLAB_RELEASE_TAG": Environment.COLAB,
    # Undocumented env var in AzureML notebooks
    "AZUREML_FRAMEWORK": Environment.AZURE_ML,
    # Internal env var that SageMaker inserts
    # https://docs.aws.amazon.com/sagemaker/latest/dg/studio-updated-byoi-specs.html#studio-updated-byoi-specs-run
    "SAGEMAKER_APP_TYPE": Environment.SAGEMAKER_STUDIO,
}


class SourceSDK(str, Enum):
    """Identifiers for the SDK distribution, as returned by ``get_source_sdk``."""

    MLFLOW_TRACING = "mlflow-tracing"
    MLFLOW = "mlflow"
    MLFLOW_SKINNY = "mlflow-skinny"


def get_source_sdk() -> SourceSDK:
    """Return the ``SourceSDK`` member selected by the version flags.

    ``IS_TRACING_SDK_ONLY`` is checked first, then ``IS_MLFLOW_SKINNY``;
    ``SourceSDK.MLFLOW`` is returned when neither flag is set.
    """
    if IS_TRACING_SDK_ONLY:
        return SourceSDK.MLFLOW_TRACING
    if IS_MLFLOW_SKINNY:
        return SourceSDK.MLFLOW_SKINNY
    return SourceSDK.MLFLOW


@dataclass
class TelemetryInfo:
    """Session-level fields that are included in every telemetry event.

    The non-``None`` defaults are computed once, when the class body is
    executed, and then shared by every instance that does not override them.
    """

    session_id: str
    source_sdk: str = get_source_sdk().value
    mlflow_version: str = VERSION
    schema_version: int = 2
    # "<major>.<minor>.<micro>" of the running interpreter.
    python_version: str = ".".join(map(str, sys.version_info[:3]))
    operating_system: str = platform.platform()
    environment: str | None = None
    tracking_uri_scheme: str | None = None
    is_localhost: bool | None = None
    installation_id: str | None = None
    # Whether a workspace is enabled on the client side. The name is kept
    # short to minimize payload size, since these fields are included in
    # every telemetry event.
    ws_enabled: bool | None = None


@dataclass
class TelemetryConfig:
    """Holds an ingestion URL and a set of event names under ``disable_events``."""

    ingestion_url: str
    disable_events: set[str]