/ mlflow / telemetry / schemas.py
schemas.py
  1  import json
  2  import platform
  3  import sys
  4  from dataclasses import dataclass
  5  from enum import Enum
  6  from typing import Any
  7  
  8  from mlflow.version import IS_MLFLOW_SKINNY, IS_TRACING_SDK_ONLY, VERSION
  9  
 10  
 11  class Status(str, Enum):
 12      UNKNOWN = "unknown"
 13      SUCCESS = "success"
 14      FAILURE = "failure"
 15  
 16  
 17  @dataclass
 18  class Record:
 19      event_name: str
 20      timestamp_ns: int
 21      params: dict[str, Any] | None = None
 22      status: Status = Status.UNKNOWN
 23      duration_ms: int | None = None
 24      # installation and session ID usually comes from the telemetry client,
 25      # but callers can override with these fields (e.g. in UI telemetry records)
 26      installation_id: str | None = None
 27      session_id: str | None = None
 28      server_installation_id: str | None = None
 29  
 30      def to_dict(self) -> dict[str, Any]:
 31          result = {
 32              "timestamp_ns": self.timestamp_ns,
 33              "event_name": self.event_name,
 34              # dump params to string so we can parse them easily in ETL pipeline
 35              "params": json.dumps(self.params) if self.params else None,
 36              "status": self.status.value,
 37              "duration_ms": self.duration_ms,
 38          }
 39          if self.installation_id:
 40              result["installation_id"] = self.installation_id
 41          if self.session_id:
 42              result["session_id"] = self.session_id
 43          if self.server_installation_id:
 44              result["server_installation_id"] = self.server_installation_id
 45          return result
 46  
 47  
 48  class Environment(str, Enum):
 49      KAGGLE = "kaggle"
 50      COLAB = "colab"
 51      AZURE_ML = "azure_ml"
 52      SAGEMAKER_STUDIO = "sagemaker_studio"
 53      SAGEMAKER_NOTEBOOK = "sagemaker_notebook"
 54      DOCKER = "docker"
 55  
 56  
 57  # The following env vars were found by manually inspecting
 58  # env vars in the specified environments and avoiding potentially
 59  # PII-containing variables.
 60  ENV_VAR_TO_ENVIRONMENT_MAP = {
 61      # Undocumented env var in Kaggle notebooks
 62      # https://www.kaggle.com/discussions/general/147433
 63      "KAGGLE_KERNEL_RUN_TYPE": Environment.KAGGLE,
 64      # Undocumented env var in Colab notebooks
 65      "COLAB_RELEASE_TAG": Environment.COLAB,
 66      # Undocumented env var in AzureML notebooks
 67      "AZUREML_FRAMEWORK": Environment.AZURE_ML,
 68      # Internal env var that SageMaker inserts
 69      # https://docs.aws.amazon.com/sagemaker/latest/dg/studio-updated-byoi-specs.html#studio-updated-byoi-specs-run
 70      "SAGEMAKER_APP_TYPE": Environment.SAGEMAKER_STUDIO,
 71  }
 72  
 73  
 74  class SourceSDK(str, Enum):
 75      MLFLOW_TRACING = "mlflow-tracing"
 76      MLFLOW = "mlflow"
 77      MLFLOW_SKINNY = "mlflow-skinny"
 78  
 79  
 80  def get_source_sdk() -> SourceSDK:
 81      if IS_TRACING_SDK_ONLY:
 82          return SourceSDK.MLFLOW_TRACING
 83      elif IS_MLFLOW_SKINNY:
 84          return SourceSDK.MLFLOW_SKINNY
 85      else:
 86          return SourceSDK.MLFLOW
 87  
 88  
 89  @dataclass
 90  class TelemetryInfo:
 91      session_id: str
 92      source_sdk: str = get_source_sdk().value
 93      mlflow_version: str = VERSION
 94      schema_version: int = 2
 95      python_version: str = (
 96          f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
 97      )
 98      operating_system: str = platform.platform()
 99      environment: str | None = None
100      tracking_uri_scheme: str | None = None
101      is_localhost: bool | None = None
102      installation_id: str | None = None
103      # Whether a workspace is enabled at client side or not. Using short name to
104      # minimize the payload size, because these fields are included to every
105      # telemetry event.
106      ws_enabled: bool | None = None
107  
108  
@dataclass
class TelemetryConfig:
    """Telemetry configuration: an ingestion URL and a set of event names.

    Both fields are required.
    """

    ingestion_url: str
    disable_events: set[str]