# utils.py
"""URI and config-loading helpers.

``_URI`` routes dataset/snapshot transfer to the local filesystem, a
``RemoteWorkspace`` or a ``CloudWorkspace`` depending on the URI prefix.
``load_config`` reads a ``_Config`` from a JSON file or from a Python module.
"""

import importlib.util
import json
import os.path
from importlib import import_module
from pathlib import Path
from typing import Optional
from typing import Type
from typing import TypeVar
from typing import Union

from evidently import Dataset
from evidently._pydantic_compat import BaseModel
from evidently._pydantic_compat import parse_obj_as
from evidently.core.report import Snapshot
from evidently.legacy.ui.type_aliases import DatasetID
from evidently.legacy.ui.type_aliases import ProjectID
from evidently.legacy.utils import NumpyEncoder
from evidently.ui.workspace import CloudWorkspace
from evidently.ui.workspace import RemoteWorkspace

# Full scheme prefixes, so that a local path such as "http_metrics.json" is
# not mistaken for a remote workspace URL.
_REMOTE_SCHEMES = ("http://", "https://")

# File suffixes that are handed to the Python loader instead of the JSON one.
_PYTHON_SUFFIXES = (".py", ".pyc", ".pyw")


class _URI:
    """A location string: local path, remote workspace URL or ``cloud://`` URI.

    For remote and cloud URIs a workspace client is created eagerly and
    stored in ``self.ws``; for local paths ``self.ws`` is never set.
    """

    def __init__(self, uri: str):
        self.uri = uri
        if self.is_cloud or self.is_remote:
            self.ws = self._get_ws()

    def _get_ws(self) -> Union[RemoteWorkspace, CloudWorkspace]:
        """Build the workspace client matching the URI scheme.

        Returns:
            A ``RemoteWorkspace`` pointed at ``<scheme>://<host>`` for remote
            URIs, or a ``CloudWorkspace`` for ``cloud://`` URIs. The cloud
            workspace gets ``https://<host>`` when the URI has a
            ``<host>/<id>`` form and ``url=None`` when it is a bare ID.

        Raises:
            ValueError: If the URI is neither remote nor cloud.
        """
        if self.is_remote:
            proto, addr = self.uri.split("://", maxsplit=1)
            return RemoteWorkspace(f"{proto}://{addr.split('/')[0]}")
        if self.is_cloud:
            _, addr = self.uri.split("://", maxsplit=1)
            # "host/id" carries an explicit host; a bare "id" does not.
            host, slash, _ = addr.partition("/")
            base_url = f"https://{host}" if slash else None
            return CloudWorkspace(url=base_url)
        raise ValueError(f"{self.uri} is not a valid remote or cloud URI")

    @property
    def is_cloud(self):
        """True when the URI starts with ``cloud://``."""
        return self.uri.startswith("cloud://")

    @property
    def is_remote(self):
        """True when the URI starts with ``http://`` or ``https://``."""
        return self.uri.startswith(_REMOTE_SCHEMES)

    @property
    def is_local(self):
        """True when the URI is neither cloud nor remote."""
        return not self.is_cloud and not self.is_remote

    def load_dataset(self) -> Dataset:
        """Load the dataset this URI points at.

        Local URIs go through ``Dataset.load``; cloud URIs use the last
        ``/``-separated segment as the dataset ID.

        Raises:
            ValueError: For remote URIs, which do not support dataset loading.
        """
        if self.is_local:
            return Dataset.load(self.uri)
        if self.is_remote:
            raise ValueError("Remote workspace does not support dataset loading")
        if self.is_cloud:
            assert isinstance(self.ws, CloudWorkspace)
            return self.ws.load_dataset(DatasetID(self.uri.split("/")[-1]))
        raise ValueError(f"{self.uri} is not a valid dataset URI")

    def upload_snapshot(self, snapshot: Snapshot, include_datasets: bool):
        """Store ``snapshot`` at this URI.

        Args:
            snapshot: Snapshot to persist.
            include_datasets: Forwarded to ``ws.add_run`` for remote/cloud
                URIs; unused for local files.

        Returns:
            The local path for local URIs, otherwise the ``url`` of the
            reference returned by ``ws.add_run``.
        """
        if self.is_local:
            # Serialize before opening so that a serialization failure does
            # not truncate an existing file.
            payload = json.dumps(
                snapshot.to_snapshot_model().dict(),
                indent=2,
                ensure_ascii=False,
                cls=NumpyEncoder,
            )
            # ensure_ascii=False emits raw non-ASCII characters, so the file
            # must be written as UTF-8 rather than the platform default.
            with open(self.uri, "w", encoding="utf-8") as f:
                f.write(payload)
            return self.uri
        if self.is_remote or self.is_cloud:
            project_id = self.uri.split("/")[-1]
            ref = self.ws.add_run(project_id, snapshot, include_datasets)
            return ref.url
        raise ValueError(f"{self.uri} is not a valid URI")

    def upload_dataset(self, dataset: Dataset, name: Optional[str]) -> str:
        """Store ``dataset`` at this URI.

        Args:
            dataset: Dataset to persist.
            name: Dataset name for cloud uploads; ``None`` is sent as ``""``.

        Returns:
            The local path for local URIs, or the dataset URL built from the
            workspace base URL, project ID and new dataset ID for cloud URIs.

        Raises:
            ValueError: For remote URIs, which do not support dataset
                uploading.
        """
        if self.is_local:
            dataset.save(self.uri)
            return self.uri
        if self.is_cloud:
            project_id = ProjectID(self.uri.split("/")[-1])
            dataset_id = self.ws.add_dataset(project_id, dataset, name or "", None)
            return f"{self.ws.base_url}/v2/projects/{project_id}/datasets/{dataset_id}"
        if self.is_remote:
            raise ValueError("Remote workspace does not support dataset uploading")
        raise ValueError(f"{self.uri} is not a valid URI")


T = TypeVar("T", bound="_Config")


class _Config(BaseModel):
    """Base model for configs that can be stored as JSON files."""

    @classmethod
    def load(cls: Type[T], path: str) -> T:
        """Parse the JSON file at ``path`` into an instance of ``cls``."""
        with open(path, encoding="utf-8") as f:
            return parse_obj_as(cls, json.load(f))

    def save(self, path: str) -> None:
        """Write this config to ``path`` as indented UTF-8 JSON."""
        # Serialize first so a failure does not leave a truncated file behind.
        payload = self.json(indent=2, ensure_ascii=False)
        with open(path, "w", encoding="utf-8") as f:
            f.write(payload)


def _load_config_from_python(config_type: Type[T], path_or_module: str) -> T:
    """Load a ``config_type`` instance from a Python file or importable module.

    Args:
        config_type: Expected class of the returned object.
        path_or_module: A file path or dotted module name, optionally
            followed by ``:<attribute>`` naming the object to return. Without
            the attribute part, the first module-level value that is an
            instance of ``config_type`` is returned.

    Raises:
        ImportError: If no import spec or loader can be obtained for the file.
        ValueError: If no attribute was named and the module holds no
            ``config_type`` instance.
        AssertionError: If the named attribute is not a ``config_type``
            instance.
    """
    object_path: Optional[str] = None
    # Split on the LAST colon, and only when the whole string is not itself an
    # existing path, so Windows drive letters ("C:\\...") survive intact.
    if ":" in path_or_module and not os.path.exists(path_or_module):
        path_or_module, object_path = path_or_module.rsplit(":", 1)

    if os.path.exists(path_or_module):
        module_name = Path(path_or_module).stem  # e.g. 'my_script' from 'my_script.py'
        spec = importlib.util.spec_from_file_location(module_name, path_or_module)
        if spec is None:
            raise ImportError(f"Could not load spec for {path_or_module}")
        if spec.loader is None:
            raise ImportError(f"No loader for spec {spec}")

        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
    else:
        module = import_module(path_or_module)

    if object_path is None:
        obj = next((o for o in module.__dict__.values() if isinstance(o, config_type)), None)
        if obj is None:
            raise ValueError(f"Could not load {config_type.__name__} from {path_or_module}")
        return obj

    obj = getattr(module, object_path)
    # Raised explicitly (not via `assert`) so the check also runs under -O.
    if not isinstance(obj, config_type):
        raise AssertionError(
            f"{object_path!r} in {path_or_module} is not an instance of {config_type.__name__}"
        )
    return obj


def load_config(config_type: Type[T], config_path: str) -> T:
    """Load a config from a JSON file, a Python file or a module reference.

    An existing path without a Python file suffix is read via
    ``config_type.load``; everything else (Python files, dotted module names,
    ``<path-or-module>:<attribute>`` references) goes to the Python loader.
    """
    # Test the file suffix rather than searching for ".py" anywhere in the
    # path, so JSON files under directories like ".pyenv" are still JSON.
    if os.path.exists(config_path) and Path(config_path).suffix not in _PYTHON_SUFFIXES:
        return config_type.load(config_path)

    return _load_config_from_python(config_type, config_path)