# artifacts.py
import json
import pathlib
import pickle
from json import JSONDecodeError
from typing import NamedTuple

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from mlflow.exceptions import MlflowException
from mlflow.models.evaluation.base import EvaluationArtifact
from mlflow.utils.annotations import developer_stable
from mlflow.utils.proto_json_utils import NumpyEncoder


@developer_stable
class ImageEvaluationArtifact(EvaluationArtifact):
    """Evaluation artifact stored as an image file and read back with PIL."""

    def _save(self, output_artifact_path):
        # Delegates to the content object's own ``save`` method.
        self._content.save(output_artifact_path)

    def _load_content_from_file(self, local_artifact_path):
        # Imported lazily so PIL is only needed when an image is actually loaded.
        from PIL.Image import open as open_image

        self._content = open_image(local_artifact_path)
        self._content.load()  # Load image and close the file descriptor.
        return self._content


@developer_stable
class CsvEvaluationArtifact(EvaluationArtifact):
    """Evaluation artifact stored as a CSV file (written without the index column)."""

    def _save(self, output_artifact_path):
        self._content.to_csv(output_artifact_path, index=False)

    def _load_content_from_file(self, local_artifact_path):
        self._content = pd.read_csv(local_artifact_path)
        return self._content


@developer_stable
class ParquetEvaluationArtifact(EvaluationArtifact):
    """Evaluation artifact stored as a brotli-compressed Parquet file."""

    def _save(self, output_artifact_path):
        self._content.to_parquet(output_artifact_path, compression="brotli")

    def _load_content_from_file(self, local_artifact_path):
        self._content = pd.read_parquet(local_artifact_path)
        return self._content


@developer_stable
class NumpyEvaluationArtifact(EvaluationArtifact):
    """Evaluation artifact stored in NumPy ``.npy`` format with pickling disabled."""

    def _save(self, output_artifact_path):
        np.save(output_artifact_path, self._content, allow_pickle=False)

    def _load_content_from_file(self, local_artifact_path):
        self._content = np.load(local_artifact_path, allow_pickle=False)
        return self._content


@developer_stable
class JsonEvaluationArtifact(EvaluationArtifact):
    """Evaluation artifact stored as a JSON document."""

    def _save(self, output_artifact_path):
        with open(output_artifact_path, "w") as f:
            json.dump(self._content, f)

    def _load_content_from_file(self, local_artifact_path):
        with open(local_artifact_path) as f:
            self._content = json.load(f)
        return self._content


@developer_stable
class TextEvaluationArtifact(EvaluationArtifact):
    """Evaluation artifact stored as a plain text file."""

    def _save(self, output_artifact_path):
        with open(output_artifact_path, "w") as f:
            f.write(self._content)

    def _load_content_from_file(self, local_artifact_path):
        with open(local_artifact_path) as f:
            self._content = f.read()
        return self._content


@developer_stable
class PickleEvaluationArtifact(EvaluationArtifact):
    """Evaluation artifact stored with ``pickle``.

    NOTE(review): ``pickle.load`` can execute arbitrary code; only load artifacts that come
    from a trusted source.
    """

    def _save(self, output_artifact_path):
        with open(output_artifact_path, "wb") as f:
            pickle.dump(self._content, f)

    def _load_content_from_file(self, local_artifact_path):
        with open(local_artifact_path, "rb") as f:
            self._content = pickle.load(f)
        return self._content


# File extensions accepted when an artifact is supplied as a path on disk.
_EXT_TO_ARTIFACT_MAP = {
    ".png": ImageEvaluationArtifact,
    ".jpg": ImageEvaluationArtifact,
    ".jpeg": ImageEvaluationArtifact,
    ".json": JsonEvaluationArtifact,
    ".npy": NumpyEvaluationArtifact,
    ".csv": CsvEvaluationArtifact,
    ".parquet": ParquetEvaluationArtifact,
    ".txt": TextEvaluationArtifact,
}

# Extension used when an in-memory object of the given (exact) type is the artifact.
_TYPE_TO_EXT_MAP = {
    pd.DataFrame: ".csv",
    np.ndarray: ".npy",
    plt.Figure: ".png",
}

# Artifact class used when an in-memory object of the given (exact) type is the artifact.
_TYPE_TO_ARTIFACT_MAP = {
    pd.DataFrame: CsvEvaluationArtifact,
    np.ndarray: NumpyEvaluationArtifact,
    plt.Figure: ImageEvaluationArtifact,
}


class _InferredArtifactProperties(NamedTuple):
    # True when the artifact was given as a path to an existing file.
    from_path: bool
    # The EvaluationArtifact subclass selected for the artifact.
    type: type[EvaluationArtifact]
    # File extension (including the leading dot) associated with the artifact.
    ext: str


def _infer_artifact_type_and_ext(artifact_name, raw_artifact, custom_metric_tuple):
    """
    This function performs type and file extension inference on the provided artifact

    Inference order: a string is treated as a file path if it names something that exists,
    otherwise it must parse as JSON; a ``pathlib.Path`` is resolved by its file extension;
    objects whose exact type is registered in ``_TYPE_TO_ARTIFACT_MAP`` use that mapping;
    anything else is stored as JSON if it is JSON serializable and pickled otherwise.

    Args:
        artifact_name: The name of the provided artifact
        raw_artifact: The artifact object
        custom_metric_tuple: Containing a user provided function and its index in the
            ``custom_metrics`` parameter of ``mlflow.evaluate``

    Returns:
        InferredArtifactProperties namedtuple

    Raises:
        MlflowException: If a string artifact is neither an existing path nor valid JSON, or
            if a path artifact does not exist, is not a file, or has an unsupported extension.
    """

    exception_header = (
        f"Custom metric function '{custom_metric_tuple.name}' at index "
        f"{custom_metric_tuple.index} in the `custom_metrics` parameter produced an "
        f"artifact '{artifact_name}'"
    )

    # Given a string, first see if it is a path. Otherwise, check if it is a JsonEvaluationArtifact
    if isinstance(raw_artifact, str):
        potential_path = pathlib.Path(raw_artifact)
        try:
            path_exists = potential_path.exists()
        except OSError:
            # Arbitrary strings (e.g. a long JSON payload) can be rejected by the OS before
            # the existence check completes, for instance with "File name too long". Such a
            # string cannot be a usable path, so fall through to the JSON check.
            path_exists = False

        if path_exists:
            raw_artifact = potential_path
        else:
            try:
                json.loads(raw_artifact)
            except JSONDecodeError:
                raise MlflowException(
                    f"{exception_header} with string representation '{raw_artifact}' that is "
                    f"neither a valid path to a file nor a JSON string."
                )
            return _InferredArtifactProperties(
                from_path=False, type=JsonEvaluationArtifact, ext=".json"
            )

    # Type inference based on the file extension
    if isinstance(raw_artifact, pathlib.Path):
        if not raw_artifact.exists():
            raise MlflowException(f"{exception_header} with path '{raw_artifact}' does not exist.")
        if not raw_artifact.is_file():
            raise MlflowException(f"{exception_header} with path '{raw_artifact}' is not a file.")
        if raw_artifact.suffix not in _EXT_TO_ARTIFACT_MAP:
            raise MlflowException(
                f"{exception_header} with path '{raw_artifact}' does not match any of the supported"
                f" file extensions: {', '.join(_EXT_TO_ARTIFACT_MAP)}."
            )
        return _InferredArtifactProperties(
            from_path=True, type=_EXT_TO_ARTIFACT_MAP[raw_artifact.suffix], ext=raw_artifact.suffix
        )

    # Type inference based on object type
    if type(raw_artifact) in _TYPE_TO_ARTIFACT_MAP:
        return _InferredArtifactProperties(
            from_path=False,
            type=_TYPE_TO_ARTIFACT_MAP[type(raw_artifact)],
            ext=_TYPE_TO_EXT_MAP[type(raw_artifact)],
        )

    # Given as other python object, we first attempt to infer as JsonEvaluationArtifact. If that
    # fails, we store it as PickleEvaluationArtifact
    try:
        json.dumps(raw_artifact, cls=NumpyEncoder)
    except (TypeError, ValueError):
        # TypeError: the object is not JSON serializable.
        # ValueError: json refuses circular references, which pickle can still handle.
        return _InferredArtifactProperties(
            from_path=False, type=PickleEvaluationArtifact, ext=".pickle"
        )
    return _InferredArtifactProperties(from_path=False, type=JsonEvaluationArtifact, ext=".json")