/ mlflow / models / evaluation / artifacts.py
artifacts.py
  1  import json
  2  import pathlib
  3  import pickle
  4  from json import JSONDecodeError
  5  from typing import NamedTuple
  6  
  7  import matplotlib.pyplot as plt
  8  import numpy as np
  9  import pandas as pd
 10  
 11  from mlflow.exceptions import MlflowException
 12  from mlflow.models.evaluation.base import EvaluationArtifact
 13  from mlflow.utils.annotations import developer_stable
 14  from mlflow.utils.proto_json_utils import NumpyEncoder
 15  
 16  
 17  @developer_stable
 18  class ImageEvaluationArtifact(EvaluationArtifact):
 19      def _save(self, output_artifact_path):
 20          self._content.save(output_artifact_path)
 21  
 22      def _load_content_from_file(self, local_artifact_path):
 23          from PIL.Image import open as open_image
 24  
 25          self._content = open_image(local_artifact_path)
 26          self._content.load()  # Load image and close the file descriptor.
 27          return self._content
 28  
 29  
 30  @developer_stable
 31  class CsvEvaluationArtifact(EvaluationArtifact):
 32      def _save(self, output_artifact_path):
 33          self._content.to_csv(output_artifact_path, index=False)
 34  
 35      def _load_content_from_file(self, local_artifact_path):
 36          self._content = pd.read_csv(local_artifact_path)
 37          return self._content
 38  
 39  
 40  @developer_stable
 41  class ParquetEvaluationArtifact(EvaluationArtifact):
 42      def _save(self, output_artifact_path):
 43          self._content.to_parquet(output_artifact_path, compression="brotli")
 44  
 45      def _load_content_from_file(self, local_artifact_path):
 46          self._content = pd.read_parquet(local_artifact_path)
 47          return self._content
 48  
 49  
 50  @developer_stable
 51  class NumpyEvaluationArtifact(EvaluationArtifact):
 52      def _save(self, output_artifact_path):
 53          np.save(output_artifact_path, self._content, allow_pickle=False)
 54  
 55      def _load_content_from_file(self, local_artifact_path):
 56          self._content = np.load(local_artifact_path, allow_pickle=False)
 57          return self._content
 58  
 59  
 60  @developer_stable
 61  class JsonEvaluationArtifact(EvaluationArtifact):
 62      def _save(self, output_artifact_path):
 63          with open(output_artifact_path, "w") as f:
 64              json.dump(self._content, f)
 65  
 66      def _load_content_from_file(self, local_artifact_path):
 67          with open(local_artifact_path) as f:
 68              self._content = json.load(f)
 69          return self._content
 70  
 71  
 72  @developer_stable
 73  class TextEvaluationArtifact(EvaluationArtifact):
 74      def _save(self, output_artifact_path):
 75          with open(output_artifact_path, "w") as f:
 76              f.write(self._content)
 77  
 78      def _load_content_from_file(self, local_artifact_path):
 79          with open(local_artifact_path) as f:
 80              self._content = f.read()
 81          return self._content
 82  
 83  
 84  @developer_stable
 85  class PickleEvaluationArtifact(EvaluationArtifact):
 86      def _save(self, output_artifact_path):
 87          with open(output_artifact_path, "wb") as f:
 88              pickle.dump(self._content, f)
 89  
 90      def _load_content_from_file(self, local_artifact_path):
 91          with open(local_artifact_path, "rb") as f:
 92              self._content = pickle.load(f)
 93          return self._content
 94  
 95  
# Maps a file extension (as returned by ``pathlib.Path.suffix``, leading dot
# included) to the artifact class used to load a path-based artifact.
# NOTE: key order is user-visible — `_infer_artifact_type_and_ext` joins the
# keys into its "unsupported extension" error message.
_EXT_TO_ARTIFACT_MAP = {
    ".png": ImageEvaluationArtifact,
    ".jpg": ImageEvaluationArtifact,
    ".jpeg": ImageEvaluationArtifact,
    ".json": JsonEvaluationArtifact,
    ".npy": NumpyEvaluationArtifact,
    ".csv": CsvEvaluationArtifact,
    ".parquet": ParquetEvaluationArtifact,
    ".txt": TextEvaluationArtifact,
}

# Maps an in-memory object's concrete type to the file extension it is saved
# under. Lookup is by exact type (``type(obj)``), not ``isinstance``.
_TYPE_TO_EXT_MAP = {
    pd.DataFrame: ".csv",
    np.ndarray: ".npy",
    plt.Figure: ".png",
}

# Maps an in-memory object's concrete type to the artifact class used to
# persist it; kept in sync with `_TYPE_TO_EXT_MAP` above.
_TYPE_TO_ARTIFACT_MAP = {
    pd.DataFrame: CsvEvaluationArtifact,
    np.ndarray: NumpyEvaluationArtifact,
    plt.Figure: ImageEvaluationArtifact,
}
118  
119  
class _InferredArtifactProperties(NamedTuple):
    """Inferred persistence properties for a custom-metric artifact."""

    # Whether the artifact was supplied as a path to an existing file (True)
    # or as an in-memory object / JSON string (False).
    from_path: bool
    # EvaluationArtifact subclass used to save and load the content.
    type: type[EvaluationArtifact]
    # File extension (with leading dot) used when writing the artifact.
    ext: str
124  
125  
def _infer_artifact_type_and_ext(artifact_name, raw_artifact, custom_metric_tuple):
    """
    Performs type and file extension inference on the provided artifact.

    Args:
        artifact_name: The name of the provided artifact.
        raw_artifact: The artifact object: a path (``str`` or ``pathlib.Path``),
            a JSON string, or an in-memory Python object.
        custom_metric_tuple: Namedtuple containing a user provided function and
            its index in the ``custom_metrics`` parameter of ``mlflow.evaluate``.

    Returns:
        An ``_InferredArtifactProperties`` namedtuple.

    Raises:
        MlflowException: If the artifact is a string that is neither a valid
            path nor a JSON string, or a path that is missing, not a file, or
            has an unsupported extension.
    """

    exception_header = (
        f"Custom metric function '{custom_metric_tuple.name}' at index "
        f"{custom_metric_tuple.index} in the `custom_metrics` parameter produced an "
        f"artifact '{artifact_name}'"
    )

    # Given a string, first see if it is a path. Otherwise, check if it is a JSON string.
    if isinstance(raw_artifact, str):
        potential_path = pathlib.Path(raw_artifact)
        if potential_path.exists():
            raw_artifact = potential_path
        else:
            try:
                json.loads(raw_artifact)
                return _InferredArtifactProperties(
                    from_path=False, type=JsonEvaluationArtifact, ext=".json"
                )
            except JSONDecodeError:
                # `from None`: the parse failure is an implementation detail;
                # the MlflowException message already explains the problem.
                raise MlflowException(
                    f"{exception_header} with string representation '{raw_artifact}' that is "
                    f"neither a valid path to a file nor a JSON string."
                ) from None

    # Type inference based on the file extension
    if isinstance(raw_artifact, pathlib.Path):
        if not raw_artifact.exists():
            raise MlflowException(f"{exception_header} with path '{raw_artifact}' does not exist.")
        if not raw_artifact.is_file():
            raise MlflowException(f"{exception_header} with path '{raw_artifact}' is not a file.")
        if raw_artifact.suffix not in _EXT_TO_ARTIFACT_MAP:
            raise MlflowException(
                f"{exception_header} with path '{raw_artifact}' does not match any of the supported"
                f" file extensions: {', '.join(_EXT_TO_ARTIFACT_MAP.keys())}."
            )
        return _InferredArtifactProperties(
            from_path=True, type=_EXT_TO_ARTIFACT_MAP[raw_artifact.suffix], ext=raw_artifact.suffix
        )

    # Type inference based on object type (exact type match, not isinstance)
    if type(raw_artifact) in _TYPE_TO_ARTIFACT_MAP:
        return _InferredArtifactProperties(
            from_path=False,
            type=_TYPE_TO_ARTIFACT_MAP[type(raw_artifact)],
            ext=_TYPE_TO_EXT_MAP[type(raw_artifact)],
        )

    # Given as other python object, we first attempt to infer as JsonEvaluationArtifact. If that
    # fails, we store it as PickleEvaluationArtifact
    try:
        json.dumps(raw_artifact, cls=NumpyEncoder)
        return _InferredArtifactProperties(
            from_path=False, type=JsonEvaluationArtifact, ext=".json"
        )
    except (TypeError, ValueError):
        # TypeError: content is not JSON serializable. ValueError: circular
        # reference detected by json.dumps — pickle handles both cases.
        return _InferredArtifactProperties(
            from_path=False, type=PickleEvaluationArtifact, ext=".pickle"
        )