"""Tests for serving models with the ``virtualenv`` environment manager.

All tests require pyenv (skipped module-wide otherwise) and redirect
MLFLOW_ENV_ROOT into a per-test temporary directory so that created
environments can be counted and are cleaned up automatically.
"""

import os
import sys
from io import BytesIO
from stat import S_IRGRP, S_IROTH, S_IRUSR, S_IXGRP, S_IXOTH, S_IXUSR
from typing import NamedTuple

import numpy as np
import pandas as pd
import pytest
import sklearn
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

import mlflow
from mlflow.environment_variables import MLFLOW_ENV_ROOT
from mlflow.pyfunc.scoring_server import CONTENT_TYPE_JSON
from mlflow.utils.environment import _PYTHON_ENV_FILE_NAME, _REQUIREMENTS_FILE_NAME
from mlflow.utils.virtualenv import _is_pyenv_available

from tests.helper_functions import pyfunc_serve_and_score_model

# Every test in this module restores an environment via pyenv; skip all of
# them when pyenv is not installed on the machine running the suite.
pytestmark = pytest.mark.skipif(
    not _is_pyenv_available(),
    reason="requires pyenv",
)

TEST_DIR = "tests"
TEST_MLFLOW_1X_MODEL_DIR = os.path.join(TEST_DIR, "resources", "example_mlflow_1x_sklearn_model")


class Model(NamedTuple):
    # Trained estimator plus a held-out sample and its expected predictions,
    # used to verify that a served model scores identically to the original.
    model: LogisticRegression
    X_pred: pd.DataFrame
    y_pred: np.ndarray


@pytest.fixture(scope="module")
def sklearn_model():
    """Train a LogisticRegression on iris once per module.

    Returns a ``Model`` bundling the estimator, a deterministic 10% sample of
    the training data (fixed random_state), and the in-process predictions to
    compare served scores against.
    """
    X, y = load_iris(return_X_y=True, as_frame=True)
    model = LogisticRegression().fit(X, y)
    X_pred = X.sample(frac=0.1, random_state=0)
    y_pred = model.predict(X_pred)
    return Model(model, X_pred, y_pred)


def serve_and_score(model_uri, data, extra_args=None):
    """Serve ``model_uri`` with the virtualenv env manager and score ``data``.

    Returns the predictions parsed from the JSON response as a squeezed
    numpy array.
    """
    resp = pyfunc_serve_and_score_model(
        model_uri,
        data=data,
        content_type=CONTENT_TYPE_JSON,
        extra_args=["--env-manager=virtualenv"] + (extra_args or []),
    )
    return pd.read_json(BytesIO(resp.content), orient="records").values.squeeze()


@pytest.fixture
def temp_mlflow_env_root(tmp_path, monkeypatch):
    """Point MLFLOW_ENV_ROOT at a fresh temp directory for this test.

    Lets tests assert on how many environments were created by inspecting
    the directory's children.
    """
    env_root = tmp_path / "envs"
    env_root.mkdir(exist_ok=True)
    monkeypatch.setenv(MLFLOW_ENV_ROOT.name, str(env_root))
    return env_root


# Marker for tests that need the redirected env root but not the path itself.
use_temp_mlflow_env_root = pytest.mark.usefixtures(temp_mlflow_env_root.__name__)


@use_temp_mlflow_env_root
def test_restore_environment_with_virtualenv(sklearn_model):
    """A logged model can be restored and served in a fresh virtualenv."""
    with mlflow.start_run():
        model_info = mlflow.sklearn.log_model(sklearn_model.model, name="model")

    scores = serve_and_score(model_info.model_uri, sklearn_model.X_pred)
    np.testing.assert_array_almost_equal(scores, sklearn_model.y_pred)


@use_temp_mlflow_env_root
def test_serve_and_score_read_only_model_directory(sklearn_model, tmp_path):
    """Serving works even when the model directory is read-only."""
    model_path = str(tmp_path / "model")
    mlflow.sklearn.save_model(sklearn_model.model, path=model_path)
    # Read + execute for everyone, no write bits: environment restoration
    # must not need to write into the model directory.
    os.chmod(
        model_path,
        S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH,
    )

    scores = serve_and_score(model_path, sklearn_model.X_pred)
    np.testing.assert_array_almost_equal(scores, sklearn_model.y_pred)


@use_temp_mlflow_env_root
def test_serve_and_score_1x_models():
    """Models logged with MLflow 1.x can still be restored and served."""
    X, _ = load_iris(return_X_y=True, as_frame=True)
    X_pred = X.sample(frac=0.1, random_state=0)
    loaded_model = mlflow.pyfunc.load_model(TEST_MLFLOW_1X_MODEL_DIR)
    y_pred = loaded_model.predict(X_pred)

    scores = serve_and_score(TEST_MLFLOW_1X_MODEL_DIR, X_pred)
    np.testing.assert_array_almost_equal(scores, y_pred)


@use_temp_mlflow_env_root
def test_reuse_environment(temp_mlflow_env_root, sklearn_model):
    """Serving the same model twice reuses the first environment."""
    with mlflow.start_run():
        model_info = mlflow.sklearn.log_model(sklearn_model.model, name="model")

    # Serve the model
    scores = serve_and_score(model_info.model_uri, sklearn_model.X_pred)
    np.testing.assert_array_almost_equal(scores, sklearn_model.y_pred)
    # Serve the model again. The environment created in the previous serving should be reused.
    scores = serve_and_score(model_info.model_uri, sklearn_model.X_pred)
    np.testing.assert_array_almost_equal(scores, sklearn_model.y_pred)
    assert len(list(temp_mlflow_env_root.iterdir())) == 1


@use_temp_mlflow_env_root
def test_different_requirements_create_different_environments(temp_mlflow_env_root, sklearn_model):
    """Models with different pip requirements get separate environments."""
    sklearn_req = f"scikit-learn=={sklearn.__version__}"
    with mlflow.start_run():
        model_info1 = mlflow.sklearn.log_model(
            sklearn_model.model,
            name="model",
            pip_requirements=[sklearn_req],
        )
    scores = serve_and_score(model_info1.model_uri, sklearn_model.X_pred)
    np.testing.assert_array_almost_equal(scores, sklearn_model.y_pred)

    # Log the same model with different requirements
    with mlflow.start_run():
        model_info2 = mlflow.sklearn.log_model(
            sklearn_model.model,
            name="model",
            pip_requirements=[sklearn_req, "numpy"],
        )
    scores = serve_and_score(model_info2.model_uri, sklearn_model.X_pred)
    np.testing.assert_array_almost_equal(scores, sklearn_model.y_pred)
    # Two environments should exist now because the first and second models have different
    # requirements
    assert len(list(temp_mlflow_env_root.iterdir())) == 2


@use_temp_mlflow_env_root
def test_environment_directory_is_cleaned_up_when_unexpected_error_occurs(
    temp_mlflow_env_root, sklearn_model
):
    """A failed environment creation must not leave a stale directory behind."""
    # A version that cannot exist forces pip install (and thus env creation)
    # to fail while serving.
    sklearn_req = "scikit-learn==999.999.999"
    with mlflow.start_run():
        model_info1 = mlflow.sklearn.log_model(
            sklearn_model.model,
            name="model",
            pip_requirements=[sklearn_req],
        )

    try:
        serve_and_score(model_info1.model_uri, sklearn_model.X_pred)
    except Exception:
        pass
    else:
        # pytest.fail is not stripped under `python -O`, unlike a bare assert.
        pytest.fail("Should have raised an exception")
    assert len(list(temp_mlflow_env_root.iterdir())) == 0


@use_temp_mlflow_env_root
def test_python_env_file_does_not_exist(sklearn_model, tmp_path):
    """Restoration falls back to requirements.txt when python_env.yaml is missing."""
    with mlflow.start_run():
        model_info = mlflow.sklearn.log_model(sklearn_model.model, name="model")

    mlflow.artifacts.download_artifacts(artifact_uri=model_info.model_uri, dst_path=tmp_path)
    python_env = next(tmp_path.rglob(_PYTHON_ENV_FILE_NAME))
    python_env.unlink()

    scores = serve_and_score(tmp_path, sklearn_model.X_pred)
    np.testing.assert_array_almost_equal(scores, sklearn_model.y_pred)


@use_temp_mlflow_env_root
def test_python_env_file_and_requirements_file_do_not_exist(sklearn_model, tmp_path):
    """Restoration still works when both python_env.yaml and requirements.txt are missing."""
    with mlflow.start_run():
        model_info = mlflow.sklearn.log_model(sklearn_model.model, name="model")

    mlflow.artifacts.download_artifacts(artifact_uri=model_info.model_uri, dst_path=tmp_path)
    python_env = next(tmp_path.rglob(_PYTHON_ENV_FILE_NAME))
    python_env.unlink()
    requirements = next(tmp_path.rglob(_REQUIREMENTS_FILE_NAME))
    requirements.unlink()

    scores = serve_and_score(tmp_path, sklearn_model.X_pred)
    np.testing.assert_array_almost_equal(scores, sklearn_model.y_pred)


def test_environment_is_removed_when_package_installation_fails(
    temp_mlflow_env_root, sklearn_model
):
    """The partially-built environment is deleted when pip install fails."""
    with mlflow.start_run():
        model_info = mlflow.sklearn.log_model(
            sklearn_model.model,
            name="model",
            # Enforce pip install to fail using a non-existent package version
            pip_requirements=["mlflow==999.999.999"],
        )
    with pytest.raises(AssertionError, match="scoring process died"):
        serve_and_score(model_info.model_uri, sklearn_model.X_pred)
    assert len(list(temp_mlflow_env_root.iterdir())) == 0


@use_temp_mlflow_env_root
def test_restore_environment_from_conda_yaml_containing_conda_packages(sklearn_model, tmp_path):
    """With python_env.yaml removed, restoration parses conda.yaml — including
    conda (non-pip) dependencies — to rebuild the environment."""
    conda_env = {
        "name": "mlflow-env",
        "channels": ["conda-forge"],
        "dependencies": [
            "python=" + ".".join(map(str, sys.version_info[:3])),
            "conda-package=1.2.3",  # conda package
            "pip",
            {
                "pip": [
                    "mlflow",
                    f"scikit-learn=={sklearn.__version__}",
                ]
            },
        ],
    }
    with mlflow.start_run():
        model_info = mlflow.sklearn.log_model(
            sklearn_model.model,
            name="model",
            conda_env=conda_env,
        )

    mlflow.artifacts.download_artifacts(artifact_uri=model_info.model_uri, dst_path=tmp_path)
    python_env = next(tmp_path.rglob(_PYTHON_ENV_FILE_NAME))
    python_env.unlink()
    serve_and_score(tmp_path, sklearn_model.X_pred)