Cradicle Explorer

/ mlflow / utils / virtualenv.py
virtualenv.py
  1  import logging
  2  import os
  3  import re
  4  import shutil
  5  import tempfile
  6  import uuid
  7  from pathlib import Path
  8  from typing import Literal
  9  
 10  from packaging.version import Version
 11  
 12  import mlflow
 13  from mlflow.environment_variables import _MLFLOW_TESTING, MLFLOW_ENV_ROOT
 14  from mlflow.exceptions import MlflowException
 15  from mlflow.models.model import MLMODEL_FILE_NAME, Model
 16  from mlflow.utils import env_manager as em
 17  from mlflow.utils.conda import _PIP_CACHE_DIR
 18  from mlflow.utils.databricks_utils import is_in_databricks_runtime
 19  from mlflow.utils.environment import (
 20      _CONDA_ENV_FILE_NAME,
 21      _PYTHON_ENV_FILE_NAME,
 22      _REQUIREMENTS_FILE_NAME,
 23      _get_mlflow_env_name,
 24      _PythonEnv,
 25  )
 26  from mlflow.utils.file_utils import remove_on_error
 27  from mlflow.utils.os import is_windows
 28  from mlflow.utils.process import _exec_cmd, _join_commands
 29  from mlflow.utils.requirements_utils import _parse_requirements
 30  from mlflow.utils.uv_utils import has_uv_lock_artifact, run_uv_sync, setup_uv_sync_environment
 31  
# Module-level logger, named after this module.
_logger = logging.getLogger(__name__)
 33  
 34  
 35  def _get_mlflow_virtualenv_root():
 36      """
 37      Returns the root directory to store virtualenv environments created by MLflow.
 38      """
 39      return MLFLOW_ENV_ROOT.get()
 40  
 41  
# Location checked first by `_get_pyenv_bin_path` before it falls back to a PATH lookup.
_DATABRICKS_PYENV_BIN_PATH = "/databricks/.pyenv/bin/pyenv"
 43  
 44  
 45  def _is_pyenv_available():
 46      """
 47      Returns True if pyenv is available, otherwise False.
 48      """
 49      return _get_pyenv_bin_path() is not None
 50  
 51  
 52  def _validate_pyenv_is_available():
 53      """
 54      Validates pyenv is available. If not, throws an `MlflowException` with a brief instruction on
 55      how to install pyenv.
 56      """
 57      url = (
 58          "https://github.com/pyenv/pyenv#installation"
 59          if not is_windows()
 60          else "https://github.com/pyenv-win/pyenv-win#installation"
 61      )
 62      if not _is_pyenv_available():
 63          raise MlflowException(
 64              f"Could not find the pyenv binary. See {url} for installation instructions."
 65          )
 66  
 67  
# Matches a complete "major.minor.micro" version string made of digits only (e.g. "3.8.13").
_SEMANTIC_VERSION_REGEX = re.compile(r"^([0-9]+)\.([0-9]+)\.([0-9]+)$")
 69  
 70  
 71  def _get_pyenv_bin_path():
 72      if os.path.exists(_DATABRICKS_PYENV_BIN_PATH):
 73          return _DATABRICKS_PYENV_BIN_PATH
 74      return shutil.which("pyenv")
 75  
 76  
 77  def _find_latest_installable_python_version(version_prefix):
 78      """
 79      Find the latest installable python version that matches the given version prefix
 80      from the output of `pyenv install --list`. For example, `version_prefix("3.8")` returns '3.8.x'
 81      where 'x' represents the latest micro version in 3.8.
 82      """
 83      lines = _exec_cmd(
 84          [_get_pyenv_bin_path(), "install", "--list"],
 85          capture_output=True,
 86          shell=is_windows(),
 87      ).stdout.splitlines()
 88      semantic_versions = filter(_SEMANTIC_VERSION_REGEX.match, map(str.strip, lines))
 89      matched = [v for v in semantic_versions if v.startswith(version_prefix)]
 90      if not matched:
 91          raise MlflowException(f"Could not find python version that matches {version_prefix}")
 92      return max(matched, key=Version)
 93  
 94  
 95  def _install_python(version, pyenv_root=None, capture_output=False):
 96      """Installs a specified version of python with pyenv and returns a path to the installed python
 97      binary.
 98  
 99      Args:
100          version: Python version to install.
101          pyenv_root: The value of the "PYENV_ROOT" environment variable used when running
102              `pyenv install` which installs python in `{PYENV_ROOT}/versions/{version}`.
103          capture_output: Set the `capture_output` argument when calling `_exec_cmd`.
104  
105      Returns:
106          Path to the installed python binary.
107      """
108      version = (
109          version
110          if _SEMANTIC_VERSION_REGEX.match(version)
111          else _find_latest_installable_python_version(version)
112      )
113      _logger.info("Installing python %s if it does not exist", version)
114      # pyenv-win doesn't support `--skip-existing` but its behavior is enabled by default
115      # https://github.com/pyenv-win/pyenv-win/pull/314
116      pyenv_install_options = ("--skip-existing",) if not is_windows() else ()
117      extra_env = {"PYENV_ROOT": pyenv_root} if pyenv_root else None
118      pyenv_bin_path = _get_pyenv_bin_path()
119      _exec_cmd(
120          [pyenv_bin_path, "install", *pyenv_install_options, version],
121          capture_output=capture_output,
122          # Windows fails to find pyenv and throws `FileNotFoundError` without `shell=True`
123          shell=is_windows(),
124          extra_env=extra_env,
125      )
126  
127      if not is_windows():
128          if pyenv_root is None:
129              pyenv_root = _exec_cmd([pyenv_bin_path, "root"], capture_output=True).stdout.strip()
130          path_to_bin = ("bin", "python")
131      else:
132          # pyenv-win doesn't provide the `pyenv root` command
133          pyenv_root = os.environ.get("PYENV_ROOT")
134          if pyenv_root is None:
135              raise MlflowException("Environment variable 'PYENV_ROOT' must be set")
136          path_to_bin = ("python.exe",)
137      return Path(pyenv_root).joinpath("versions", version, *path_to_bin)
138  
139  
def _get_conda_env_file(model_config):
    """Determines the conda environment file name recorded for a model.

    Args:
        model_config: Model configuration object exposing a `flavors` mapping.

    Returns:
        The result of `_extract_conda_env` applied to the pyfunc flavor's env entry when that
        entry is present and truthy, otherwise `_CONDA_ENV_FILE_NAME`.
    """
    from mlflow.pyfunc import _extract_conda_env

    for flavor_name, flavor_conf in model_config.flavors.items():
        if flavor_name != mlflow.pyfunc.FLAVOR_NAME:
            continue
        pyfunc_env = flavor_conf.get(mlflow.pyfunc.ENV)
        if pyfunc_env:
            return _extract_conda_env(pyfunc_env)
    return _CONDA_ENV_FILE_NAME
148  
149  
def _get_python_env_file(model_config):
    """Determines the python environment file name recorded for a model.

    Args:
        model_config: Model configuration object exposing a `flavors` mapping.

    Returns:
        The `EnvType.VIRTUALENV` entry of the pyfunc flavor's env config when that config is a
        dictionary, otherwise `_PYTHON_ENV_FILE_NAME`.
    """
    from mlflow.pyfunc import EnvType

    pyfunc_confs = (
        conf for name, conf in model_config.flavors.items() if name == mlflow.pyfunc.FLAVOR_NAME
    )
    for pyfunc_conf in pyfunc_confs:
        env_conf = pyfunc_conf.get(mlflow.pyfunc.ENV)
        # Models saved in MLflow >= 2.0 use a dictionary for the pyfunc flavor
        # `env` config, where the keys are different environment managers (e.g.
        # conda, virtualenv) and the values are corresponding environment paths
        if isinstance(env_conf, dict):
            return env_conf[EnvType.VIRTUALENV]
    return _PYTHON_ENV_FILE_NAME
162  
163  
def _get_python_env(local_model_path):
    """Constructs `_PythonEnv` from the model artifacts stored in `local_model_path`. If
    `python_env.yaml` is available, use it, otherwise extract model dependencies from `conda.yaml`.
    If `conda.yaml` contains conda dependencies except `python`, `pip`, `setuptools`, and, `wheel`,
    an `MlflowException` is thrown because conda dependencies cannot be installed in a virtualenv
    environment.

    When the python env file is missing but a requirements file exists, only the python version
    and build dependencies are taken from the conda file; the dependencies then refer to the
    requirements file through a `-r` entry.

    Args:
        local_model_path: Local directory (a `Path`) containing the model artifacts.

    Returns:
        `_PythonEnv` instance.

    """
    model_config = Model.load(local_model_path / MLMODEL_FILE_NAME)
    python_env_file = local_model_path / _get_python_env_file(model_config)
    conda_env_file = local_model_path / _get_conda_env_file(model_config)
    requirements_file = local_model_path / _REQUIREMENTS_FILE_NAME

    if python_env_file.exists():
        return _PythonEnv.from_yaml(python_env_file)

    # The adjacent literals below are concatenated verbatim, so each fragment must carry its
    # own trailing space.
    _logger.info(
        "This model is missing %s, which is because it was logged in an older version "
        "of MLflow (< 1.26.0) that does not support restoring a model environment with "
        "virtualenv. Attempting to extract model dependencies from %s and %s instead.",
        _PYTHON_ENV_FILE_NAME,
        _REQUIREMENTS_FILE_NAME,
        _CONDA_ENV_FILE_NAME,
    )
    if requirements_file.exists():
        deps = _PythonEnv.get_dependencies_from_conda_yaml(conda_env_file)
        return _PythonEnv(
            python=deps["python"],
            build_dependencies=deps["build_dependencies"],
            dependencies=[f"-r {_REQUIREMENTS_FILE_NAME}"],
        )
    return _PythonEnv.from_conda_yaml(conda_env_file)
203  
204  
def _get_virtualenv_name(python_env, work_dir_path, env_id=None):
    """Derives the name of the virtual environment for a python environment spec.

    The name is produced by `_get_mlflow_env_name` from the concatenation of `str(python_env)`,
    the string forms of the sorted parsed requirements, and `env_id` (if any).

    Args:
        python_env: Environment spec whose `dependencies` are parsed.
        work_dir_path: Passed to `_parse_requirements` as `base_dir`.
        env_id: Optional extra string appended before the name is computed.

    Returns:
        The value returned by `_get_mlflow_env_name`.
    """
    parsed_requirements = sorted(
        _parse_requirements(python_env.dependencies, is_constraint=False, base_dir=work_dir_path)
    )
    requirements_repr = "".join(str(req) for req in parsed_requirements)
    env_key = str(python_env) + requirements_repr + (env_id or "")
    return _get_mlflow_env_name(env_key)
214  
215  
def _get_virtualenv_activate_cmd(env_dir: Path) -> str:
    """Builds the command that activates the virtual environment located in `env_dir`.

    Args:
        env_dir: Directory of the virtual environment.

    Returns:
        On Windows, the path to `Scripts/activate.bat` inside `env_dir`; otherwise
        `source <env_dir>/bin/activate`.
    """
    if is_windows():
        return str(env_dir.joinpath("Scripts", "activate.bat"))
    return f"source {env_dir.joinpath('bin', 'activate')}"
221  
222  
223  def _get_uv_env_creation_command(env_dir: str | Path, python_version: str) -> str:
224      return ["uv", "venv", str(env_dir), f"--python={python_version}"]
225  
226  
def _create_virtualenv(
    local_model_path: Path,
    python_env: _PythonEnv,
    env_dir: Path,
    python_install_dir: str | None = None,
    env_manager: Literal["virtualenv", "uv"] = em.UV,
    extra_env: dict[str, str] | None = None,
    capture_output: bool = False,
    pip_requirements_override: list[str] | None = None,
):
    """Creates a virtual environment in `env_dir` and installs the model's dependencies into it.

    If `env_dir` already exists it is reused as-is: nothing is created or installed, which
    also means `pip_requirements_override` is not applied. The creation and installation steps
    run inside `remove_on_error(env_dir, ...)`, whose error callback logs that the environment
    directory is being removed.

    Args:
        local_model_path: Local directory containing the model artifacts.
        python_env: Environment spec providing the python version, build dependencies and
            dependencies to install.
        env_dir: Directory of the virtual environment to create.
        python_install_dir: Optional directory for python installations. Used as the pyenv
            root with the "virtualenv" manager and as `UV_PYTHON_INSTALL_DIR` with "uv".
        env_manager: Either "virtualenv" (pyenv + `python -m venv` + pip) or "uv".
        extra_env: Extra environment variables for the dependency installation commands.
        capture_output: Forwarded as `capture_output` to the executed commands.
        pip_requirements_override: Extra requirements installed (with upgrade) after the
            model's own dependencies.

    Returns:
        Command to activate the virtual environment.

    Raises:
        MlflowException: If `env_manager` is invalid, or if restoring the environment with
            `uv sync` fails.
    """
    if env_manager not in {em.VIRTUALENV, em.UV}:
        raise MlflowException.invalid_parameter_value(
            f"Invalid value for `env_manager`: {env_manager}. "
            f"Must be one of `{em.VIRTUALENV}, {em.UV}`"
        )

    activate_cmd = _get_virtualenv_activate_cmd(env_dir)
    if env_dir.exists():
        _logger.info(f"Environment {env_dir} already exists")
        return activate_cmd

    env_creation_extra_env = {}
    if env_manager == em.VIRTUALENV:
        python_bin_path = _install_python(
            python_env.python, pyenv_root=python_install_dir, capture_output=capture_output
        )
        _logger.info(f"Creating a new environment in {env_dir} with {python_bin_path}")
        env_creation_cmd = [python_bin_path, "-m", "venv", env_dir]
        install_deps_cmd_prefix = "python -m pip install"
    elif env_manager == em.UV:
        _logger.info(
            f"Creating a new environment in {env_dir} with python "
            f"version {python_env.python} using uv"
        )
        env_creation_cmd = _get_uv_env_creation_command(env_dir, python_env.python)
        install_deps_cmd_prefix = "uv pip install"
        if python_install_dir:
            # Setting `UV_PYTHON_INSTALL_DIR` to make `uv env` install python into
            # the directory it points to.
            env_creation_extra_env["UV_PYTHON_INSTALL_DIR"] = python_install_dir
        if _MLFLOW_TESTING.get():
            # NOTE(review): this changes the environment of the current process, not only of
            # the uv subprocesses started below.
            os.environ["RUST_LOG"] = "uv=debug"
    with remove_on_error(
        env_dir,
        onerror=lambda e: _logger.warning(
            "Encountered an unexpected error: %s while creating a virtualenv environment in %s, "
            "removing the environment directory...",
            repr(e),
            env_dir,
        ),
    ):
        _exec_cmd(
            env_creation_cmd,
            capture_output=capture_output,
            extra_env=env_creation_extra_env,
        )

        # Use UV sync if model has uv.lock artifact and using UV env manager
        if env_manager == em.UV and has_uv_lock_artifact(local_model_path):
            _logger.info("Found uv.lock artifact, restoring environment with uv sync")
            if not setup_uv_sync_environment(env_dir, local_model_path, python_env.python):
                raise MlflowException(
                    "Failed to set up uv sync environment. Ensure the model's uv.lock "
                    "and pyproject.toml artifacts are valid."
                )
            if not run_uv_sync(env_dir, capture_output=capture_output):
                raise MlflowException(
                    "Failed to restore model environment using uv sync. Ensure that uv is "
                    "installed and the model's uv.lock artifact is valid. To install "
                    "dependencies with pip instead, set the env_manager parameter to "
                    "'virtualenv' instead of 'uv'."
                )
            _logger.info("UV sync completed successfully")
        else:
            _logger.info("Installing dependencies")
            # Build dependencies are installed first, then the model's dependencies; empty
            # groups are skipped.
            for deps in filter(None, [python_env.build_dependencies, python_env.dependencies]):
                with tempfile.TemporaryDirectory() as tmpdir:
                    # Create a temporary requirements file in the model directory to resolve the
                    # references in it correctly. To do this, we must first symlink or copy the
                    # model directory's contents to a temporary location for compatibility with
                    # deployment tools that store models in a read-only mount
                    try:
                        for model_item in os.listdir(local_model_path):
                            os.symlink(
                                src=os.path.join(local_model_path, model_item),
                                dst=os.path.join(tmpdir, model_item),
                            )
                    except Exception as e:
                        _logger.warning(
                            "Failed to symlink model directory during dependency installation."
                            " Copying instead. Exception: %s",
                            e,
                        )
                        _copy_model_to_writeable_destination(local_model_path, tmpdir)

                    # The random suffix keeps the generated file from clashing with any
                    # requirements file that is part of the model itself.
                    tmp_req_file = f"requirements.{uuid.uuid4().hex}.txt"
                    Path(tmpdir).joinpath(tmp_req_file).write_text("\n".join(deps))
                    cmd = _join_commands(
                        activate_cmd, f"{install_deps_cmd_prefix} -r {tmp_req_file}"
                    )
                    _exec_cmd(cmd, capture_output=capture_output, cwd=tmpdir, extra_env=extra_env)

        if pip_requirements_override:
            _logger.info(
                "Installing additional dependencies specified by "
                f"pip_requirements_override: {pip_requirements_override}"
            )
            # `-U` upgrades packages that the steps above already installed; without it an
            # override whose package is already present may be left at its current version.
            # NOTE(review): the requirements are interpolated into the command unquoted, so
            # specifiers containing shell metacharacters (e.g. `pkg>=1.0`) may not reach the
            # installer intact -- confirm how `_join_commands` runs the command.
            cmd = _join_commands(
                activate_cmd,
                f"{install_deps_cmd_prefix} --quiet -U {' '.join(pip_requirements_override)}",
            )
            _exec_cmd(cmd, capture_output=capture_output, extra_env=extra_env)

        return activate_cmd
341  
342  
def _copy_model_to_writeable_destination(model_src, dst):
    """
    Copies the contents of the specified `model_src` directory, which may be read-only, to the
    writeable `dst` directory.

    Args:
        model_src: Directory whose entries (files and sub-directories) are copied.
        dst: Destination directory. It is created if it does not exist yet.
    """
    os.makedirs(dst, exist_ok=True)
    for model_item in os.listdir(model_src):
        src_path = os.path.join(model_src, model_item)
        # Copy individual files and subdirectories, rather than using `shutil.copytree()`
        # because `shutil.copytree()` will apply the permissions from the source directory,
        # which may be read-only.
        # `os.listdir` yields bare names, so the directory check has to be made on the joined
        # path; a bare name would be resolved against the current working directory.
        copy_fn = shutil.copytree if os.path.isdir(src_path) else shutil.copy2

        copy_fn(
            src=src_path,
            dst=os.path.join(dst, model_item),
        )
359  
360  
def _get_virtualenv_extra_env_vars(env_root_dir=None):
    """Builds the extra environment variables used when installing into a virtual environment.

    Args:
        env_root_dir: Optional root directory. When given, pip's cache directory is placed
            under it.

    Returns:
        A new dictionary that always contains "PIP_NO_INPUT" and, when `env_root_dir` is not
        None, "PIP_CACHE_DIR" as well.
    """
    # PIP_NO_INPUT=1 makes pip run in non-interactive mode,
    # otherwise pip might prompt "yes or no" and ask stdin input
    env_vars = {"PIP_NO_INPUT": "1"}
    if env_root_dir is None:
        return env_vars
    # Note: Both conda pip and virtualenv can use the pip cache directory.
    return {**env_vars, "PIP_CACHE_DIR": os.path.join(env_root_dir, _PIP_CACHE_DIR)}
371  
372  
# Sub-directory names created under a user-supplied `env_root_dir`: the first holds the virtual
# environments, the second is used as the python installation root.
_VIRTUALENV_ENVS_DIR = "virtualenv_envs"
_PYENV_ROOT_DIR = "pyenv_root"
375  
376  
def _get_or_create_virtualenv(
    local_model_path,
    env_id=None,
    env_root_dir=None,
    capture_output=False,
    pip_requirements_override: list[str] | None = None,
    env_manager: Literal["virtualenv", "uv"] = em.UV,
    extra_envs: dict[str, str] | None = None,
):
    """Restores an MLflow model's environment in a virtual environment and returns a command
    to activate it.

    Args:
        local_model_path: Local directory containing the model artifacts.
        env_id: Optional string that is added to the contents of the yaml file before
            calculating the hash. It can be used to distinguish environments that have the
            same conda dependencies but are supposed to be different based on the context.
            For example, when serving the model we may install additional dependencies to the
            environment after the environment has been activated.
        env_root_dir: Optional root directory. When given, environments are created under
            `<env_root_dir>/virtualenv_envs`, python is installed under
            `<env_root_dir>/pyenv_root` and pip's cache directory is placed under it. When
            None, environments are created under `_get_mlflow_virtualenv_root()`.
        capture_output: Forwarded to `_create_virtualenv` as `capture_output`.
        pip_requirements_override: If specified, install the specified python dependencies to
            the environment (upgrade if already installed).
        env_manager: Specifies the environment manager to use to create the environment.
            Defaults to "uv".

            .. tip::
                It is highly recommended to use "uv" as it has significant performance improvements
                over "virtualenv".
        extra_envs: If specified, a dictionary of extra environment variables that is forwarded
            to `_create_virtualenv` as `extra_env`. The dictionary itself is not modified;
            the variables from `_get_virtualenv_extra_env_vars` take precedence on conflicts.

    Returns:
        Command to activate the created virtual environment
        (e.g. "source /path/to/bin/activate").

    Raises:
        MlflowException: If `env_manager` is "virtualenv" and pyenv cannot be found.
        PermissionError: If the environment directory cannot be accessed and no alternative
            location can be used.

    """
    if env_manager == em.VIRTUALENV:
        _validate_pyenv_is_available()

    local_model_path = Path(local_model_path)
    python_env = _get_python_env(local_model_path)

    if env_root_dir is None:
        virtual_envs_root_path = Path(_get_mlflow_virtualenv_root())
        python_install_dir = None
    else:
        virtual_envs_root_path = Path(env_root_dir) / _VIRTUALENV_ENVS_DIR
        pyenv_root_path = Path(env_root_dir) / _PYENV_ROOT_DIR
        pyenv_root_path.mkdir(parents=True, exist_ok=True)
        python_install_dir = str(pyenv_root_path)

    virtual_envs_root_path.mkdir(parents=True, exist_ok=True)
    env_name = _get_virtualenv_name(python_env, local_model_path, env_id)
    env_dir = virtual_envs_root_path / env_name
    try:
        # Only probes accessibility; the result is not needed here.
        env_dir.exists()
    except PermissionError:
        # The alternative location is derived from `env_root_dir`, so it is only available
        # when one was given; otherwise the original PermissionError is re-raised below
        # instead of being masked by a TypeError from `Path(None)`.
        if is_in_databricks_runtime() and env_root_dir is not None:
            # Updating env_name only doesn't work because the cluster may not have
            # permission to access the original virtual_envs_root_path
            virtual_envs_root_path = (
                Path(env_root_dir) / f"{_VIRTUALENV_ENVS_DIR}_{uuid.uuid4().hex[:8]}"
            )
            virtual_envs_root_path.mkdir(parents=True, exist_ok=True)
            env_dir = virtual_envs_root_path / env_name
        else:
            _logger.warning(
                f"Existing virtual environment directory {env_dir} cannot be accessed "
                "due to permission error. Check the permissions of the directory and "
                "try again. If the issue persists, consider cleaning up the directory manually."
            )
            raise

    # Build a new dictionary rather than updating `extra_envs` in place, so the caller's
    # dictionary is left untouched.
    extra_envs = {**(extra_envs or {}), **_get_virtualenv_extra_env_vars(env_root_dir)}

    # Create an environment
    return _create_virtualenv(
        local_model_path=local_model_path,
        python_env=python_env,
        env_dir=env_dir,
        python_install_dir=python_install_dir,
        env_manager=env_manager,
        extra_env=extra_envs,
        capture_output=capture_output,
        pip_requirements_override=pip_requirements_override,
    )