virtualenv.py
import logging
import os
import re
import shutil
import tempfile
import uuid
from pathlib import Path
from typing import Literal

from packaging.version import Version

import mlflow
from mlflow.environment_variables import _MLFLOW_TESTING, MLFLOW_ENV_ROOT
from mlflow.exceptions import MlflowException
from mlflow.models.model import MLMODEL_FILE_NAME, Model
from mlflow.utils import env_manager as em
from mlflow.utils.conda import _PIP_CACHE_DIR
from mlflow.utils.databricks_utils import is_in_databricks_runtime
from mlflow.utils.environment import (
    _CONDA_ENV_FILE_NAME,
    _PYTHON_ENV_FILE_NAME,
    _REQUIREMENTS_FILE_NAME,
    _get_mlflow_env_name,
    _PythonEnv,
)
from mlflow.utils.file_utils import remove_on_error
from mlflow.utils.os import is_windows
from mlflow.utils.process import _exec_cmd, _join_commands
from mlflow.utils.requirements_utils import _parse_requirements
from mlflow.utils.uv_utils import has_uv_lock_artifact, run_uv_sync, setup_uv_sync_environment

_logger = logging.getLogger(__name__)


def _get_mlflow_virtualenv_root():
    """
    Returns the root directory to store virtualenv environments created by MLflow.
    """
    return MLFLOW_ENV_ROOT.get()


_DATABRICKS_PYENV_BIN_PATH = "/databricks/.pyenv/bin/pyenv"


def _is_pyenv_available():
    """
    Returns True if pyenv is available, otherwise False.
    """
    return _get_pyenv_bin_path() is not None


def _validate_pyenv_is_available():
    """
    Validates pyenv is available. If not, throws an `MlflowException` with a brief instruction on
    how to install pyenv.
    """
    url = (
        "https://github.com/pyenv/pyenv#installation"
        if not is_windows()
        else "https://github.com/pyenv-win/pyenv-win#installation"
    )
    if not _is_pyenv_available():
        raise MlflowException(
            f"Could not find the pyenv binary. See {url} for installation instructions."
        )


_SEMANTIC_VERSION_REGEX = re.compile(r"^([0-9]+)\.([0-9]+)\.([0-9]+)$")


def _get_pyenv_bin_path():
    """
    Returns the path to the pyenv binary, or None if it cannot be found.

    The fixed Databricks location is preferred when it exists; otherwise `pyenv` is looked up
    on `PATH`.
    """
    if os.path.exists(_DATABRICKS_PYENV_BIN_PATH):
        return _DATABRICKS_PYENV_BIN_PATH
    return shutil.which("pyenv")


def _find_latest_installable_python_version(version_prefix):
    """
    Find the latest installable python version that matches the given version prefix
    from the output of `pyenv install --list`. For example,
    `_find_latest_installable_python_version("3.8")` returns '3.8.x' where 'x' represents the
    latest micro version in 3.8.

    Only entries of the exact form `major.minor.micro` are considered. Matching is a plain
    string-prefix test, so a prefix such as "3.1" also matches "3.10.x".

    Raises:
        MlflowException: If no listed version starts with `version_prefix`.
    """
    lines = _exec_cmd(
        [_get_pyenv_bin_path(), "install", "--list"],
        capture_output=True,
        shell=is_windows(),
    ).stdout.splitlines()
    semantic_versions = filter(_SEMANTIC_VERSION_REGEX.match, map(str.strip, lines))
    matched = [v for v in semantic_versions if v.startswith(version_prefix)]
    if not matched:
        raise MlflowException(f"Could not find python version that matches {version_prefix}")
    return max(matched, key=Version)


def _install_python(version, pyenv_root=None, capture_output=False):
    """Installs a specified version of python with pyenv and returns a path to the installed python
    binary.

    Args:
        version: Python version to install. If it is not a full `major.minor.micro` version,
            it is treated as a prefix and resolved to the latest installable match.
        pyenv_root: The value of the "PYENV_ROOT" environment variable used when running
            `pyenv install` which installs python in `{PYENV_ROOT}/versions/{version}`.
            On Windows the returned path is always built from the `PYENV_ROOT` environment
            variable of the current process, not from this argument.
        capture_output: Set the `capture_output` argument when calling `_exec_cmd`.

    Returns:
        Path to the installed python binary.

    Raises:
        MlflowException: On Windows, if the `PYENV_ROOT` environment variable is not set.
    """
    version = (
        version
        if _SEMANTIC_VERSION_REGEX.match(version)
        else _find_latest_installable_python_version(version)
    )
    _logger.info("Installing python %s if it does not exist", version)
    # pyenv-win doesn't support `--skip-existing` but its behavior is enabled by default
    # https://github.com/pyenv-win/pyenv-win/pull/314
    pyenv_install_options = ("--skip-existing",) if not is_windows() else ()
    extra_env = {"PYENV_ROOT": pyenv_root} if pyenv_root else None
    pyenv_bin_path = _get_pyenv_bin_path()
    _exec_cmd(
        [pyenv_bin_path, "install", *pyenv_install_options, version],
        capture_output=capture_output,
        # Windows fails to find pyenv and throws `FileNotFoundError` without `shell=True`
        shell=is_windows(),
        extra_env=extra_env,
    )

    if not is_windows():
        if pyenv_root is None:
            pyenv_root = _exec_cmd([pyenv_bin_path, "root"], capture_output=True).stdout.strip()
        path_to_bin = ("bin", "python")
    else:
        # pyenv-win doesn't provide the `pyenv root` command
        pyenv_root = os.environ.get("PYENV_ROOT")
        if pyenv_root is None:
            raise MlflowException("Environment variable 'PYENV_ROOT' must be set")
        path_to_bin = ("python.exe",)
    return Path(pyenv_root).joinpath("versions", version, *path_to_bin)


def _get_conda_env_file(model_config):
    """
    Returns the conda environment file name recorded in the pyfunc flavor of `model_config`,
    falling back to the default conda file name when the flavor has no env entry.
    """
    from mlflow.pyfunc import _extract_conda_env

    for flavor, config in model_config.flavors.items():
        if flavor == mlflow.pyfunc.FLAVOR_NAME:
            if env := config.get(mlflow.pyfunc.ENV):
                return _extract_conda_env(env)
    return _CONDA_ENV_FILE_NAME


def _get_python_env_file(model_config):
    """
    Returns the python environment file name recorded in the pyfunc flavor of `model_config`,
    falling back to the default python env file name when the flavor's env entry is not a
    dictionary.
    """
    from mlflow.pyfunc import EnvType

    for flavor, config in model_config.flavors.items():
        if flavor == mlflow.pyfunc.FLAVOR_NAME:
            env = config.get(mlflow.pyfunc.ENV)
            if isinstance(env, dict):
                # Models saved in MLflow >= 2.0 use a dictionary for the pyfunc flavor
                # `env` config, where the keys are different environment managers (e.g.
                # conda, virtualenv) and the values are corresponding environment paths
                return env[EnvType.VIRTUALENV]
    return _PYTHON_ENV_FILE_NAME


def _get_python_env(local_model_path):
    """Constructs `_PythonEnv` from the model artifacts stored in `local_model_path`. If
    `python_env.yaml` is available, use it, otherwise extract model dependencies from `conda.yaml`.
    If `conda.yaml` contains conda dependencies except `python`, `pip`, `setuptools`, and, `wheel`,
    an `MlflowException` is thrown because conda dependencies cannot be installed in a virtualenv
    environment.

    Args:
        local_model_path: Local directory containing the model artifacts.

    Returns:
        `_PythonEnv` instance.

    """
    model_config = Model.load(local_model_path / MLMODEL_FILE_NAME)
    python_env_file = local_model_path / _get_python_env_file(model_config)
    conda_env_file = local_model_path / _get_conda_env_file(model_config)
    requirements_file = local_model_path / _REQUIREMENTS_FILE_NAME

    if python_env_file.exists():
        return _PythonEnv.from_yaml(python_env_file)
    else:
        _logger.info(
            "This model is missing %s, which is because it was logged in an older version "
            "of MLflow (< 1.26.0) that does not support restoring a model environment with "
            "virtualenv. Attempting to extract model dependencies from %s and %s instead.",
            _PYTHON_ENV_FILE_NAME,
            _REQUIREMENTS_FILE_NAME,
            _CONDA_ENV_FILE_NAME,
        )
        if requirements_file.exists():
            # Python version and build dependencies come from the conda file, while the
            # pip dependencies are delegated to the model's requirements file.
            deps = _PythonEnv.get_dependencies_from_conda_yaml(conda_env_file)
            return _PythonEnv(
                python=deps["python"],
                build_dependencies=deps["build_dependencies"],
                dependencies=[f"-r {_REQUIREMENTS_FILE_NAME}"],
            )
        else:
            return _PythonEnv.from_conda_yaml(conda_env_file)


def _get_virtualenv_name(python_env, work_dir_path, env_id=None):
    """
    Returns the environment name derived from `python_env`, its parsed requirements (resolved
    relative to `work_dir_path`, sorted for determinism) and the optional `env_id`.
    """
    requirements = _parse_requirements(
        python_env.dependencies,
        is_constraint=False,
        base_dir=work_dir_path,
    )
    return _get_mlflow_env_name(
        str(python_env) + "".join(map(str, sorted(requirements))) + (env_id or "")
    )


def _get_virtualenv_activate_cmd(env_dir: Path) -> str:
    """
    Returns the command that activates the environment located in `env_dir`
    (`source <env_dir>/bin/activate`, or the path to `Scripts/activate.bat` on Windows).
    """
    # Create a command to activate the environment
    paths = ("bin", "activate") if not is_windows() else ("Scripts", "activate.bat")
    activate_cmd = env_dir.joinpath(*paths)
    return f"source {activate_cmd}" if not is_windows() else str(activate_cmd)


def _get_uv_env_creation_command(env_dir: str | Path, python_version: str) -> list[str]:
    """
    Returns the argument list of the `uv venv` command that creates an environment in
    `env_dir` using the requested `python_version`.
    """
    return ["uv", "venv", str(env_dir), f"--python={python_version}"]


def _create_virtualenv(
    local_model_path: Path,
    python_env: _PythonEnv,
    env_dir: Path,
    python_install_dir: str | None = None,
    env_manager: Literal["virtualenv", "uv"] = em.UV,
    extra_env: dict[str, str] | None = None,
    capture_output: bool = False,
    pip_requirements_override: list[str] | None = None,
):
    """Creates the environment in `env_dir` (unless it already exists) and installs the model's
    dependencies into it.

    Args:
        local_model_path: Local directory containing the model artifacts.
        python_env: `_PythonEnv` describing the python version and dependencies to install.
        env_dir: Directory of the environment. If it already exists, it is reused as-is and
            nothing is installed.
        python_install_dir: Optional directory python is installed into. Used as `PYENV_ROOT`
            for "virtualenv" and as `UV_PYTHON_INSTALL_DIR` for "uv".
        env_manager: Either "virtualenv" or "uv".
        extra_env: Extra environment variables passed to the dependency installation commands.
        capture_output: Set the `capture_output` argument when calling `_exec_cmd`.
        pip_requirements_override: If specified, additional requirements installed after the
            model dependencies.

    Returns:
        Command to activate the environment.

    Raises:
        MlflowException: If `env_manager` is invalid, or if restoring the environment with
            `uv sync` fails.
    """
    if env_manager not in {em.VIRTUALENV, em.UV}:
        raise MlflowException.invalid_parameter_value(
            f"Invalid value for `env_manager`: {env_manager}. "
            f"Must be one of `{em.VIRTUALENV}, {em.UV}`"
        )

    activate_cmd = _get_virtualenv_activate_cmd(env_dir)
    if env_dir.exists():
        _logger.info(f"Environment {env_dir} already exists")
        return activate_cmd

    env_creation_extra_env = {}
    if env_manager == em.VIRTUALENV:
        python_bin_path = _install_python(
            python_env.python, pyenv_root=python_install_dir, capture_output=capture_output
        )
        _logger.info(f"Creating a new environment in {env_dir} with {python_bin_path}")
        env_creation_cmd = [python_bin_path, "-m", "venv", env_dir]
        install_deps_cmd_prefix = "python -m pip install"
    elif env_manager == em.UV:
        _logger.info(
            f"Creating a new environment in {env_dir} with python "
            f"version {python_env.python} using uv"
        )
        env_creation_cmd = _get_uv_env_creation_command(env_dir, python_env.python)
        install_deps_cmd_prefix = "uv pip install"
        if python_install_dir:
            # Setting `UV_PYTHON_INSTALL_DIR` to make `uv venv` install python into
            # the directory it points to.
            env_creation_extra_env["UV_PYTHON_INSTALL_DIR"] = python_install_dir
        if _MLFLOW_TESTING.get():
            os.environ["RUST_LOG"] = "uv=debug"
    # Everything below runs under `remove_on_error` so that a failure at any step does not
    # leave a partially populated `env_dir` that a later call would reuse.
    with remove_on_error(
        env_dir,
        onerror=lambda e: _logger.warning(
            "Encountered an unexpected error: %s while creating a virtualenv environment in %s, "
            "removing the environment directory...",
            repr(e),
            env_dir,
        ),
    ):
        _exec_cmd(
            env_creation_cmd,
            capture_output=capture_output,
            extra_env=env_creation_extra_env,
        )

        # Use UV sync if model has uv.lock artifact and using UV env manager
        if env_manager == em.UV and has_uv_lock_artifact(local_model_path):
            _logger.info("Found uv.lock artifact, restoring environment with uv sync")
            if not setup_uv_sync_environment(env_dir, local_model_path, python_env.python):
                raise MlflowException(
                    "Failed to set up uv sync environment. Ensure the model's uv.lock "
                    "and pyproject.toml artifacts are valid."
                )
            if not run_uv_sync(env_dir, capture_output=capture_output):
                raise MlflowException(
                    "Failed to restore model environment using uv sync. Ensure that uv is "
                    "installed and the model's uv.lock artifact is valid. To install "
                    "dependencies with pip instead, set the env_manager parameter to "
                    "'virtualenv' instead of 'uv'."
                )
            _logger.info("UV sync completed successfully")
        else:
            _logger.info("Installing dependencies")
            # Build dependencies are installed first, then the model dependencies; empty
            # groups are skipped.
            for deps in filter(None, [python_env.build_dependencies, python_env.dependencies]):
                with tempfile.TemporaryDirectory() as tmpdir:
                    # Create a temporary requirements file in the model directory to resolve the
                    # references in it correctly. To do this, we must first symlink or copy the
                    # model directory's contents to a temporary location for compatibility with
                    # deployment tools that store models in a read-only mount
                    try:
                        for model_item in os.listdir(local_model_path):
                            os.symlink(
                                src=os.path.join(local_model_path, model_item),
                                dst=os.path.join(tmpdir, model_item),
                            )
                    except Exception as e:
                        _logger.warning(
                            "Failed to symlink model directory during dependency installation"
                            " Copying instead. Exception: %s",
                            e,
                        )
                        _copy_model_to_writeable_destination(local_model_path, tmpdir)

                    tmp_req_file = f"requirements.{uuid.uuid4().hex}.txt"
                    Path(tmpdir).joinpath(tmp_req_file).write_text("\n".join(deps))
                    cmd = _join_commands(
                        activate_cmd, f"{install_deps_cmd_prefix} -r {tmp_req_file}"
                    )
                    _exec_cmd(cmd, capture_output=capture_output, cwd=tmpdir, extra_env=extra_env)

        if pip_requirements_override:
            _logger.info(
                "Installing additional dependencies specified by "
                f"pip_requirements_override: {pip_requirements_override}"
            )
            # NOTE(review): the requirements are interpolated unquoted into the command
            # string; if the joined command is run through a shell, specifiers containing
            # characters such as `>` or `<` may be misinterpreted -- confirm against
            # `_join_commands` / `_exec_cmd`.
            cmd = _join_commands(
                activate_cmd,
                f"{install_deps_cmd_prefix} --quiet {' '.join(pip_requirements_override)}",
            )
            _exec_cmd(cmd, capture_output=capture_output, extra_env=extra_env)

        return activate_cmd


def _copy_model_to_writeable_destination(model_src, dst):
    """
    Copies the specified `model_src` directory, which may be read-only, to the writeable `dst`
    directory.
    """
    os.makedirs(dst, exist_ok=True)
    for model_item in os.listdir(model_src):
        src_path = os.path.join(model_src, model_item)
        # Copy individual files and subdirectories, rather than using `shutil.copytree()`
        # because `shutil.copytree()` will apply the permissions from the source directory,
        # which may be read-only
        #
        # The directory check must use the full source path: `os.listdir` yields bare names,
        # which would otherwise be resolved against the current working directory.
        copy_fn = shutil.copytree if os.path.isdir(src_path) else shutil.copy2

        copy_fn(
            src=src_path,
            dst=os.path.join(dst, model_item),
        )


def _get_virtualenv_extra_env_vars(env_root_dir=None):
    """
    Returns the extra environment variables used when installing dependencies. When
    `env_root_dir` is given, the pip cache directory is placed underneath it.
    """
    extra_env = {
        # PIP_NO_INPUT=1 makes pip run in non-interactive mode,
        # otherwise pip might prompt "yes or no" and ask stdin input
        "PIP_NO_INPUT": "1",
    }
    if env_root_dir is not None:
        # Note: Both conda pip and virtualenv can use the pip cache directory.
        extra_env["PIP_CACHE_DIR"] = os.path.join(env_root_dir, _PIP_CACHE_DIR)
    return extra_env


_VIRTUALENV_ENVS_DIR = "virtualenv_envs"
_PYENV_ROOT_DIR = "pyenv_root"


def _get_or_create_virtualenv(
    local_model_path,
    env_id=None,
    env_root_dir=None,
    capture_output=False,
    pip_requirements_override: list[str] | None = None,
    env_manager: Literal["virtualenv", "uv"] = em.UV,
    extra_envs: dict[str, str] | None = None,
):
    """Restores an MLflow model's environment in a virtual environment and returns a command
    to activate it.

    Args:
        local_model_path: Local directory containing the model artifacts.
        env_id: Optional string that is added to the contents of the yaml file before
            calculating the hash. It can be used to distinguish environments that have the
            same conda dependencies but are supposed to be different based on the context.
            For example, when serving the model we may install additional dependencies to the
            environment after the environment has been activated.
        env_root_dir: Optional root directory. If specified, environments are created under
            `{env_root_dir}/virtualenv_envs` and python is installed under
            `{env_root_dir}/pyenv_root`; otherwise the MLflow virtualenv root is used.
        capture_output: Set the `capture_output` argument when calling `_exec_cmd`.
        pip_requirements_override: If specified, install the specified python dependencies to
            the environment (upgrade if already installed).
        env_manager: Specifies the environment manager to use to create the environment.
            Defaults to "uv".
        extra_envs: If specified, a dictionary of extra environment variables will be passed to the
            environment creation command. The dictionary is not modified.

            .. tip::
                It is highly recommended to use "uv" as it has significant performance improvements
                over "virtualenv".

    Returns:
        Command to activate the created virtual environment
        (e.g. "source /path/to/bin/activate").

    """
    if env_manager == em.VIRTUALENV:
        _validate_pyenv_is_available()

    local_model_path = Path(local_model_path)
    python_env = _get_python_env(local_model_path)

    if env_root_dir is None:
        virtual_envs_root_path = Path(_get_mlflow_virtualenv_root())
        python_install_dir = None
    else:
        virtual_envs_root_path = Path(env_root_dir) / _VIRTUALENV_ENVS_DIR
        pyenv_root_path = Path(env_root_dir) / _PYENV_ROOT_DIR
        pyenv_root_path.mkdir(parents=True, exist_ok=True)
        python_install_dir = str(pyenv_root_path)

    virtual_envs_root_path.mkdir(parents=True, exist_ok=True)
    env_name = _get_virtualenv_name(python_env, local_model_path, env_id)
    env_dir = virtual_envs_root_path / env_name
    try:
        # Probe accessibility only; the result itself is not needed here.
        env_dir.exists()
    except PermissionError:
        if is_in_databricks_runtime():
            # Updating env_name only doesn't work because the cluster may not have
            # permission to access the original virtual_envs_root_path
            #
            # Use a uniquely suffixed sibling of the original root. With `env_root_dir` set
            # this is `{env_root_dir}/virtualenv_envs_<suffix>`; deriving it from the root
            # path also works when `env_root_dir` is None.
            virtual_envs_root_path = virtual_envs_root_path.with_name(
                f"{virtual_envs_root_path.name}_{uuid.uuid4().hex[:8]}"
            )
            virtual_envs_root_path.mkdir(parents=True, exist_ok=True)
            env_dir = virtual_envs_root_path / env_name
        else:
            _logger.warning(
                f"Existing virtual environment directory {env_dir} cannot be accessed "
                "due to permission error. Check the permissions of the directory and "
                "try again. If the issue persists, consider cleaning up the directory manually."
            )
            raise

    # Merge into a new dict so the caller's `extra_envs` is left untouched; the virtualenv
    # defaults take precedence over caller-supplied values with the same key.
    extra_envs = {**(extra_envs or {}), **_get_virtualenv_extra_env_vars(env_root_dir)}

    # Create an environment
    return _create_virtualenv(
        local_model_path=local_model_path,
        python_env=python_env,
        env_dir=env_dir,
        python_install_dir=python_install_dir,
        env_manager=env_manager,
        extra_env=extra_envs,
        capture_output=capture_output,
        pip_requirements_override=pip_requirements_override,
    )