# docker_utils.py
"""Helpers for generating a model-serving Dockerfile and building an image from it."""

import logging
import os
from subprocess import Popen
from typing import Literal
from urllib.parse import urlparse

from packaging.version import Version

from mlflow.environment_variables import MLFLOW_DOCKER_OPENJDK_VERSION
from mlflow.utils import env_manager as em
from mlflow.utils.file_utils import _copy_project
from mlflow.version import VERSION

_logger = logging.getLogger(__name__)

# Base images that `generate_dockerfile` has dedicated setup steps for.
UBUNTU_BASE_IMAGE = "ubuntu:22.04"
PYTHON_SLIM_BASE_IMAGE = "python:{version}-slim"


# Dockerfile fragment used on the Ubuntu base image when the env manager is not conda:
# installs Python build dependencies, clones pyenv, and installs python3.10 plus pip.
# NOTE(review): the tag regex below only accepts version components made of the digits
# 1-9, so pyenv tags containing a `0` are skipped -- confirm this is intentional.
SETUP_PYENV = r"""# Setup pyenv
RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata \
    libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm \
    libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev
RUN git clone \
    --depth 1 \
    --branch $(git ls-remote --tags --sort=v:refname https://github.com/pyenv/pyenv.git | grep -o -E 'v[1-9]+(\.[1-9]+)+$' | tail -1) \
    https://github.com/pyenv/pyenv.git /root/.pyenv
ENV PYENV_ROOT="/root/.pyenv"
ENV PATH="$PYENV_ROOT/bin:$PATH"
RUN apt install -y software-properties-common \
    && apt update \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt update \
    && apt install -y python3.10 python3.10-distutils \
    # Remove python3-blinker to avoid pip uninstall conflicts
    && apt remove -y python3-blinker \
    && ln -s -f $(which python3.10) /usr/bin/python \
    && wget https://bootstrap.pypa.io/get-pip.py -O /tmp/get-pip.py \
    && python /tmp/get-pip.py
"""  # noqa: E501

# Skeleton of the generated Dockerfile; the `{...}` placeholders are filled in by
# `generate_dockerfile` via `str.format`.
_DOCKERFILE_TEMPLATE = """# Build an image that can serve mlflow models.
FROM {base_image}

{setup_python_venv}

{setup_java}

WORKDIR /opt/mlflow

{install_mlflow}

{install_model_and_deps}

ENV MLFLOW_DISABLE_ENV_CREATION={disable_env_creation}
ENV ENABLE_MLSERVER={enable_mlserver}

# granting read/write access and conditional execution authority to all child directories
# and files to allow for deployment to AWS Sagemaker Serverless Endpoints
# (see https://docs.aws.amazon.com/sagemaker/latest/dg/serverless-endpoints.html)
RUN chmod o+rwX /opt/mlflow/

# clean up apt cache to reduce image size
RUN rm -rf /var/lib/apt/lists/*

ENTRYPOINT ["python", "-c", "{entrypoint}"]
"""


# Dockerfile fragment used on the Ubuntu base image when the env manager is conda.
SETUP_MINICONDA = """# Setup miniconda
RUN curl --fail -L https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh > miniconda.sh
RUN bash ./miniconda.sh -b -p /miniconda && rm ./miniconda.sh
ENV PATH="/miniconda/bin:$PATH"
# Remove default channels to avoid `CondaToSNonInteractiveError`.
# See https://github.com/mlflow/mlflow/pull/16752 for more details.
RUN conda config --system --remove channels defaults && conda config --system --add channels conda-forge
"""  # noqa: E501


def generate_dockerfile(
    output_dir: str,
    base_image: str,
    model_install_steps: str | None,
    entrypoint: str,
    env_manager: Literal["conda", "local", "virtualenv"] = em.CONDA,
    mlflow_home: str | None = None,
    enable_mlserver: bool = False,
    disable_env_creation_at_runtime: bool = True,
    install_java: bool | None = None,
):
    """
    Generates a Dockerfile that can be used to build a docker image, that serves ML model
    stored and tracked in MLflow.

    The file is written to ``<output_dir>/Dockerfile``.

    Args:
        output_dir: Directory the Dockerfile is written to. It is also used as the Docker
            context directory that a local MLflow source tree is copied into when
            ``mlflow_home`` is set.
        base_image: Image used in the ``FROM`` instruction. Images whose name starts with
            ``python:`` only get nginx installed; ``UBUNTU_BASE_IMAGE`` gets the full
            Python (and optionally Java) setup; any other image gets no setup steps.
        model_install_steps: Dockerfile instructions inserted after the MLflow installation
            step. ``None`` is treated as "no steps" and renders as an empty section.
        entrypoint: Python source passed to ``python -c`` in the ``ENTRYPOINT`` instruction.
        env_manager: With the Ubuntu base image, ``conda`` selects the miniconda setup and
            any other value selects the pyenv setup.
        mlflow_home: Optional path to a local MLflow source tree to install instead of a
            released (or GitHub master) version.
        enable_mlserver: Value written to the ``ENABLE_MLSERVER`` environment variable.
        disable_env_creation_at_runtime: Value written to the
            ``MLFLOW_DISABLE_ENV_CREATION`` environment variable.
        install_java: With the Ubuntu base image, Java is installed unless this is
            explicitly ``False``. With a ``python:`` base image the option is not
            supported and a truthy value only produces a warning.
    """

    setup_java_steps = ""
    setup_python_venv_steps = ""
    install_mlflow_steps = _pip_mlflow_install_step(output_dir, mlflow_home)

    if base_image.startswith("python:"):
        if install_java:
            _logger.warning(
                "`install_java` option is not supported when using python base image, "
                "switch to UBUNTU_BASE_IMAGE to enable java installation."
            )
        setup_python_venv_steps = (
            "RUN apt-get -y update && apt-get install -y --no-install-recommends nginx"
        )

    elif base_image == UBUNTU_BASE_IMAGE:
        setup_python_venv_steps = (
            "RUN apt-get -y update && DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get install -y "
            "--no-install-recommends wget curl nginx ca-certificates bzip2 build-essential cmake "
            "git-core\n\n"
        )
        setup_python_venv_steps += SETUP_MINICONDA if env_manager == em.CONDA else SETUP_PYENV
        # `None` (the default) means "install Java"; only an explicit `False` opts out.
        if install_java is not False:
            jdk_ver = MLFLOW_DOCKER_OPENJDK_VERSION.get()
            setup_java_steps = (
                "# Setup Java\n"
                f"RUN apt-get install -y --no-install-recommends openjdk-{jdk_ver}-jdk maven\n"
                f"ENV JAVA_HOME=/usr/lib/jvm/java-{jdk_ver}-openjdk-amd64"
            )

    dockerfile_text = _DOCKERFILE_TEMPLATE.format(
        base_image=base_image,
        setup_python_venv=setup_python_venv_steps,
        setup_java=setup_java_steps,
        install_mlflow=install_mlflow_steps,
        # `str.format` would render `None` as the literal text "None", which is not a
        # valid Dockerfile instruction, so fall back to an empty section.
        install_model_and_deps=model_install_steps or "",
        entrypoint=entrypoint,
        enable_mlserver=enable_mlserver,
        disable_env_creation=disable_env_creation_at_runtime,
    )

    # Write with an explicit encoding so the result does not depend on the host locale.
    with open(os.path.join(output_dir, "Dockerfile"), "w", encoding="utf-8") as f:
        f.write(dockerfile_text)


def _get_maven_proxy():
    """
    Build Maven ``-D`` proxy options from the ``http_proxy`` / ``https_proxy`` environment
    variables.

    Returns:
        A space-separated option string, or an empty string unless both variables are set
        to non-empty values. Proxy user/password options are appended only when the
        ``http_proxy`` URL carries both a username and a password.

    Raises:
        AssertionError: If either proxy URL has no hostname or no port.
    """
    http_proxy = os.environ.get("http_proxy")
    https_proxy = os.environ.get("https_proxy")
    if not http_proxy or not https_proxy:
        return ""

    # Expects proxies as either PROTOCOL://{USER}:{PASSWORD}@HOSTNAME:PORT
    # or PROTOCOL://HOSTNAME:PORT
    parsed_http_proxy = urlparse(http_proxy)
    assert parsed_http_proxy.hostname is not None, "Invalid `http_proxy` hostname."
    assert parsed_http_proxy.port is not None, f"Invalid proxy port: {parsed_http_proxy.port}"

    parsed_https_proxy = urlparse(https_proxy)
    assert parsed_https_proxy.hostname is not None, "Invalid `https_proxy` hostname."
    assert parsed_https_proxy.port is not None, f"Invalid proxy port: {parsed_https_proxy.port}"

    maven_proxy_options = (
        "-DproxySet=true",
        f"-Dhttp.proxyHost={parsed_http_proxy.hostname}",
        f"-Dhttp.proxyPort={parsed_http_proxy.port}",
        f"-Dhttps.proxyHost={parsed_https_proxy.hostname}",
        f"-Dhttps.proxyPort={parsed_https_proxy.port}",
        "-Dhttps.nonProxyHosts=repo.maven.apache.org",
    )

    # Credentials are taken from the `http_proxy` URL only.
    if parsed_http_proxy.username is None or parsed_http_proxy.password is None:
        return " ".join(maven_proxy_options)

    return " ".join((
        *maven_proxy_options,
        f"-Dhttp.proxyUser={parsed_http_proxy.username}",
        f"-Dhttp.proxyPassword={parsed_http_proxy.password}",
    ))


def _pip_mlflow_install_step(dockerfile_context_dir, mlflow_home):
    """
    Get docker build commands for installing MLflow given a Docker context dir and optional source
    directory

    Args:
        dockerfile_context_dir: Docker build context directory; a local MLflow source tree
            is copied into it when ``mlflow_home`` is set.
        mlflow_home: Optional path to a local MLflow source tree. When falsy, MLflow is
            installed with pip from GitHub (dev versions) or pinned to ``VERSION``.

    Returns:
        A Dockerfile snippet (comment line plus instructions) that installs MLflow.
    """
    if mlflow_home:
        mlflow_dir = _copy_project(
            src_path=os.path.abspath(mlflow_home), dst_path=dockerfile_context_dir
        )
        return (
            "# Install MLflow from local source\n"
            f"COPY {mlflow_dir} /opt/mlflow\n"
            "RUN pip install /opt/mlflow"
        )
    else:
        # Dev version is not available on PyPI, install from GitHub instead
        if Version(VERSION).is_devrelease:
            return (
                "# Install MLflow\n"
                "RUN pip install https://github.com/mlflow/mlflow/archive/refs/heads/master.zip"
            )
        return f"# Install MLflow\nRUN pip install mlflow=={VERSION}"


def build_image_from_context(context_dir: str, image_name: str):
    """
    Run ``docker build`` in ``context_dir`` and tag the resulting image as ``image_name``.

    Args:
        context_dir: Directory used as the working directory and build context; the build
            uses the file named ``Dockerfile`` relative to it.
        image_name: Tag passed to ``docker build -t``.

    Raises:
        RuntimeError: If the ``docker build`` process exits with a non-zero status.
    """
    import docker

    client = docker.from_env()
    # In Docker < 19, `docker build` doesn't support the `--platform` option
    is_platform_supported = int(client.version()["Version"].split(".")[0]) >= 19
    # Enforcing the AMD64 architecture build for Apple M1 users
    platform_option = ["--platform", "linux/amd64"] if is_platform_supported else []
    commands = [
        "docker",
        "build",
        "-t",
        image_name,
        "-f",
        "Dockerfile",
        *platform_option,
        ".",
    ]
    proc = Popen(commands, cwd=context_dir)
    # `wait()` returns the exit status; any non-zero value means the build failed.
    if proc.wait():
        raise RuntimeError("Docker build failed.")