/ mlflow / models / docker_utils.py
docker_utils.py
  1  import logging
  2  import os
  3  from subprocess import Popen
  4  from typing import Literal
  5  from urllib.parse import urlparse
  6  
  7  from packaging.version import Version
  8  
  9  from mlflow.environment_variables import MLFLOW_DOCKER_OPENJDK_VERSION
 10  from mlflow.utils import env_manager as em
 11  from mlflow.utils.file_utils import _copy_project
 12  from mlflow.version import VERSION
 13  
# Module-level logger, named after this module.
_logger = logging.getLogger(__name__)

# Base image tag that `generate_dockerfile` compares against (exact string equality) to
# select the Ubuntu setup path: system packages, miniconda or pyenv, and optionally Java.
UBUNTU_BASE_IMAGE = "ubuntu:22.04"
# Template for a slim Python base image tag. The `{version}` placeholder is not filled in
# anywhere in this module; presumably callers format it with a Python version -- verify
# against the call sites.
PYTHON_SLIM_BASE_IMAGE = "python:{version}-slim"
 18  
 19  
# Dockerfile fragment appended by `generate_dockerfile` for the Ubuntu base image when the
# env manager is not conda. In order, it:
#   1. installs tzdata and the listed build dependencies through apt-get,
#   2. shallow-clones pyenv into /root/.pyenv at the last tag (sorted by version refname)
#      that matches the grep pattern, and puts `$PYENV_ROOT/bin` on PATH,
#   3. installs python3.10 from the deadsnakes PPA, removes python3-blinker, links
#      /usr/bin/python to python3.10 and bootstraps pip with get-pip.py.
# This is a raw string so that the backslash line continuations and the `\.` in the grep
# pattern reach the Dockerfile unchanged.
# NOTE(review): the pattern `v[1-9]+(\.[1-9]+)+$` only matches tags whose components are
# made of the digits 1-9, so any tag containing a `0` is skipped -- confirm this is intended.
SETUP_PYENV = r"""# Setup pyenv
RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata \
    libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm \
    libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev
RUN git clone \
    --depth 1 \
    --branch $(git ls-remote --tags --sort=v:refname https://github.com/pyenv/pyenv.git | grep -o -E 'v[1-9]+(\.[1-9]+)+$' | tail -1) \
    https://github.com/pyenv/pyenv.git /root/.pyenv
ENV PYENV_ROOT="/root/.pyenv"
ENV PATH="$PYENV_ROOT/bin:$PATH"
RUN apt install -y software-properties-common \
    && apt update \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt update \
    && apt install -y python3.10 python3.10-distutils \
    # Remove python3-blinker to avoid pip uninstall conflicts
    && apt remove -y python3-blinker \
    && ln -s -f $(which python3.10) /usr/bin/python \
    && wget https://bootstrap.pypa.io/get-pip.py -O /tmp/get-pip.py \
    && python /tmp/get-pip.py
"""  # noqa: E501
 41  
# Skeleton of the generated Dockerfile; `generate_dockerfile` fills it with `str.format`.
# Placeholders:
#   {base_image}             - image used in the FROM instruction
#   {setup_python_venv}      - system package / Python environment setup instructions
#   {setup_java}             - Java installation instructions (may be empty)
#   {install_mlflow}         - instructions that pip-install MLflow
#   {install_model_and_deps} - instructions that install the model and its dependencies
#   {disable_env_creation}   - value of the MLFLOW_DISABLE_ENV_CREATION env variable
#   {enable_mlserver}        - value of the ENABLE_MLSERVER env variable
#   {entrypoint}             - Python code passed to `python -c` as the container ENTRYPOINT
# Because the template goes through `str.format`, any literal brace added to it would have
# to be doubled.
_DOCKERFILE_TEMPLATE = """# Build an image that can serve mlflow models.
FROM {base_image}

{setup_python_venv}

{setup_java}

WORKDIR /opt/mlflow

{install_mlflow}

{install_model_and_deps}

ENV MLFLOW_DISABLE_ENV_CREATION={disable_env_creation}
ENV ENABLE_MLSERVER={enable_mlserver}

# granting read/write access and conditional execution authority to all child directories
# and files to allow for deployment to AWS Sagemaker Serverless Endpoints
# (see https://docs.aws.amazon.com/sagemaker/latest/dg/serverless-endpoints.html)
RUN chmod o+rwX /opt/mlflow/

# clean up apt cache to reduce image size
RUN rm -rf /var/lib/apt/lists/*

ENTRYPOINT ["python", "-c", "{entrypoint}"]
"""
 68  
 69  
# Dockerfile fragment appended by `generate_dockerfile` for the Ubuntu base image when the
# env manager is conda. It downloads the latest Linux x86_64 Miniconda3 installer, installs
# it in batch mode under /miniconda, puts /miniconda/bin on PATH and replaces the `defaults`
# channel with `conda-forge` in the system-level conda configuration.
SETUP_MINICONDA = """# Setup miniconda
RUN curl --fail -L https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh > miniconda.sh
RUN bash ./miniconda.sh -b -p /miniconda && rm ./miniconda.sh
ENV PATH="/miniconda/bin:$PATH"
# Remove default channels to avoid `CondaToSNonInteractiveError`.
# See https://github.com/mlflow/mlflow/pull/16752 for more details.
RUN conda config --system --remove channels defaults && conda config --system --add channels conda-forge
"""  # noqa: E501
 78  
 79  
 80  def generate_dockerfile(
 81      output_dir: str,
 82      base_image: str,
 83      model_install_steps: str | None,
 84      entrypoint: str,
 85      env_manager: Literal["conda", "local", "virtualenv"] = em.CONDA,
 86      mlflow_home: str | None = None,
 87      enable_mlserver: bool = False,
 88      disable_env_creation_at_runtime: bool = True,
 89      install_java: bool | None = None,
 90  ):
 91      """
 92      Generates a Dockerfile that can be used to build a docker image, that serves ML model
 93      stored and tracked in MLflow.
 94      """
 95  
 96      setup_java_steps = ""
 97      setup_python_venv_steps = ""
 98      install_mlflow_steps = _pip_mlflow_install_step(output_dir, mlflow_home)
 99  
100      if base_image.startswith("python:"):
101          if install_java:
102              _logger.warning(
103                  "`install_java` option is not supported when using python base image, "
104                  "switch to UBUNTU_BASE_IMAGE to enable java installation."
105              )
106          setup_python_venv_steps = (
107              "RUN apt-get -y update && apt-get install -y --no-install-recommends nginx"
108          )
109  
110      elif base_image == UBUNTU_BASE_IMAGE:
111          setup_python_venv_steps = (
112              "RUN apt-get -y update && DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get install -y "
113              "--no-install-recommends wget curl nginx ca-certificates bzip2 build-essential cmake "
114              "git-core\n\n"
115          )
116          setup_python_venv_steps += SETUP_MINICONDA if env_manager == em.CONDA else SETUP_PYENV
117          if install_java is not False:
118              jdk_ver = MLFLOW_DOCKER_OPENJDK_VERSION.get()
119              setup_java_steps = (
120                  "# Setup Java\n"
121                  f"RUN apt-get install -y --no-install-recommends openjdk-{jdk_ver}-jdk maven\n"
122                  f"ENV JAVA_HOME=/usr/lib/jvm/java-{jdk_ver}-openjdk-amd64"
123              )
124  
125      with open(os.path.join(output_dir, "Dockerfile"), "w") as f:
126          f.write(
127              _DOCKERFILE_TEMPLATE.format(
128                  base_image=base_image,
129                  setup_python_venv=setup_python_venv_steps,
130                  setup_java=setup_java_steps,
131                  install_mlflow=install_mlflow_steps,
132                  install_model_and_deps=model_install_steps,
133                  entrypoint=entrypoint,
134                  enable_mlserver=enable_mlserver,
135                  disable_env_creation=disable_env_creation_at_runtime,
136              )
137          )
138  
139  
140  def _get_maven_proxy():
141      http_proxy = os.environ.get("http_proxy")
142      https_proxy = os.environ.get("https_proxy")
143      if not http_proxy or not https_proxy:
144          return ""
145  
146      # Expects proxies as either PROTOCOL://{USER}:{PASSWORD}@HOSTNAME:PORT
147      # or PROTOCOL://HOSTNAME:PORT
148      parsed_http_proxy = urlparse(http_proxy)
149      assert parsed_http_proxy.hostname is not None, "Invalid `http_proxy` hostname."
150      assert parsed_http_proxy.port is not None, f"Invalid proxy port: {parsed_http_proxy.port}"
151  
152      parsed_https_proxy = urlparse(https_proxy)
153      assert parsed_https_proxy.hostname is not None, "Invalid `https_proxy` hostname."
154      assert parsed_https_proxy.port is not None, f"Invalid proxy port: {parsed_https_proxy.port}"
155  
156      maven_proxy_options = (
157          "-DproxySet=true",
158          f"-Dhttp.proxyHost={parsed_http_proxy.hostname}",
159          f"-Dhttp.proxyPort={parsed_http_proxy.port}",
160          f"-Dhttps.proxyHost={parsed_https_proxy.hostname}",
161          f"-Dhttps.proxyPort={parsed_https_proxy.port}",
162          "-Dhttps.nonProxyHosts=repo.maven.apache.org",
163      )
164  
165      if parsed_http_proxy.username is None or parsed_http_proxy.password is None:
166          return " ".join(maven_proxy_options)
167  
168      return " ".join((
169          *maven_proxy_options,
170          f"-Dhttp.proxyUser={parsed_http_proxy.username}",
171          f"-Dhttp.proxyPassword={parsed_http_proxy.password}",
172      ))
173  
174  
def _pip_mlflow_install_step(dockerfile_context_dir, mlflow_home):
    """
    Return the Dockerfile instructions that install MLflow into the image.

    Args:
        dockerfile_context_dir: Docker build context directory; a local MLflow source tree
            is copied into it when ``mlflow_home`` is given.
        mlflow_home: Optional path to a local MLflow source directory. When falsy, MLflow
            is installed with pip from PyPI, or from the GitHub master archive for dev
            releases.

    Returns:
        A string of Dockerfile instructions, preceded by a comment line.
    """
    if not mlflow_home:
        # Dev version is not available on PyPI, install from GitHub instead
        running_dev_release = Version(VERSION).is_devrelease
        if running_dev_release:
            return "# Install MLflow\nRUN pip install https://github.com/mlflow/mlflow/archive/refs/heads/master.zip"
        return f"# Install MLflow\nRUN pip install mlflow=={VERSION}"

    # Copy the local source tree into the build context; the value returned by
    # `_copy_project` is what the COPY instruction below uses as its source.
    source_root = os.path.abspath(mlflow_home)
    copied_dir = _copy_project(src_path=source_root, dst_path=dockerfile_context_dir)
    return (
        "# Install MLflow from local source\n"
        f"COPY {copied_dir} /opt/mlflow\n"
        "RUN pip install /opt/mlflow"
    )
194  
195  
def build_image_from_context(context_dir: str, image_name: str):
    """
    Build a Docker image by running ``docker build`` inside ``context_dir``.

    Args:
        context_dir: Directory used as the working directory and build context; the
            command refers to a file named ``Dockerfile`` in it.
        image_name: Tag passed to ``docker build -t``.

    Raises:
        RuntimeError: If the ``docker build`` process exits with a non-zero status.
    """
    import docker

    docker_client = docker.from_env()
    reported_version = docker_client.version()["Version"]
    major_version = int(reported_version.split(".")[0])

    build_cmd = ["docker", "build", "-t", image_name, "-f", "Dockerfile"]
    # In Docker < 19, `docker build` doesn't support the `--platform` option
    if major_version >= 19:
        # Enforcing the AMD64 architecture build for Apple M1 users
        build_cmd.extend(["--platform", "linux/amd64"])
    build_cmd.append(".")

    exit_code = Popen(build_cmd, cwd=context_dir).wait()
    if exit_code != 0:
        raise RuntimeError("Docker build failed.")