# docker-compose/docker-compose.yml
  1  volumes:
  2    pgdata:
  3    storage-data:
  4  
  5  services:
  6    postgres:
  7      image: postgres:15
  8      container_name: mlflow-postgres
  9      environment:
 10        POSTGRES_USER: ${POSTGRES_USER}
 11        POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
 12        POSTGRES_DB: ${POSTGRES_DB}
 13        PGPORT: ${PGPORT}
 14      volumes:
 15        - pgdata:/var/lib/postgresql/data
 16      ports:
 17        - ${PGPORT}:${PGPORT}
 18      healthcheck:
 19        test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB} -p ${PGPORT}"]
 20        interval: 5s
 21        timeout: 3s
 22        retries: 10
 23  
 24    storage:
 25      image: rustfs/rustfs:1.0.0-alpha.83
 26      container_name: storage
 27      environment:
 28        RUSTFS_ADDRESS: :9000
 29        RUSTFS_SERVER_DOMAINS: storage:9000
 30        RUSTFS_REGION: ${AWS_DEFAULT_REGION:-us-east-1}
 31        RUSTFS_ACCESS_KEY: ${AWS_ACCESS_KEY_ID:-s3admin}
 32        RUSTFS_SECRET_KEY: ${AWS_SECRET_ACCESS_KEY:-s3admin}
 33        RUSTFS_CONSOLE_ENABLE: ${RUSTFS_CONSOLE_ENABLE:-true}
 34      ports:
 35        - "9000:9000"
 36        - "9001:9001"
 37      volumes:
 38        - storage-data:/data
 39      restart: unless-stopped
 40      healthcheck:
 41        test: ["CMD-SHELL", 'curl -s http://127.0.0.1:9000/health | grep -q ''"status":"ok"''']
 42        interval: 10s
 43        timeout: 5s
 44        retries: 5
 45        start_period: 10s
 46  
 47    create-bucket:
 48      image: amazon/aws-cli:2.33.25
 49      container_name: mlflow-create-bucket
 50      depends_on:
 51        storage:
 52          condition: service_healthy
 53      entrypoint: >
 54        /bin/sh -c "
 55          set -e;
 56          echo 'Waiting for S3 gateway getting ready...';
 57          if aws --endpoint-url=${MLFLOW_S3_ENDPOINT_URL} s3api head-bucket --bucket ${S3_BUCKET} 2>/dev/null; then
 58            echo 'Bucket ${S3_BUCKET} already exists. Skipping creation.';
 59          else
 60            echo 'Creating bucket ${S3_BUCKET}...';
 61            aws --endpoint-url=${MLFLOW_S3_ENDPOINT_URL} s3api create-bucket --bucket ${S3_BUCKET} --region ${AWS_DEFAULT_REGION};
 62          fi
 63        "
 64      environment:
 65        AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID}
 66        AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY}
 67        AWS_DEFAULT_REGION: ${AWS_DEFAULT_REGION}
 68        AWS_S3_ADDRESSING_STYLE: path
 69        MLFLOW_S3_ENDPOINT_URL: ${MLFLOW_S3_ENDPOINT_URL}
 70        S3_BUCKET: ${S3_BUCKET}
 71      restart: "no"
 72  
 73    mlflow:
 74      image: ghcr.io/mlflow/mlflow:${MLFLOW_VERSION}
 75      container_name: mlflow-server
 76      depends_on:
 77        postgres:
 78          condition: service_healthy
 79        storage:
 80          condition: service_healthy
 81        create-bucket:
 82          condition: service_completed_successfully
 83      environment:
 84        # Backend store URI built from vars
 85        MLFLOW_BACKEND_STORE_URI: ${MLFLOW_BACKEND_STORE_URI}
 86  
 87        # S3/RustFS settings
 88        MLFLOW_S3_ENDPOINT_URL: ${MLFLOW_S3_ENDPOINT_URL}
 89        MLFLOW_ARTIFACTS_DESTINATION: ${MLFLOW_ARTIFACTS_DESTINATION}
 90        AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID}
 91        AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY}
 92        AWS_DEFAULT_REGION: ${AWS_DEFAULT_REGION}
 93        MLFLOW_S3_IGNORE_TLS: "true"
 94  
 95        # Server host/port
 96        MLFLOW_HOST: ${MLFLOW_HOST}
 97        MLFLOW_PORT: ${MLFLOW_PORT}
 98      command:
 99        - /bin/bash
100        - -c
101        - |
102          pip install --no-cache-dir psycopg2-binary boto3
103          mlflow server \
104            --backend-store-uri "${MLFLOW_BACKEND_STORE_URI}" \
105            --artifacts-destination "${MLFLOW_ARTIFACTS_DESTINATION}" \
106            --serve-artifacts \
107            --host "${MLFLOW_HOST}" \
108            --port "${MLFLOW_PORT}"
109      ports:
110        - "${MLFLOW_PORT}:${MLFLOW_PORT}"
111      healthcheck:
112        test:
113          [
114            "CMD",
115            "python",
116            "-c",
117            "import urllib.request; urllib.request.urlopen('http://localhost:${MLFLOW_PORT}/health')",
118          ]
119        interval: 10s
120        timeout: 5s
121        retries: 30
122  
# Sets an explicit name, `mlflow-network`, on the project's default network.
networks:
  default:
    name: mlflow-network