# llama-server-rocm.Dockerfile
ARG UBUNTU_VERSION=22.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6

# Target the ROCm dev image (the ROCm analogue of a CUDA dev container).
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
    gfx803 \
    gfx900 \
    gfx906 \
    gfx908 \
    gfx90a \
    gfx1010 \
    gfx1030 \
    gfx1100 \
    gfx1101 \
    gfx1102

# Install Python requirements first so this layer caches independently of
# source changes (COPY . . below invalidates later layers on every edit).
COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set the HIP/ROCm GPU target architectures (consumed by the Makefile's
# hipcc invocation; nvcc is not involved in the ROCm build).
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV LLAMA_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

# Enable cURL support in llama-server; drop the apt lists afterwards so the
# package index does not bloat the layer.
ENV LLAMA_CURL=1
RUN apt-get update \
    && apt-get install -y --no-install-recommends libcurl4-openssl-dev \
    && rm -rf /var/lib/apt/lists/*

RUN make -j$(nproc) llama-server

ENTRYPOINT [ "/app/llama-server" ]