# .devops/llama-server-rocm.Dockerfile
ARG UBUNTU_VERSION=22.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6

# Target the ROCm build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
# (An override example follows the list.)
ARG ROCM_DOCKER_ARCH=\
    gfx803 \
    gfx900 \
    gfx906 \
    gfx908 \
    gfx90a \
    gfx1010 \
    gfx1030 \
    gfx1100 \
    gfx1101 \
    gfx1102
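
# The arch list can be overridden at build time to slim the image down to a
# single target, e.g. (the image tag is just an example):
#   docker build --build-arg ROCM_DOCKER_ARCH=gfx1030 \
#       -f .devops/llama-server-rocm.Dockerfile -t llama-server-rocm .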

COPY requirements.txt   requirements.txt
COPY requirements       requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt
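
# Note: the Python requirements are presumably for the model-conversion
# scripts shipped with llama.cpp (convert*.py); the compiled server binary
# itself does not depend on them.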

WORKDIR /app

COPY . .

# Set the GPU architectures to build for (consumed by the ROCm/HIP toolchain)
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV LLAMA_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++
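
# For reference, the equivalent manual build outside Docker (a sketch,
# assuming a working ROCm install under /opt/rocm):
#   LLAMA_HIPBLAS=1 GPU_TARGETS="gfx1030" \
#   CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ \
#   make -j$(nproc) llama-server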

# Enable cURL
ENV LLAMA_CURL=1
RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev
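
# With LLAMA_CURL=1 the resulting server can download models over HTTP at
# runtime (e.g. via the -mu/--model-url flag) instead of requiring a
# pre-mounted model file.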

RUN make -j$(nproc) llama-server

ENTRYPOINT [ "/app/llama-server" ]
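
# Example build and run (image name, host paths and model file are
# placeholders; /dev/kfd and /dev/dri expose the ROCm GPU to the container):
#   docker build -f .devops/llama-server-rocm.Dockerfile -t llama-server-rocm .
#   docker run --device /dev/kfd --device /dev/dri \
#       -v /path/to/models:/models -p 8080:8080 \
#       llama-server-rocm -m /models/model.gguf --host 0.0.0.0 --port 8080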