llama-cli-cuda.Dockerfile
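# Example usage (illustrative only: the image tag and model path are placeholders,
# and `docker run --gpus` assumes the NVIDIA Container Toolkit is installed on the host):
#   docker build -t local/llama.cpp:cli-cuda -f .devops/llama-cli-cuda.Dockerfile .
#   docker run --gpus all -v /path/to/models:/models local/llama.cpp:cli-cuda \
#       -m /models/model.gguf -p "Hello" -ngl 99
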
ARG UBUNTU_VERSION=22.04
# This should generally match the container host's CUDA environment.
ARG CUDA_VERSION=11.7.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all
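# CUDA_DOCKER_ARCH can be overridden at build time (e.g. `--build-arg CUDA_DOCKER_ARCH=sm_80`)
# to target a single GPU architecture; the value is assumed to be forwarded to nvcc's -arch flag.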

RUN apt-get update && \
    apt-get install -y build-essential git

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable CUDA
ENV LLAMA_CUDA=1

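# Build only the llama-cli target; -j$(nproc) uses all available CPU cores.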
RUN make -j$(nproc) llama-cli

FROM ${BASE_CUDA_RUN_CONTAINER} as runtime

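# libgomp1 supplies the GNU OpenMP runtime that the Makefile build is assumed to link against.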
RUN apt-get update && \
    apt-get install -y libgomp1

COPY --from=build /app/llama-cli /llama-cli

ENTRYPOINT [ "/llama-cli" ]