/ Dockerfile
Dockerfile
 1  # Dockerfile for RAG Ingestion & Retrieval Pipeline
 2  FROM python:3.12-slim
 3  
 4  # Set working directory
 5  WORKDIR /app
 6  
 7  # Install system dependencies
 8  RUN apt-get update && apt-get install -y \
 9      build-essential \
10      libgl1 \
11      libglib2.0-0 \
12      g++ \
13      libsm6 \
14      libxext6 \
15      libxrender-dev \
16      libgomp1 \
17      poppler-utils \
18      tesseract-ocr \
19      tesseract-ocr-eng \
20      ffmpeg \
21      && rm -rf /var/lib/apt/lists/*
22  
# Bring in only the dependency manifest, so the expensive layer below is
# rebuilt when pyproject.toml changes and reused when only source code changes.
COPY pyproject.toml ./

# Resolve and install third-party dependencies ahead of the real source tree.
#
# Steps, all inside a single layer:
#   1. Create a placeholder src/ package so the editable install of the
#      project has something to point at.
#   2. Upgrade the packaging toolchain (pip, setuptools, wheel).
#   3. Install the project in editable mode, which pulls in its dependencies.
#   4. Uninstall the project itself (rag-ingestion); its dependencies stay.
#   5. Delete the placeholder src/ so the real sources can be copied later.
#
# `set -e` aborts the layer at the first failing command.
RUN set -e; \
    mkdir -p src; \
    touch src/__init__.py; \
    pip install --no-cache-dir --upgrade pip setuptools wheel; \
    pip install --no-cache-dir --editable .; \
    pip uninstall --yes rag-ingestion; \
    rm -r -f src
39  
# Application sources: only this layer and the ones after it are rebuilt
# when files under src/ change.
COPY src/ src/

# Register the project itself in editable mode. `--no-deps` skips dependency
# resolution because the dependencies were installed in an earlier layer,
# which keeps this step quick.
RUN pip install --no-cache-dir --no-deps --editable .
45  
# Add /app to Python's module search path so that directories under it
# (such as src/) can be imported from any working directory.
ENV PYTHONPATH=/app

# Declare the API port. EXPOSE is documentation only; the port still has to
# be published at run time (e.g. `docker run -p 8000:8000`).
EXPOSE 8000

# Placeholder default: print the interpreter version and exit.
# Callers are expected to override this with the actual command to run.
CMD ["python", "--version"]