/ Dockerfile
Dockerfile
# Dockerfile for RAG Ingestion & Retrieval Pipeline
#
# Layer order is least -> most frequently changing so rebuilds stay cheap:
#   base image -> OS packages -> Python dependencies -> application source.
FROM python:3.12-slim

# All subsequent relative paths (COPY targets, pip install -e .) resolve here.
WORKDIR /app

# Install system dependencies.
# --no-install-recommends keeps apt from pulling in optional "Recommends"
# packages, which noticeably shrinks the image. If a tool later turns out to
# need one of those optional packages, add it to this list explicitly.
# The apt package index is removed in the same layer so it never reaches the
# final image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ffmpeg \
        g++ \
        libgl1 \
        libglib2.0-0 \
        libgomp1 \
        libsm6 \
        libxext6 \
        libxrender-dev \
        poppler-utils \
        tesseract-ocr \
        tesseract-ocr-eng \
    && rm -rf /var/lib/apt/lists/*

# Copy the dependency manifest first (for better Docker layer caching).
COPY pyproject.toml .

# Install Python dependencies from pyproject.toml BEFORE copying source code.
# This way, dependencies are only reinstalled when pyproject.toml changes,
# not when source code changes.
#
# `pip install -e .` needs src/ to exist, so a minimal placeholder package is
# created temporarily. After the dependencies and the placeholder package are
# installed, only the package itself is uninstalled and the placeholder
# directory removed; all dependencies stay installed in this layer.
RUN mkdir -p src && \
    touch src/__init__.py && \
    pip install --no-cache-dir --upgrade pip setuptools wheel && \
    pip install --no-cache-dir -e . && \
    pip uninstall -y rag-ingestion && \
    rm -rf src

# Copy source code (this layer is only invalidated when the source changes).
COPY src/ ./src/

# Install the package in editable mode. --no-deps makes this fast: the
# dependencies were already installed in the cached layer above.
RUN pip install --no-cache-dir -e . --no-deps

# Make /app importable as a top-level path for the Python interpreter.
ENV PYTHONPATH=/app

# Document the API port (EXPOSE does not publish it; use `docker run -p`).
EXPOSE 8000

# NOTE(review): no USER instruction is set, so the container runs as root.
# Consider adding a dedicated non-root user once it is confirmed that mounted
# volumes and the editable install under /app work with reduced privileges.

# Default command (can be overridden at `docker run` / in compose).
CMD ["python", "--version"]