# pyproject.toml
# Package metadata (PEP 621).
[project]
name = "rag-ingestion"
version = "0.1.0"
description = "Utilities for ingesting PDFs into a RAG pipeline."
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
    "langchain-core>=0.3.0",
    "langchain-text-splitters>=0.3.0",
    "langchain-community>=0.3.0",
    "langchain-openai>=0.1.0",
    "pymupdf>=1.24.9",
    "pymupdf4llm>=0.0.17",
    "pydantic>=2.0.0",
    "pydantic-settings>=2.0.0",
    "pyyaml>=6.0",
    "chromadb>=0.4.0",
    "qdrant-client>=1.7.0",
    "langchain-qdrant>=0.1.0",
    "sentence-transformers>=2.2.0",
    "numpy>=1.24.0,<2.0",
    "langchain-huggingface>=0.0.1",
    "google-genai>=1.0.0",
    "langchain-google-genai>=2.1.10",
    "docling==2.64.0",
    "fastapi>=0.104.0",
    "openai-whisper>=20231117",
    "uvicorn[standard]>=0.24.0",
    "rapidfuzz>=3.0.0",
    "boto3>=1.42.24",
]

# Extras, installed with e.g. `pip install "rag-ingestion[dev]"`.
[project.optional-dependencies]
dev = [
    "ipykernel>=6.29.0",
    "jupyter>=1.0.0",
    "ruff>=0.1.0",
    "pre-commit>=3.0.0",
]
openai = [
    "openai>=1.0.0",  # For OpenAI embeddings
    "tiktoken>=0.5.0",  # Required by OpenAIEmbeddings for token counting
]

[build-system]
requires = ["setuptools>=68.0"]
build-backend = "setuptools.build_meta"

# src-layout: packages are discovered under ./src.
[tool.setuptools.packages.find]
where = ["src"]

[tool.ruff]
# Line length matching the project style
line-length = 100
# Must track the lowest interpreter allowed by `requires-python` (>=3.10).
# An explicit target-version overrides the value Ruff would infer from
# requires-python, so a newer target lets pyupgrade (UP) rules propose syntax
# that does not run on 3.10.
target-version = "py310"

# Exclude common directories
exclude = [
    ".git",
    "__pycache__",
    ".venv",
    "venv",
    "env",
    ".eggs",
    "*.egg",
    "build",
    "dist",
    ".pytest_cache",
    ".mypy_cache",
    ".ruff_cache",
]

[tool.ruff.lint]
# Rules switched off even though their family is enabled in `select` below
# (key order within a TOML table is not significant).
ignore = [
    "E501",  # Line too long (handled by formatter)
    "E402",  # Module level import not at top of file (docstrings should come first per PEP 257)
    "ARG002",  # Unused method arguments (part of protocol interfaces, some implementations don't use them)
    "PLR0913",  # Too many arguments
    "PLR2004",  # Magic value used in comparison
    "TRY003",  # Avoid specifying long messages outside exception class
    "TRY301",  # Abstract raise to inner function (prefer direct raise for clarity)
    "TRY300",  # Consider moving statement to else block (prefer early return pattern)
    "PLW0603",  # Using global statement (needed for singleton pattern)
    "PLW0602",  # Using global for lock (needed for singleton pattern)
]

# Enable specific rule sets
select = [
    "E",  # pycodestyle errors
    "W",  # pycodestyle warnings
    "F",  # pyflakes
    "I",  # isort
    "B",  # flake8-bugbear
    "C4",  # flake8-comprehensions
    "UP",  # pyupgrade
    "ARG",  # flake8-unused-arguments
    "SIM",  # flake8-simplify
    "PTH",  # flake8-use-pathlib
    "ERA",  # eradicate (commented-out code)
    "PD",  # pandas-vet
    "PL",  # Pylint
    "TRY",  # tryceratops
    "RUF",  # Ruff-specific rules
]

# Per-file ignores
[tool.ruff.lint.per-file-ignores]
"src/__init__.py" = ["RUF022"]  # Allow unsorted __all__ to preserve comments

[tool.ruff.lint.isort]
known-first-party = ["src"]

# Thresholds for the Pylint "too many ..." refactor checks.
[tool.ruff.lint.pylint]
# NOTE(review): max-args feeds PLR0913, which is listed in `ignore` above, so
# this limit has no effect until that rule is re-enabled.
max-args = 10
max-branches = 20
max-returns = 10
max-statements = 50