/ pyproject.toml
pyproject.toml
  1  [build-system]
  2  requires = ["hatchling>=1.8.0"]
  3  build-backend = "hatchling.build"  # hatchling settings live in the [tool.hatch.*] tables of this file
  4  
  5  [project]
  6  name = "haystack-ai"
  7  dynamic = ["version"]  # read from VERSION.txt, see [tool.hatch.version]
  8  description = "LLM framework to build customizable, production-ready LLM applications. Connect components (models, vector DBs, file converters) to pipelines or agents that can interact with your data."
  9  readme = "README.md"
 10  license = "Apache-2.0"
 11  requires-python = ">=3.10"  # keep in sync with the Python classifiers below and python_version in [tool.mypy]
 12  authors = [{ name = "deepset.ai", email = "malte.pietsch@deepset.ai" }]
 13  keywords = [
 14    "BERT",
 15    "QA",
 16    "Question-Answering",
 17    "Reader",
 18    "Retriever",
 19    "albert",
 20    "language-model",
 21    "mrc",
 22    "roberta",
 23    "search",
 24    "semantic-search",
 25    "squad",
 26    "transfer-learning",
 27    "transformer",
 28  ]
 29  classifiers = [
 30    "Development Status :: 5 - Production/Stable",
 31    "Intended Audience :: Science/Research",
 32    "License :: Freely Distributable",
 33    "License :: OSI Approved :: Apache Software License",
 34    "Operating System :: OS Independent",
 35    "Programming Language :: Python",
 36    "Programming Language :: Python :: 3",
 37    "Programming Language :: Python :: 3.10",
 38    "Programming Language :: Python :: 3.11",
 39    "Programming Language :: Python :: 3.12",
 40    "Programming Language :: Python :: 3.13",
 41    "Programming Language :: Python :: 3.14",
 42    "Topic :: Scientific/Engineering :: Artificial Intelligence",
 43  ]
 44  dependencies = [
 45    "tqdm",
 46    "tenacity!=8.4.0",
 47    "lazy-imports",
 48    "openai>=1.99.2",
 49    "pydantic",
 50    "Jinja2",
 51    "MarkupSafe",             # already required by Jinja2 but used directly in templatize_part
 52    "posthog!=3.12.0",        # telemetry; 3.12.0 was problematic, see https://github.com/PostHog/posthog-python/issues/187
 53    "pyyaml",
 54    "more-itertools",         # TextDocumentSplitter
 55    "networkx",               # Pipeline graphs
 56    "typing_extensions>=4.7", # Extended typing features (NotRequired, etc.)
 57    "httpx",
 58    "numpy",
 59    "python-dateutil",
 60    "jsonschema",             # JsonSchemaValidator, Tool
 61    "docstring-parser",       # ComponentTool
 62    "filetype",               # MIME type guessing for ImageContent
 63    "haystack-experimental",
 64  ]
 65  
 66  [tool.hatch.envs.default]
 67  installer = "uv"
 68  dependencies = [
 69    "pre-commit",
 70    "ruff>=0.15.0",  # RUF104 (unmatched-suppression-comment) was added in 0.15.0
 71    "toml",
 72    "reno",
 73    # dulwich is a dependency of reno, which only requires >=0.15.0, so pip takes a long time to resolve the dependency tree.
 74    # We constrain the version range here to keep dependency resolution fast.
 75    # https://opendev.org/openstack/reno/src/branch/master/requirements.txt#L7
 76    "dulwich>=0.21.0,<1.0.0",
 77    "haystack-pydoc-tools",
 78  ]
 79  
 80  [tool.hatch.envs.default.scripts]
 81  release-note = "reno new {args}"
 82  fmt = "ruff check --fix {args}; ruff format {args}"  # modifies files: applies lint fixes, then formats
 83  fmt-check = "ruff check {args} && ruff format --check {args}"  # check-only counterpart of fmt
 84  docs = "haystack-pydoc pydoc tmp_api_reference"
 85  
 86  [tool.hatch.envs.test]
 87  
 88  # This list overrides the dependencies declared for the default environment.
 89  dependencies = [
 90    "numpy>=2", # Haystack is compatible with both numpy 1.x and 2.x, but we test with 2.x
 91    "numba>=0.54.0", # This lower bound helps uv resolve the dependency tree. See https://github.com/astral-sh/uv/issues/7881
 92  
 93    "pandas",                                           # AzureOCRDocumentConverter, CSVDocumentCleaner, CSVDocumentSplitter,
 94                                                        # EvaluationRunResult, XLSXToDocument, and pipeline tests
 95  
 96    "transformers[torch, sentencepiece]>=4.57",         # ExtractiveReader, TransformersSimilarityRanker, LocalWhisperTranscriber, HFGenerators...
 97    "huggingface_hub>=0.27.0",                          # Hugging Face API Generators and Embedders
 98    "sentence-transformers>=5.0.0",                     # Sentence Transformers Embedders, Rankers, and SASEvaluator
 99    "langdetect",                                       # TextLanguageRouter and DocumentLanguageClassifier
100    "openai-whisper>=20231106",                         # LocalWhisperTranscriber
101    "arrow>=1.3.0",                                     # Jinja2TimeExtension
102    "pillow",                                           # ImageContent
103    "pypdfium2",                                        # PDFToImageContent
104  
105  
106    # Converters
107    "pypdf",                            # PyPDFToDocument
108    "pdfminer.six",                     # PDFMinerToDocument
109    "markdown-it-py",                   # MarkdownToDocument
110    "mdit_plain",                       # MarkdownToDocument
111    "tika",                             # TikaDocumentConverter
112    "azure-ai-formrecognizer>=3.2.0b2", # AzureOCRDocumentConverter
113    "trafilatura",                      # HTMLToDocument
114    "python-pptx",                      # PPTXToDocument
115    "python-docx",                      # DocxToDocument
116    "jq",                               # JSONConverter
117    "openpyxl",                         # XLSXToDocument
118    "tabulate",                         # XLSXToDocument
119    "python-oxmsg",                     # MSGToDocument
120  
121    "nltk>=3.9.1", # NLTKDocumentSplitter, RecursiveDocumentSplitter
122    "tiktoken", # RecursiveDocumentSplitter
123  
124    # Human in the Loop
125    "rich",                 # Console rendering for HITL
126  
127    # OpenAPI
128    "jsonref",              # OpenAPIServiceConnector, OpenAPIServiceToFunctions
129    "openapi3",
130    "openapi-llm>=0.4.1",   # OpenAPIConnector
131  
132    # Tracing
133    "opentelemetry-sdk",
134    "ddtrace",
135  
136    # Structured logging
137    "structlog",
138  
139    # Needed by the link content fetcher tests
140    "httpx[http2]",
141  
142    # Azure Utils
143    "azure-identity",
144  
145    # Test tooling
146    "pytest",
147    "pytest-bdd",
148    "pytest-cov",
149    "pytest-asyncio",
150    "pytest-rerunfailures",
151    "coverage",
152    "mypy",
153    "pip",                     # mypy needs pip to install missing stub packages
154    "ipython",
155    "colorama==0.4.6",         # Pipeline checkpoints test
156  ]
157  
158  [tool.hatch.envs.test.scripts]
159  unit = 'pytest --cov-report xml:coverage.xml --cov="haystack" -m "not integration" {args:test}'  # {args:test}: targets the test folder unless other args are passed
160  integration = 'pytest --maxfail=5 -m "integration" {args:test}'
161  integration-only-fast = 'pytest --maxfail=5 -m "integration and not slow" {args:test}'
162  integration-only-slow = 'pytest --maxfail=5 -m "integration and slow" {args:test}'
163  all = 'pytest {args:test}'
164  
165  # TODO: eventually type-check the whole test folder; until then only the paths in the default args below are checked
166  types = "mypy --install-types --non-interactive --cache-dir=.mypy_cache/ {args:haystack test/core/ test/marshal/ test/testing/ test/tracing/}"
167  
168  [tool.hatch.envs.e2e]
169  template = "test"  # reuse the configuration of the test environment
170  extra-dependencies = [
171    # NamedEntityExtractor
172    "spacy>=3.8.13,<3.9",
173    "en-core-web-trf @ https://github.com/explosion/spacy-models/releases/download/en_core_web_trf-3.8.0/en_core_web_trf-3.8.0-py3-none-any.whl",  # model release 3.8.0, same 3.8 series as the spacy constraint above
174  ]
175  
176  [tool.hatch.envs.e2e.scripts]
177  test = "pytest {args:e2e}"  # targets the e2e folder unless other args are passed
178  
179  [project.urls]
180  "CI: GitHub" = "https://github.com/deepset-ai/haystack/actions"
181  "Docs: RTD" = "https://haystack.deepset.ai/overview/intro"  # NOTE(review): label mentions RTD, but the URL points to haystack.deepset.ai
182  "GitHub: issues" = "https://github.com/deepset-ai/haystack/issues"
183  "GitHub: repo" = "https://github.com/deepset-ai/haystack"
184  Homepage = "https://github.com/deepset-ai/haystack"
185  
186  [tool.hatch.version]
187  path = "VERSION.txt"  # also shipped in the sdist, see [tool.hatch.build.targets.sdist]
188  pattern = "(?P<version>.+)"  # the named group `version` captures the text found in the file
189  
190  [tool.hatch.metadata]
191  allow-direct-references = true  # NOTE(review): the only direct reference visible in this file is the en-core-web-trf wheel URL in the e2e environment
192  
193  [tool.hatch.build.targets.sdist]
194  include = ["/haystack", "/VERSION.txt"]  # VERSION.txt must ship because the version is read from it ([tool.hatch.version])
195  
196  [tool.hatch.build.targets.wheel]
197  packages = ["haystack"]  # the same top-level package directory that the sdist includes
198  
199  [tool.codespell]
200  ignore-words-list = "ans,astroid,nd,ned,nin,ue,rouge,ist, Claus,SME"  # NOTE(review): " Claus" carries a leading space inside the string
201  quiet-level = 3
202  skip = "./test,./e2e"  # the test and e2e folders are not spell-checked
203  
204  [tool.pytest.ini_options]
205  minversion = "6.0"
206  addopts = "--strict-markers"  # markers not registered under `markers` are treated as errors
207  markers = [
208    "unit: unit tests",
209    "integration: integration tests",
210  
211    # integration tests that are slow (e.g. model inference on CPU), unstable (e.g. call unstable external services)
212    # or require special setup (e.g. installing system dependencies, running Docker containers)
213    "slow: slow/unstable integration tests",
214  ]
215  log_cli = true
216  asyncio_mode = "auto"  # pytest-asyncio setting (the plugin is a dependency of the test environment)
217  asyncio_default_fixture_loop_scope = "class"
218  
219  [tool.mypy]
220  python_version = "3.10"  # lowest version allowed by requires-python in [project]
221  check_untyped_defs = true
222  disallow_incomplete_defs = true  # relaxed for haystack.testing.* in [[tool.mypy.overrides]]
223  ignore_missing_imports = true
224  warn_unused_ignores = true
225  warn_redundant_casts = true
226  warn_return_any = false
227  
228  
229  [[tool.mypy.overrides]]
230  # TODO: fix the typings in haystack.testing so that this override can be removed
231  module = ["haystack.testing.*"]
232  disallow_incomplete_defs = false  # relaxes the global disallow_incomplete_defs = true for this module only
233  
234  [tool.ruff]
235  exclude = ["e2e/**", "proposals"]  # paths that ruff neither lints nor formats
236  line-length = 120
237  
238  
239  [tool.ruff.format]
240  skip-magic-trailing-comma = true  # a trailing comma does not force one-item-per-line formatting
241  
242  [tool.ruff.lint.isort]
243  split-on-trailing-comma = false  # consistent with skip-magic-trailing-comma in [tool.ruff.format]
244  
245  [tool.ruff.lint.mccabe]
246  max-complexity = 20   # Default is 10 for Ruff; applies to the C90 rule selected in [tool.ruff.lint]
247  
248  [tool.ruff.lint.pylint]
249  allow-magic-value-types = ["float", "int", "str"]  # literals of these types are not reported as magic values
250  max-args = 13                                     # Default is 5
251  max-branches = 21                                 # Default is 12
252  max-public-methods = 20                           # Default is 20 (unchanged, kept explicit)
253  max-returns = 7                                   # Default is 6
254  max-statements = 56                               # Default is 50
255  
256  [tool.ruff.lint]
257  select = [
258    "A001",   # builtin-variable-shadowing
259    "A002",   # builtin-argument-shadowing
260    "A003",   # builtin-attribute-shadowing
261    "ANN",    # flake8-annotations
262    "ARG",    # flake8-unused-arguments
263    "ASYNC",  # flake8-async
264    "B",      # flake8-bugbear
265    "BLE",    # flake8-blind-except
266    "C4",     # flake8-comprehensions
267    "C90",    # mccabe complexity
268    "COM818", # trailing-comma-on-bare-tuple
269    "D102",   # Missing docstring in public method
270    "D103",   # Missing docstring in public function
271    "D205",   # 1 blank line required between summary line and description
272    "D209",   # Closing triple quotes go to new line
273    "D213",   # summary lines must be positioned on the second physical line of the docstring
274    "D417",   # undocumented-parameter
275    "D419",   # empty-docstring
276    "E",      # pycodestyle errors
277    "EXE",    # flake8-executable
278    "F",      # pyflakes
279    "G001",   # logging-format-interpolation
280    "G002",   # logging-percent-format
281    "G004",   # logging-f-string
282    "I",      # isort
283    "ISC001", # implicit-string-concatenation
284    "INT",    # flake8-gettext
285    "PERF",   # perflint
286    "PIE",    # flake8-pie
287    "PL",     # pylint
288    "RET",    # flake8-return
289    "RUF104", # unmatched-suppression-comment
290    "S102",   # flake8-bandit (exec-builtin)
291    "S307",   # flake8-bandit (eval)
292    "SIM",    # flake8-simplify
293    "SLOT",   # flake8-slots
294    "T10",    # flake8-debugger
295    "TID252", # relative-imports
296    "TRY",    # tryceratops
297    "UP",     # pyupgrade
298    "W",      # pycodestyle warnings
299    "YTT",    # flake8-2020
300  ]
301  
302  ignore = [
303    "ANN401",  # Allow `Any` - used legitimately for dynamic types and SDK boundaries
304    "B008",    # function-call-as-argument-default
305    "B904",    # raise-without-from-inside-except
306    "BLE001",  # blind-except
307    "E721",    # type-comparison
308    "E722",    # bare-except
309    "PERF203", # try-except-in-loop
310    "PERF401", # manual-list-comprehension
311    "PIE790",  # unnecessary-pass
312  
313    # we re-export symbols in __init__ for correct type checking
314    # https://typing.python.org/en/latest/spec/distributing.html#import-conventions
315    "PLC0414", # useless-import-alias
316  
317    "PLC0415", # import-outside-top-level
318    "PLR1714", # repeated-equality-comparison
319    "PLW0603", # global-statement
320    "PLW1514", # unspecified-encoding
321    "PLW2901", # redefined-loop-name
322    "RET505",  # superfluous-else-return
323    "SIM108",  # if-else-block-instead-of-ternary
324    "SIM109",  # multiple-comparisons-with-in
325    "SIM118",  # in-dict-keys
326    "TRY002",  # raise-vanilla-class
327    "TRY003",  # raise-vanilla-args
328    "TRY201",  # verbose-raise
329    "TRY300",  # try-consider-else
330    "UP008",   # super-with-arguments
331    "UP032",   # use-f-string
332    "UP037",   # quoted-annotation
333  ]
334  
335  [tool.ruff.lint.per-file-ignores]
336  "test/**" = [
337    "ANN",     # flake8-annotations
338    "ARG",     # flake8-unused-arguments
339    "B018",    # useless-expression
340    "D",       # pydocstyle (docstring checks)
341    "E711",    # none-comparison
342    "E731",    # lambda-assignment
343    "PLC0206", # dict-index-missing-items
344    "SIM105",  # suppressible-exception
345    "SIM117",  # multiple-with-statements
346  ]
347  # In the files listed below with UP* codes, old-style type hints are still allowed because the code handles or tests them:
348  # UP006: generics like typing.List, typing.Dict
349  # UP007: Union[X, Y] syntax
350  # UP035: Deprecated import
351  # UP045: Optional[X] syntax
352  "haystack/testing/**" = ["ANN"]  # flake8-annotations only; not part of the UP* exceptions described above
353  "haystack/components/agents/state/state_utils.py" = ["UP007"]
354  "haystack/core/super_component/utils.py" = ["UP007"]
355  "haystack/core/type_utils.py" = ["UP007"]
356  "haystack/tools/parameters_schema_utils.py" = ["UP007"]
357  "test/components/agents/test_state_class.py" = ["UP006", "UP007", "UP035", "UP045"]
358  "test/components/converters/test_output_adapter.py" = ["UP006", "UP035"]
359  "test/components/joiners/test_list_joiner.py" = ["UP006", "UP035"]
360  "test/core/pipeline/test_type_syntax_compatibility.py" = ["UP007", "UP045"]
361  "test/core/super_component/test_utils.py" = ["UP007", "UP045"]
362  "test/core/test_type_utils.py" = ["UP006", "UP007", "UP035", "UP045"]
363  "test/tools/test_parameters_schema_utils.py" = ["UP007"]
364  "test/utils/test_type_serialization.py" = ["UP006", "UP007", "UP035", "UP045"]
365  
366  [tool.coverage.run]
367  omit = ["haystack/testing/*"]  # the same package also gets relaxed mypy and ruff settings in this file