# haystack/pyproject.toml

# Build backend used to produce the sdist and wheel.
[build-system]
requires = ["hatchling>=1.8.0"]
build-backend = "hatchling.build"

# Core package metadata. `version` is dynamic: it is not set here but read
# from VERSION.txt through the [tool.hatch.version] table.
[project]
name = "haystack-ai"
dynamic = ["version"]
description = "LLM framework to build customizable, production-ready LLM applications. Connect components (models, vector DBs, file converters) to pipelines or agents that can interact with your data."
readme = "README.md"
license = "Apache-2.0"
requires-python = ">=3.9"
authors = [{ name = "deepset.ai", email = "malte.pietsch@deepset.ai" }]
keywords = [
  "BERT",
  "QA",
  "Question-Answering",
  "Reader",
  "Retriever",
  "albert",
  "language-model",
  "mrc",
  "roberta",
  "search",
  "semantic-search",
  "squad",
  "transfer-learning",
  "transformer",
]
# NOTE(review): `license` above is an SPDX expression; PEP 639 deprecates
# `License ::` classifiers when one is present. Confirm whether the two
# license classifiers below should be kept.
# The Python version classifiers mirror `requires-python` (3.9 and newer).
classifiers = [
  "Development Status :: 5 - Production/Stable",
  "Intended Audience :: Science/Research",
  "License :: Freely Distributable",
  "License :: OSI Approved :: Apache Software License",
  "Operating System :: OS Independent",
  "Programming Language :: Python",
  "Programming Language :: Python :: 3",
  "Programming Language :: Python :: 3.9",
  "Programming Language :: Python :: 3.10",
  "Programming Language :: Python :: 3.11",
  "Programming Language :: Python :: 3.12",
  "Programming Language :: Python :: 3.13",
  "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
# Runtime dependencies. Trailing comments name the feature that needs the
# package, or explain why a specific version is excluded.
dependencies = [
  "tqdm",
  "tenacity!=8.4.0",
  "lazy-imports",
  "openai>=1.56.1",
  "pydantic",
  "Jinja2",
  "posthog!=3.12.0",        # telemetry # 3.12.0 was problematic https://github.com/PostHog/posthog-python/issues/187
  "pyyaml",
  "more-itertools",         # TextDocumentSplitter
  "networkx",               # Pipeline graphs
  "typing_extensions>=4.7", # typing support for Python 3.9
  "requests",
  "numpy",
  "python-dateutil",
  "jsonschema",             # JsonSchemaValidator, Tool
  "docstring-parser",       # ComponentTool
  "haystack-experimental",
]

# Default hatch environment: pre-commit, linting/formatting (ruff) and
# release-note (reno) tooling.
[tool.hatch.envs.default]
installer = "uv"
dependencies = [
  "pre-commit",
  "ruff",
  "toml",
  "reno",
  # dulwich is a reno dependency. reno only requires >=0.15.0, so pip takes a ton of time to resolve the dependency tree.
  # We pin it to a narrower range here to keep dependency resolution fast.
  # https://opendev.org/openstack/reno/src/branch/master/requirements.txt#L7
  "dulwich>=0.21.0,<1.0.0",
]

# Convenience commands for the default environment. `{args}` is replaced by
# any extra command-line arguments passed to the script.
[tool.hatch.envs.default.scripts]
# Create a new reno release note.
release-note = "reno new {args}"
# Apply lint autofixes, then reformat.
fmt = "ruff check --fix {args} && ruff format {args}"
# Same checks as `fmt`, but report only; nothing is modified.
fmt-check = "ruff check {args} && ruff format --check {args}"

# Test environment: the optional third-party packages exercised by the test
# suite, plus the test, type-check and lint tooling itself.
[tool.hatch.envs.test]

# we override dependencies from the default environment
dependencies = [
  "numpy>=2", # Haystack is compatible both with numpy 1.x and 2.x, but we test with 2.x
  "numba>=0.54.0", # This pin helps uv resolve the dependency tree. See https://github.com/astral-sh/uv/issues/7881

  "pandas",                                    # AzureOCRDocumentConverter, CSVDocumentCleaner, CSVDocumentSplitter,
                                               # EvaluationRunResult, XLSXToDocument, and pipeline tests

  "transformers[torch,sentencepiece]>=4.51.1,<4.52", # ExtractiveReader, TransformersSimilarityRanker, LocalWhisperTranscriber, HFGenerators...
  "huggingface_hub>=0.27.0",                   # Hugging Face API Generators and Embedders
  "sentence-transformers>=4.1.0",              # Sentence Transformers Embedders, Rankers, and SASEvaluator
  "langdetect",                                # TextLanguageRouter and DocumentLanguageClassifier
  "openai-whisper>=20231106",                  # LocalWhisperTranscriber
  "arrow>=1.3.0",                              # Jinja2TimeExtension


  # Converters
  "pypdf",                            # PyPDFToDocument
  "pdfminer.six",                     # PDFMinerToDocument
  "markdown-it-py",                   # MarkdownToDocument
  "mdit_plain",                       # MarkdownToDocument
  "tika",                             # TikaDocumentConverter
  "azure-ai-formrecognizer>=3.2.0b2", # AzureOCRDocumentConverter
  "trafilatura",                      # HTMLToDocument
  "python-pptx",                      # PPTXToDocument
  "python-docx",                      # DocxToDocument
  "jq",                               # JSONConverter
  "openpyxl",                         # XLSXToDocument
  "tabulate",                         # XLSXToDocument
  "python-oxmsg",                     # MSGToDocument

  "nltk>=3.9.1", # NLTKDocumentSplitter, RecursiveDocumentSplitter
  "tiktoken", # RecursiveDocumentSplitter

  # OpenAPI
  "jsonref",              # OpenAPIServiceConnector, OpenAPIServiceToFunctions
  "openapi3",
  "openapi-llm>=0.4.1",   # OpenAPIConnector

  # Tracing
  "opentelemetry-sdk",
  "ddtrace",

  # Structured logging
  "structlog",

  # needed in link content fetcher tests
  "httpx[http2]",

  # Azure Utils
  "azure-identity",

  # Test
  "pytest",
  "pytest-bdd",
  "pytest-cov",
  "pytest-asyncio",
  "pytest-rerunfailures",
  "coverage",
  "mypy",
  "pip",                     # mypy needs pip to install missing stub packages
  "pylint",
  "ipython",
]

# Commands for the test environment. `{args:test}` / `{args:haystack}` use the
# given default (`test` or `haystack`) when no arguments are passed.
[tool.hatch.envs.test.scripts]
# Everything not marked `integration`, with XML coverage written to coverage.xml.
unit = 'pytest --cov-report xml:coverage.xml --cov="haystack" -m "not integration" {args:test}'
# Integration runs stop after 5 failures; the fast/slow variants split on the `slow` marker.
integration = 'pytest --maxfail=5 -m "integration" {args:test}'
integration-only-fast = 'pytest --maxfail=5 -m "integration and not slow" {args:test}'
integration-only-slow = 'pytest --maxfail=5 -m "integration and slow" {args:test}'
all = 'pytest {args:test}'

# Static analysis: mypy (installing missing stub packages non-interactively) and pylint.
types = "mypy --install-types --non-interactive --cache-dir=.mypy_cache/ {args:haystack}"
lint = "pylint -ry -j 0 {args:haystack}"

# End-to-end environment: based on the `test` environment template, with the
# packages below added on top of its dependencies.
[tool.hatch.envs.e2e]
template = "test"
extra-dependencies = [
  # NamedEntityExtractor
  "spacy>=3.8,<3.9",
  # The spaCy model is installed straight from its release wheel (direct URL reference).
  "en-core-web-trf @ https://github.com/explosion/spacy-models/releases/download/en_core_web_trf-3.8.0/en_core_web_trf-3.8.0-py3-none-any.whl",
  # spacy requires thinc, which depends on blis. We pin blis because version 1.2.1 does not have wheels for python 3.9
  # and compiling it from source takes much time.
  "blis<1.2.1; python_version < '3.10'",
]

# Runs the end-to-end suite located in the `e2e` directory.
[tool.hatch.envs.e2e.scripts]
test = "pytest e2e"

# Standalone environment that only installs `haystack-pydoc-tools`.
# NOTE(review): presumably used for generating and publishing the API docs — confirm.
[tool.hatch.envs.readme]
installer = "uv"
detached = true                         # To avoid installing the dependencies from the default environment
dependencies = ["haystack-pydoc-tools"]

# Documentation maintenance commands; both delegate to helpers under .github/utils.
[tool.hatch.envs.readme.scripts]
sync = "./.github/utils/pydoc-markdown.sh"
delete-outdated = "python ./.github/utils/delete_outdated_docs.py {args}"

# Project links published with the package metadata. Keys containing spaces or
# colons must be quoted.
[project.urls]
"CI: GitHub" = "https://github.com/deepset-ai/haystack/actions"
"Docs: RTD" = "https://haystack.deepset.ai/overview/intro"
"GitHub: issues" = "https://github.com/deepset-ai/haystack/issues"
"GitHub: repo" = "https://github.com/deepset-ai/haystack"
Homepage = "https://github.com/deepset-ai/haystack"

# Source of the dynamic package version: the text matched in VERSION.txt by the
# `version` named group of `pattern`.
[tool.hatch.version]
path = "VERSION.txt"
pattern = "(?P<version>.+)"

# Permit direct references (`name @ <url>`) in dependency specifications.
# NOTE(review): presumably required by the `en-core-web-trf @ https://...`
# entry of the e2e environment — confirm.
[tool.hatch.metadata]
allow-direct-references = true

# Source distribution: ship the `haystack` package directory and VERSION.txt
# (the latter is needed to resolve the dynamic version).
[tool.hatch.build.targets.sdist]
include = ["/haystack", "/VERSION.txt"]

# Wheel: contains the `haystack` package only.
[tool.hatch.build.targets.wheel]
packages = ["haystack"]

# Spell-checker configuration.
[tool.codespell]
# Comma-separated words that codespell must not flag. Keep the list free of
# whitespace so every entry is matched as an exact token.
ignore-words-list = "ans,astroid,nd,ned,nin,ue,rouge,ist,Claus"
quiet-level = 3
# Test suites are not spell-checked.
skip = "./test,./e2e"

# Pylint checks switched off project-wide. The list is grouped by intent:
# permanent exemptions first, then checks that are meant to be re-evaluated.
[tool.pylint.'MESSAGES CONTROL']
# Same limit as ruff's `line-length`. Note that `line-too-long` is in the
# disable list below.
max-line-length = 120
disable = [

  # To keep
  "fixme",
  "c-extension-no-member",

  # To review:
  "missing-docstring",
  "unused-argument",
  "no-member",
  "line-too-long",
  "protected-access",
  "too-few-public-methods",
  "raise-missing-from",
  "invalid-name",
  "duplicate-code",
  "arguments-differ",
  "consider-using-f-string",
  "no-else-return",
  "attribute-defined-outside-init",
  "super-with-arguments",
  "redefined-builtin",
  "abstract-method",
  "unspecified-encoding",
  "unidiomatic-typecheck",
  "no-name-in-module",
  "consider-using-with",
  "redefined-outer-name",
  "arguments-renamed",
  "unnecessary-pass",
  "broad-except",
  "unnecessary-comprehension",
  "subprocess-run-check",
  "singleton-comparison",
  "consider-iterating-dictionary",
  "undefined-loop-variable",
  "consider-using-in",
  "bare-except",
  "unexpected-keyword-arg",
  "simplifiable-if-expression",
  "use-list-literal",
  "broad-exception-raised",

  # To review later
  "cyclic-import",
  "import-outside-toplevel",
  "deprecated-method",
]
# Size/complexity thresholds. Every value is raised above pylint's default,
# which is given in the trailing comment for comparison.
[tool.pylint.'DESIGN']
max-args = 38           # Default is 5
max-attributes = 28     # Default is 7
max-branches = 34       # Default is 12
max-locals = 45         # Default is 15
max-module-lines = 2468 # Default is 1000
max-nested-blocks = 9   # Default is 5
max-statements = 206    # Default is 50

# Minimum number of matching lines for two fragments to count as similar.
# NOTE(review): `duplicate-code` is listed under `disable` in
# [tool.pylint.'MESSAGES CONTROL'], so this threshold presumably has no visible
# effect at the moment — confirm.
[tool.pylint.'SIMILARITIES']
min-similarity-lines = 6

# Pytest configuration.
[tool.pytest.ini_options]
minversion = "6.0"
# With --strict-markers, any marker not declared in `markers` below is an error.
addopts = "--strict-markers"
markers = [
  "unit: unit tests",
  "integration: integration tests",

  # integration tests that are slow (e.g. model inference on CPU), unstable (e.g. call unstable external services)
  # or require special setup (e.g. installing system dependencies, running Docker containers)
  "slow: slow/unstable integration tests",
]
# Stream log records to the console while tests run.
log_cli = true
# pytest-asyncio settings: async tests are collected without an explicit marker,
# and async fixtures default to a class-scoped event loop.
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "class"

# Type-checking configuration. `python_version` matches the lowest version in
# `requires-python`.
[tool.mypy]
python_version = "3.9"
disallow_incomplete_defs = true
warn_return_any = false
warn_unused_configs = true
ignore_missing_imports = true
check_untyped_defs = true

# Per-module relaxation: partially annotated functions are tolerated in the
# modules listed below.
[[tool.mypy.overrides]]
# TODO: Fix component typings
module = ["haystack.components.*", "haystack.testing.*"]
disallow_incomplete_defs = false

# Settings shared by ruff's linter and formatter.
[tool.ruff]
line-length = 120
exclude = [".github", "proposals"]

# Formatter: a trailing comma does not force a collection to stay expanded
# one item per line.
[tool.ruff.format]
skip-magic-trailing-comma = true

# Linter rule selection. `test/` and `e2e/` are excluded from linting only;
# they are not part of the top-level `[tool.ruff] exclude` list.
[tool.ruff.lint]
# Kept consistent with `skip-magic-trailing-comma = true` in [tool.ruff.format].
isort.split-on-trailing-comma = false
exclude = ["test/**", "e2e/**"]
select = [
  "ASYNC", # flake8-async
  "C4",    # flake8-comprehensions
  "C90",   # McCabe cyclomatic complexity
  "E501",  # Long lines
  "EXE",   # flake8-executable
  "F",     # Pyflakes
  "INT",   # flake8-gettext
  "PERF",  # Perflint
  "PL",    # Pylint
  "Q",     # flake8-quotes
  "SIM",   # flake8-simplify
  "SLOT",  # flake8-slots
  "T10",   # flake8-debugger
  "W",     # pycodestyle
  "YTT",   # flake8-2020
  "I",     # isort
  # built-in shadowing
  "A001", # builtin-variable-shadowing
  "A002", # builtin-argument-shadowing
  "A003", # builtin-attribute-shadowing
  # docstring rules
  "D102", # Missing docstring in public method
  "D103", # Missing docstring in public function
  "D209", # Closing triple quotes go to new line
  "D205", # 1 blank line required between summary line and description
  "D213", # summary lines must be positioned on the second physical line of the docstring
  "D417", # undocumented-param
  "D419", # empty-docstring
]

# Rules from the selected families that are deliberately switched off.
ignore = [
  "F401",    # unused-import
  "PERF203", # `try`-`except` within a loop incurs performance overhead
  "PERF401", # Use a list comprehension to create a transformed list
  "PLR1714", # repeated-equality-comparison
  "PLR5501", # collapsible-else-if
  "PLW0603", # global-statement
  "PLW1510", # subprocess-run-without-check
  "PLW2901", # redefined-loop-name
  "SIM108",  # if-else-block-instead-of-if-exp
  "SIM115",  # open-file-with-context-handler
  "SIM118",  # in-dict-keys
]

# Cyclomatic complexity ceiling for the C90 rule selected in [tool.ruff.lint].
[tool.ruff.lint.mccabe]
max-complexity = 28


# Thresholds for ruff's Pylint-derived (PL) rules; the trailing comments give
# ruff's defaults for comparison.
[tool.ruff.lint.pylint]
# Literal types exempt from the magic-value comparison check.
allow-magic-value-types = ["float", "int", "str"]
max-args = 14                                     # Default is 5
max-branches = 21                                 # Default is 12
max-public-methods = 20                           # Default is 20
max-returns = 7                                   # Default is 6
max-statements = 60                               # Default is 50

# Files under haystack/testing are left out of coverage measurement.
[tool.coverage.run]
omit = ["haystack/testing/*"]