# Packaging, environment and tooling configuration for the `haystack-ai` distribution.
# Layout: build backend, project metadata, Hatch environments/build settings, then per-tool config.

[build-system]
requires = ["hatchling>=1.8.0"]
build-backend = "hatchling.build"

[project]
name = "haystack-ai"
# The version is not hard-coded here; it is resolved by [tool.hatch.version] below.
dynamic = ["version"]
description = "LLM framework to build customizable, production-ready LLM applications. Connect components (models, vector DBs, file converters) to pipelines or agents that can interact with your data."
readme = "README.md"
license = "Apache-2.0"
requires-python = ">=3.9"
authors = [{ name = "deepset.ai", email = "malte.pietsch@deepset.ai" }]
keywords = [
  "BERT",
  "QA",
  "Question-Answering",
  "Reader",
  "Retriever",
  "albert",
  "language-model",
  "mrc",
  "roberta",
  "search",
  "semantic-search",
  "squad",
  "transfer-learning",
  "transformer",
]
classifiers = [
  "Development Status :: 5 - Production/Stable",
  "Intended Audience :: Science/Research",
  "License :: Freely Distributable",
  "License :: OSI Approved :: Apache Software License",
  "Operating System :: OS Independent",
  "Programming Language :: Python",
  "Programming Language :: Python :: 3",
  "Programming Language :: Python :: 3.9",
  "Programming Language :: Python :: 3.10",
  "Programming Language :: Python :: 3.11",
  "Programming Language :: Python :: 3.12",
  "Programming Language :: Python :: 3.13",
  "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
# Runtime dependencies; trailing comments name the feature that needs each package.
dependencies = [
  "tqdm",
  "tenacity!=8.4.0",
  "lazy-imports",
  "openai>=1.56.1",
  "pydantic",
  "Jinja2",
  # telemetry; 3.12.0 was problematic https://github.com/PostHog/posthog-python/issues/187
  "posthog!=3.12.0",
  "pyyaml",
  "more-itertools",  # TextDocumentSplitter
  "networkx",  # Pipeline graphs
  "typing_extensions>=4.7",  # typing support for Python 3.9
  "requests",
  "numpy",
  "python-dateutil",
  "jsonschema",  # JsonSchemaValidator, Tool
  "docstring-parser",  # ComponentTool
  "haystack-experimental",
]

[project.urls]
"CI: GitHub" = "https://github.com/deepset-ai/haystack/actions"
"Docs: RTD" = "https://haystack.deepset.ai/overview/intro"
"GitHub: issues" = "https://github.com/deepset-ai/haystack/issues"
"GitHub: repo" = "https://github.com/deepset-ai/haystack"
Homepage = "https://github.com/deepset-ai/haystack"

[tool.hatch.envs.default]
installer = "uv"
dependencies = [
  "pre-commit",
  "ruff",
  "toml",
  "reno",
  # dulwich is a reno dependency. reno pins it at >=0.15.0, so pip takes a long time to resolve the
  # dependency tree. We pin it here to keep resolution fast.
  # https://opendev.org/openstack/reno/src/branch/master/requirements.txt#L7
  "dulwich>=0.21.0,<1.0.0",
]

[tool.hatch.envs.default.scripts]
release-note = "reno new {args}"
fmt = "ruff check --fix {args} && ruff format {args}"
fmt-check = "ruff check {args} && ruff format --check {args}"

[tool.hatch.envs.test]
# We override dependencies from the default environment.
dependencies = [
  "numpy>=2",  # Haystack is compatible both with numpy 1.x and 2.x, but we test with 2.x
  # This pin helps uv resolve the dependency tree. See https://github.com/astral-sh/uv/issues/7881
  "numba>=0.54.0",
  # AzureOCRDocumentConverter, CSVDocumentCleaner, CSVDocumentSplitter,
  # EvaluationRunResult, XLSXToDocument, and pipeline tests
  "pandas",
  # ExtractiveReader, TransformersSimilarityRanker, LocalWhisperTranscriber, HFGenerators...
  "transformers[torch, sentencepiece]>=4.52.4,<4.53",
  "huggingface_hub>=0.27.0",  # Hugging Face API Generators and Embedders
  "sentence-transformers>=4.1.0",  # Sentence Transformers Embedders, Rankers, and SASEvaluator
  "langdetect",  # TextLanguageRouter and DocumentLanguageClassifier
  "openai-whisper>=20231106",  # LocalWhisperTranscriber
  "arrow>=1.3.0",  # Jinja2TimeExtension

  # Converters
  "pypdf",  # PyPDFToDocument
  "pdfminer.six",  # PDFMinerToDocument
  "markdown-it-py",  # MarkdownToDocument
  "mdit_plain",  # MarkdownToDocument
  "tika",  # TikaDocumentConverter
  "azure-ai-formrecognizer>=3.2.0b2",  # AzureOCRDocumentConverter
  "trafilatura",  # HTMLToDocument
  "python-pptx",  # PPTXToDocument
  "python-docx",  # DocxToDocument
  "jq",  # JSONConverter
  "openpyxl",  # XLSXToDocument
  "tabulate",  # XLSXToDocument
  "python-oxmsg",  # MSGToDocument

  "nltk>=3.9.1",  # NLTKDocumentSplitter, RecursiveDocumentSplitter
  "tiktoken",  # RecursiveDocumentSplitter

  # OpenAPI
  "jsonref",  # OpenAPIServiceConnector, OpenAPIServiceToFunctions
  "openapi3",
  "openapi-llm>=0.4.1",  # OpenAPIConnector

  # Tracing
  "opentelemetry-sdk",
  "ddtrace",

  # Structured logging
  "structlog",

  # needed in link content fetcher tests
  "httpx[http2]",

  # Azure Utils
  "azure-identity",

  # Test
  "pytest",
  "pytest-bdd",
  "pytest-cov",
  "pytest-asyncio",
  "pytest-rerunfailures",
  "coverage",
  "mypy",
  "pip",  # mypy needs pip to install missing stub packages
  "pylint",
  "ipython",
]

[tool.hatch.envs.test.scripts]
unit = 'pytest --cov-report xml:coverage.xml --cov="haystack" -m "not integration" {args:test}'
integration = 'pytest --maxfail=5 -m "integration" {args:test}'
integration-only-fast = 'pytest --maxfail=5 -m "integration and not slow" {args:test}'
integration-only-slow = 'pytest --maxfail=5 -m "integration and slow" {args:test}'
all = 'pytest {args:test}'
types = "mypy --install-types --non-interactive --cache-dir=.mypy_cache/ {args:haystack}"
lint = "pylint -ry -j 0 {args:haystack}"

[tool.hatch.envs.e2e]
template = "test"
extra-dependencies = [
  # NamedEntityExtractor
  "spacy>=3.8,<3.9",
  "en-core-web-trf @ https://github.com/explosion/spacy-models/releases/download/en_core_web_trf-3.8.0/en_core_web_trf-3.8.0-py3-none-any.whl",
  # spacy requires thinc, which depends on blis. We pin blis because version 1.2.1 does not have
  # wheels for Python 3.9 and compiling it from source takes a long time.
  "blis<1.2.1; python_version < '3.10'",
]

[tool.hatch.envs.e2e.scripts]
test = "pytest e2e"

[tool.hatch.envs.readme]
installer = "uv"
# Detached to avoid installing the dependencies from the default environment.
detached = true
dependencies = ["haystack-pydoc-tools"]

[tool.hatch.envs.readme.scripts]
sync = "./.github/utils/pydoc-markdown.sh {args}"
delete-outdated = "python ./.github/utils/delete_outdated_docs.py {args}"

[tool.hatch.version]
path = "VERSION.txt"
# Hatch's regex version source reads the version from the named group `version`.
pattern = "(?P<version>.+)"

[tool.hatch.metadata]
# Required for the direct URL reference ("name @ https://...") used in the e2e environment.
allow-direct-references = true

[tool.hatch.build.targets.sdist]
include = ["/haystack", "/VERSION.txt"]

[tool.hatch.build.targets.wheel]
packages = ["haystack"]

[tool.codespell]
ignore-words-list = "ans,astroid,nd,ned,nin,ue,rouge,ist, Claus"
quiet-level = 3
skip = "./test,./e2e"

[tool.pylint.'MESSAGES CONTROL']
max-line-length = 120
disable = [
  # To keep
  "fixme",
  "c-extension-no-member",
  # we re-export symbols for correct type checking
  # https://typing.python.org/en/latest/spec/distributing.html#import-conventions
  "useless-import-alias",

  # To review:
  "missing-docstring",
  "unused-argument",
  "line-too-long",
  "protected-access",
  "too-few-public-methods",
  "raise-missing-from",
  "invalid-name",
  "duplicate-code",
  "consider-using-f-string",
  "no-else-return",
  "attribute-defined-outside-init",
  "super-with-arguments",
  "redefined-builtin",
  "unspecified-encoding",
  "no-name-in-module",
  "redefined-outer-name",
  "arguments-renamed",
  "unnecessary-pass",
  "broad-except",
  "consider-iterating-dictionary",
  "consider-using-in",
  "bare-except",
  "unexpected-keyword-arg",
  "broad-exception-raised",
  "cyclic-import",
  "import-outside-toplevel",
]

[tool.pylint.'DESIGN']
max-args = 20  # Default is 5
max-attributes = 22  # Default is 7
max-branches = 22  # Default is 12
max-locals = 39  # Default is 15
max-module-lines = 1500  # Default is 1000
max-nested-blocks = 6  # Default is 5
max-statements = 102  # Default is 50

[tool.pylint.'SIMILARITIES']
min-similarity-lines = 6

[tool.pytest.ini_options]
minversion = "6.0"
addopts = "--strict-markers"
markers = [
  "unit: unit tests",
  "integration: integration tests",
  # integration tests that are slow (e.g. model inference on CPU), unstable (e.g. call unstable external services)
  # or require special setup (e.g. installing system dependencies, running Docker containers)
  "slow: slow/unstable integration tests",
]
log_cli = true
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "class"

[tool.mypy]
python_version = "3.9"
disallow_incomplete_defs = true
warn_return_any = false
ignore_missing_imports = true
check_untyped_defs = true

[[tool.mypy.overrides]]
# TODO: Fix component typings
module = ["haystack.components.*", "haystack.testing.*"]
disallow_incomplete_defs = false

[tool.ruff]
line-length = 120
exclude = [".github", "proposals"]

[tool.ruff.format]
skip-magic-trailing-comma = true

[tool.ruff.lint]
isort.split-on-trailing-comma = false
exclude = ["test/**", "e2e/**"]
select = [
  "ASYNC",  # flake8-async
  "C4",  # flake8-comprehensions
  "C90",  # McCabe cyclomatic complexity
  "E501",  # Long lines
  "EXE",  # flake8-executable
  "F",  # Pyflakes
  "INT",  # flake8-gettext
  "PERF",  # Perflint
  "PL",  # Pylint
  "Q",  # flake8-quotes
  "SIM",  # flake8-simplify
  "SLOT",  # flake8-slots
  "T10",  # flake8-debugger
  "W",  # pycodestyle
  "YTT",  # flake8-2020
  "I",  # isort

  # built-in shadowing
  "A001",  # builtin-variable-shadowing
  "A002",  # builtin-argument-shadowing
  "A003",  # builtin-attribute-shadowing

  # docstring rules
  "D102",  # Missing docstring in public method
  "D103",  # Missing docstring in public function
  "D209",  # Closing triple quotes go to new line
  "D205",  # 1 blank line required between summary line and description
  "D213",  # summary lines must be positioned on the second physical line of the docstring
  "D417",  # undocumented-parameter
  "D419",  # empty-docstring
]
ignore = [
  "F401",  # unused-import
  "PERF203",  # `try`-`except` within a loop incurs performance overhead
  "PERF401",  # Use a list comprehension to create a transformed list
  "PLR1714",  # repeated-equality-comparison
  "PLW0603",  # global-statement
  "PLW2901",  # redefined-loop-name
  "SIM108",  # if-else-block-instead-of-if-exp
  "SIM118",  # in-dict-keys
  # we re-export symbols for correct type checking
  # https://typing.python.org/en/latest/spec/distributing.html#import-conventions
  "PLC0414",  # useless-import-alias
  "PLC0415",  # import-outside-top-level
]

[tool.ruff.lint.mccabe]
max-complexity = 20

[tool.ruff.lint.pylint]
allow-magic-value-types = ["float", "int", "str"]
max-args = 13  # Default is 5
max-branches = 21  # Default is 12
max-public-methods = 20  # Default is 20
max-returns = 7  # Default is 6
max-statements = 56  # Default is 50

[tool.coverage.run]
omit = ["haystack/testing/*"]