[build-system]
requires = ["hatchling>=1.8.0"]
build-backend = "hatchling.build"

[project]
name = "haystack-ai"
# The version is not hard-coded here; it is resolved by [tool.hatch.version] below.
dynamic = ["version"]
description = "LLM framework to build customizable, production-ready LLM applications. Connect components (models, vector DBs, file converters) to pipelines or agents that can interact with your data."
readme = "README.md"
license = "Apache-2.0"
requires-python = ">=3.8,<3.13"
authors = [{ name = "deepset.ai", email = "malte.pietsch@deepset.ai" }]
keywords = [
  "BERT",
  "QA",
  "Question-Answering",
  "Reader",
  "Retriever",
  "albert",
  "language-model",
  "mrc",
  "roberta",
  "search",
  "semantic-search",
  "squad",
  "transfer-learning",
  "transformer",
]
classifiers = [
  "Development Status :: 5 - Production/Stable",
  "Intended Audience :: Science/Research",
  "License :: Freely Distributable",
  "License :: OSI Approved :: Apache Software License",
  "Operating System :: OS Independent",
  "Programming Language :: Python",
  "Programming Language :: Python :: 3",
  "Programming Language :: Python :: 3.8",
  "Programming Language :: Python :: 3.9",
  "Programming Language :: Python :: 3.10",
  "Programming Language :: Python :: 3.11",
  "Programming Language :: Python :: 3.12",
  "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
# Runtime dependencies installed together with the package.
dependencies = [
  "pandas",
  "tqdm",
  "tenacity!=8.4.0",
  "lazy-imports",
  "openai>=1.56.1",
  "pydantic",
  "Jinja2",
  "posthog",                # telemetry
  "pyyaml",
  "more-itertools",         # TextDocumentSplitter
  "networkx",               # Pipeline graphs
  "typing_extensions>=4.7", # typing support for Python 3.8
  "requests",
  "numpy",
  "python-dateutil",
  "haystack-experimental",
]

[project.urls]
"CI: GitHub" = "https://github.com/deepset-ai/haystack/actions"
"Docs: RTD" = "https://haystack.deepset.ai/overview/intro"
"GitHub: issues" = "https://github.com/deepset-ai/haystack/issues"
"GitHub: repo" = "https://github.com/deepset-ai/haystack"
Homepage = "https://github.com/deepset-ai/haystack"

[tool.hatch.envs.default]
installer = "uv"
dependencies = [
  "pre-commit",
  "ruff",
  "toml",
  "reno",
  # dulwich is a reno dependency, they pin it at >=0.15.0 so pip takes a ton of time to resolve the dependency tree.
  # We pin it here to avoid taking too much time.
  # https://opendev.org/openstack/reno/src/branch/master/requirements.txt#L7
  "dulwich>=0.21.0,<1.0.0",
]

[tool.hatch.envs.default.scripts]
release-note = "reno new {args}"
check = "ruff check {args}"
fix = "ruff check --fix"
format = "ruff format {args}"
format-check = "ruff format --check {args}"

[tool.hatch.envs.test]
# Extra packages needed only by the test environment.
extra-dependencies = [
  "numpy>=2", # Haystack is compatible both with numpy 1.x and 2.x, but we test with 2.x

  "transformers[torch,sentencepiece]==4.44.2", # ExtractiveReader, TransformersSimilarityRanker, LocalWhisperTranscriber, HFGenerators...
  "huggingface_hub>=0.27.0",                   # Hugging Face API Generators and Embedders
  "sentence-transformers>=3.0.0",              # SentenceTransformersTextEmbedder and SentenceTransformersDocumentEmbedder
  "langdetect",                                # TextLanguageRouter and DocumentLanguageClassifier
  "openai-whisper>=20231106",                  # LocalWhisperTranscriber
  "arrow>=1.3.0",                              # Jinja2TimeExtension

  # NamedEntityExtractor
  "spacy>=3.8,<3.9",
  "spacy-curated-transformers>=0.2,<=0.3",
  "en-core-web-trf @ https://github.com/explosion/spacy-models/releases/download/en_core_web_trf-3.8.0/en_core_web_trf-3.8.0-py3-none-any.whl",

  # Converters
  "pypdf",                            # PyPDFToDocument
  "pdfminer.six",                     # PDFMinerToDocument
  "markdown-it-py",                   # MarkdownToDocument
  "mdit_plain",                       # MarkdownToDocument
  "tika",                             # TikaDocumentConverter
  "azure-ai-formrecognizer>=3.2.0b2", # AzureOCRDocumentConverter
  "trafilatura",                      # HTMLToDocument
  "python-pptx",                      # PPTXToDocument
  "python-docx",                      # DocxToDocument
  "jq",                               # JSONConverter
  "openpyxl",                         # XLSXToDocument
  "tabulate",                         # XLSXToDocument
  "nltk",                             # NLTKDocumentSplitter

  # OpenAPI
  "jsonref",  # OpenAPIServiceConnector, OpenAPIServiceToFunctions
  "openapi3",

  # JsonSchemaValidator, Tool
  "jsonschema",

  # Tracing
  "opentelemetry-sdk",
  "ddtrace",

  # Structured logging
  "structlog",

  # Test
  "pytest",
  "pytest-bdd",
  "pytest-cov",
  "pytest-custom_exit_code", # used in the CI
  "pytest-asyncio",
  "pytest-rerunfailures",
  "responses",
  "tox",
  "coverage",
  "python-multipart",
  "psutil",
  "mypy",
  "pip",                     # mypy needs pip to install missing stub packages
  "pylint",
  "ipython",
]

[tool.hatch.envs.test.scripts]
e2e = "pytest e2e"
unit = 'pytest --cov-report xml:coverage.xml --cov="haystack" -m "not integration" {args:test}'
integration = 'pytest --maxfail=5 -m "integration" {args:test}'
# The macOS and Windows variants deselect tests matching "tika".
integration-mac = 'pytest --maxfail=5 -m "integration" -k "not tika" {args:test}'
integration-windows = 'pytest --maxfail=5 -m "integration" -k "not tika" {args:test}'
types = "mypy --install-types --non-interactive --cache-dir=.mypy_cache/ {args:haystack}"
lint = "pylint -ry -j 0 {args:haystack}"

[tool.hatch.envs.readme]
installer = "uv"
detached = true                         # To avoid installing the dependencies from the default environment
dependencies = ["haystack-pydoc-tools"]

[tool.hatch.envs.readme.scripts]
sync = "./.github/utils/pydoc-markdown.sh"
delete-outdated = "python ./.github/utils/delete_outdated_docs.py {args}"

[tool.hatch.version]
path = "VERSION.txt"
# Hatch's regex version source requires a named group called `version`;
# whatever this group matches in VERSION.txt becomes the package version.
pattern = "(?P<version>.+)"

[tool.hatch.metadata]
# Required because the test environment installs en-core-web-trf from a direct URL.
allow-direct-references = true

[tool.hatch.build.targets.sdist]
include = ["/haystack", "/VERSION.txt"]

[tool.hatch.build.targets.wheel]
packages = ["haystack"]

[tool.codespell]
ignore-words-list = "ans,astroid,nd,ned,nin,ue,rouge,ist"
quiet-level = 3
skip = "./test,./e2e"

[tool.pylint.'MESSAGES CONTROL']
max-line-length = 120
disable = [

  # To keep
  "fixme",
  "c-extension-no-member",

  # To review:
  "missing-docstring",
  "unused-argument",
  "no-member",
  "line-too-long",
  "protected-access",
  "too-few-public-methods",
  "raise-missing-from",
  "invalid-name",
  "duplicate-code",
  "arguments-differ",
  "consider-using-f-string",
  "no-else-return",
  "attribute-defined-outside-init",
  "super-with-arguments",
  "redefined-builtin",
  "abstract-method",
  "unspecified-encoding",
  "unidiomatic-typecheck",
  "no-name-in-module",
  "consider-using-with",
  "redefined-outer-name",
  "arguments-renamed",
  "unnecessary-pass",
  "broad-except",
  "unnecessary-comprehension",
  "subprocess-run-check",
  "singleton-comparison",
  "consider-iterating-dictionary",
  "undefined-loop-variable",
  "consider-using-in",
  "bare-except",
  "unexpected-keyword-arg",
  "simplifiable-if-expression",
  "use-list-literal",
  "broad-exception-raised",

  # To review later
  "cyclic-import",
  "import-outside-toplevel",
  "deprecated-method",
]

[tool.pylint.'DESIGN']
max-args = 38           # Default is 5
max-attributes = 28     # Default is 7
max-branches = 34       # Default is 12
max-locals = 45         # Default is 15
max-module-lines = 2468 # Default is 1000
max-nested-blocks = 9   # Default is 5
max-statements = 206    # Default is 50

[tool.pylint.'SIMILARITIES']
min-similarity-lines = 6

[tool.pytest.ini_options]
minversion = "6.0"
# With --strict-markers, every marker used in the tests must be declared in `markers` below.
addopts = "--strict-markers"
markers = [
  "unit: unit tests",
  "integration: integration tests",

  "generator: generator tests",
  "summarizer: summarizer tests",
  "embedding_dim: uses a document store with non-default embedding dimension (e.g @pytest.mark.embedding_dim(128))",

  "tika: requires Tika container",
  "parsr: requires Parsr container",
  "ocr: requires Tesseract",

  "elasticsearch: requires Elasticsearch container",
  "weaviate: requires Weaviate container",
  "pinecone: requires Pinecone credentials",
  "faiss: uses FAISS",
  "opensearch",
  "document_store",
]
log_cli = true

[tool.mypy]
warn_return_any = false
warn_unused_configs = true
ignore_missing_imports = true

[tool.ruff]
line-length = 120
target-version = "py38"
exclude = [".github", "proposals"]

[tool.ruff.format]
skip-magic-trailing-comma = true

[tool.ruff.lint]
isort.split-on-trailing-comma = false
exclude = ["test/**", "e2e/**"]
select = [
  "ASYNC", # flake8-async
  "C4",    # flake8-comprehensions
  "C90",   # McCabe cyclomatic complexity
  "E501",  # Long lines
  "EXE",   # flake8-executable
  "F",     # Pyflakes
  "INT",   # flake8-gettext
  "PERF",  # Perflint
  "PL",    # Pylint
  "Q",     # flake8-quotes
  "SIM",   # flake8-simplify
  "SLOT",  # flake8-slots
  "T10",   # flake8-debugger
  "W",     # pycodestyle
  "YTT",   # flake8-2020
  "I",     # isort
  # built-in shadowing
  "A001", # builtin-variable-shadowing
  "A002", # builtin-argument-shadowing
  "A003", # builtin-attribute-shadowing
  # docstring rules
  "D102", # Missing docstring in public method
  "D103", # Missing docstring in public function
  "D209", # Closing triple quotes go to new line
  "D205", # 1 blank line required between summary line and description
  "D213", # summary lines must be positioned on the second physical line of the docstring
  "D417", # undocumented-parameter
  "D419", # undocumented-returns
]
ignore = [
  "F401",    # unused-import
  "PERF203", # `try`-`except` within a loop incurs performance overhead
  "PERF401", # Use a list comprehension to create a transformed list
  "PLR1714", # repeated-equality-comparison
  "PLR5501", # collapsible-else-if
  "PLW0603", # global-statement
  "PLW1510", # subprocess-run-without-check
  "PLW2901", # redefined-loop-name
  "SIM108",  # if-else-block-instead-of-if-exp
  "SIM115",  # open-file-with-context-handler
  "SIM118",  # in-dict-keys
]

[tool.ruff.lint.mccabe]
max-complexity = 28

[tool.ruff.lint.per-file-ignores]
"examples/basic_qa_pipeline.py" = ["C416"]
"haystack/preview/testing/document_store.py" = ["C416", "F821"]
"haystack/telemetry.py" = ["F821"]

[tool.ruff.lint.pylint]
allow-magic-value-types = ["float", "int", "str"]
max-args = 14                                     # Default is 5
max-branches = 21                                 # Default is 12
max-public-methods = 20                           # Default is 20
max-returns = 7                                   # Default is 6
max-statements = 60                               # Default is 50

[tool.coverage.run]
omit = ["haystack/testing/*"]