# Build backend used to produce the sdist and wheel.
[build-system]
requires = ["hatchling>=1.8.0"]
build-backend = "hatchling.build"
# Core package metadata.
[project]
name = "haystack-ai"
# The version is resolved at build time; see [tool.hatch.version].
dynamic = ["version"]
description = "LLM framework to build customizable, production-ready LLM applications. Connect components (models, vector DBs, file converters) to pipelines or agents that can interact with your data."
readme = "README.md"
license = "Apache-2.0"
requires-python = ">=3.9"
authors = [{ name = "deepset.ai", email = "malte.pietsch@deepset.ai" }]
keywords = [
  "BERT",
  "QA",
  "Question-Answering",
  "Reader",
  "Retriever",
  "albert",
  "language-model",
  "mrc",
  "roberta",
  "search",
  "semantic-search",
  "squad",
  "transfer-learning",
  "transformer",
]
classifiers = [
  "Development Status :: 5 - Production/Stable",
  "Intended Audience :: Science/Research",
  "License :: Freely Distributable",
  "License :: OSI Approved :: Apache Software License",
  "Operating System :: OS Independent",
  "Programming Language :: Python",
  "Programming Language :: Python :: 3",
  "Programming Language :: Python :: 3.9",
  "Programming Language :: Python :: 3.10",
  "Programming Language :: Python :: 3.11",
  "Programming Language :: Python :: 3.12",
  "Programming Language :: Python :: 3.13",
  "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
# Runtime dependencies installed with the package. Trailing comments name the
# feature or component that needs each one.
dependencies = [
  "tqdm",
  "tenacity!=8.4.0",
  "lazy-imports",
  "openai>=1.56.1",
  "pydantic",
  "Jinja2",
  "posthog!=3.12.0",          # telemetry; 3.12.0 was problematic, see https://github.com/PostHog/posthog-python/issues/187
  "pyyaml",
  "more-itertools",           # TextDocumentSplitter
  "networkx",                 # Pipeline graphs
  "typing_extensions>=4.7",   # typing support for Python 3.9
  "requests",
  "numpy",
  "python-dateutil",
  "jsonschema",               # JsonSchemaValidator, Tool
  "docstring-parser",         # ComponentTool
  "haystack-experimental",
]
# Default Hatch environment: formatting, linting and release-note tooling.
[tool.hatch.envs.default]
installer = "uv"
dependencies = [
  "pre-commit",
  "ruff",
  "toml",
  "reno",
  # dulwich is a reno dependency. reno pins it at >=0.15.0, so pip takes a ton of time to resolve the dependency tree.
  # We pin it here to avoid taking too much time.
  # https://opendev.org/openstack/reno/src/branch/master/requirements.txt#L7
  "dulwich>=0.21.0,<1.0.0",
]
# Scripts of the default environment. `{args}` is a Hatch placeholder for any
# extra arguments passed on the command line.
[tool.hatch.envs.default.scripts]
release-note = "reno new {args}"
# `fmt` rewrites files in place; `fmt-check` only reports problems.
fmt = "ruff check --fix {args} && ruff format {args}"
fmt-check = "ruff check {args} && ruff format --check {args}"
# Test environment: everything needed to run the unit/integration test suite,
# the type checker and pylint.
[tool.hatch.envs.test]
# we override dependencies from the default environment
dependencies = [
  "numpy>=2",                                         # Haystack is compatible both with numpy 1.x and 2.x, but we test with 2.x
  "numba>=0.54.0",                                    # This pin helps uv resolve the dependency tree. See https://github.com/astral-sh/uv/issues/7881

  "pandas",                                           # AzureOCRDocumentConverter, CSVDocumentCleaner, CSVDocumentSplitter,
                                                      # EvaluationRunResult, XLSXToDocument, and pipeline tests
  "transformers[torch,sentencepiece]>=4.51.1,<4.52",  # ExtractiveReader, TransformersSimilarityRanker, LocalWhisperTranscriber, HFGenerators...
  "huggingface_hub>=0.27.0",                          # Hugging Face API Generators and Embedders
  "sentence-transformers>=4.1.0",                     # Sentence Transformers Embedders, Rankers, and SASEvaluator
  "langdetect",                                       # TextLanguageRouter and DocumentLanguageClassifier
  "openai-whisper>=20231106",                         # LocalWhisperTranscriber
  "arrow>=1.3.0",                                     # Jinja2TimeExtension

  # Converters
  "pypdf",                                            # PyPDFToDocument
  "pdfminer.six",                                     # PDFMinerToDocument
  "markdown-it-py",                                   # MarkdownToDocument
  "mdit_plain",                                       # MarkdownToDocument
  "tika",                                             # TikaDocumentConverter
  "azure-ai-formrecognizer>=3.2.0b2",                 # AzureOCRDocumentConverter
  "trafilatura",                                      # HTMLToDocument
  "python-pptx",                                      # PPTXToDocument
  "python-docx",                                      # DocxToDocument
  "jq",                                               # JSONConverter
  "openpyxl",                                         # XLSXToDocument
  "tabulate",                                         # XLSXToDocument
  "python-oxmsg",                                     # MSGToDocument

  # Splitters
  "nltk>=3.9.1",                                      # NLTKDocumentSplitter, RecursiveDocumentSplitter
  "tiktoken",                                         # RecursiveDocumentSplitter

  # OpenAPI
  "jsonref",                                          # OpenAPIServiceConnector, OpenAPIServiceToFunctions
  "openapi3",
  "openapi-llm>=0.4.1",                               # OpenAPIConnector

  # Tracing
  "opentelemetry-sdk",
  "ddtrace",

  # Structured logging
  "structlog",

  # needed in link content fetcher tests
  "httpx[http2]",

  # Azure Utils
  "azure-identity",

  # Test
  "pytest",
  "pytest-bdd",
  "pytest-cov",
  "pytest-asyncio",
  "pytest-rerunfailures",
  "coverage",
  "mypy",
  "pip",                                              # mypy needs pip to install missing stub packages
  "pylint",
  "ipython",
]
# Scripts of the test environment. `{args:X}` is a Hatch placeholder that falls
# back to `X` when no extra arguments are passed.
[tool.hatch.envs.test.scripts]
# Test selection relies on the pytest markers declared in [tool.pytest.ini_options].
unit = 'pytest --cov-report xml:coverage.xml --cov="haystack" -m "not integration" {args:test}'
integration = 'pytest --maxfail=5 -m "integration" {args:test}'
integration-only-fast = 'pytest --maxfail=5 -m "integration and not slow" {args:test}'
integration-only-slow = 'pytest --maxfail=5 -m "integration and slow" {args:test}'
all = 'pytest {args:test}'

# Static analysis
types = "mypy --install-types --non-interactive --cache-dir=.mypy_cache/ {args:haystack}"
lint = "pylint -ry -j 0 {args:haystack}"
# End-to-end test environment. It uses the `test` environment as its template
# and adds the packages listed below on top of it.
[tool.hatch.envs.e2e]
template = "test"
extra-dependencies = [
  # NamedEntityExtractor
  "spacy>=3.8,<3.9",
  "en-core-web-trf @ https://github.com/explosion/spacy-models/releases/download/en_core_web_trf-3.8.0/en_core_web_trf-3.8.0-py3-none-any.whl",
  # spacy requires thinc, which depends on blis. We pin blis because version 1.2.1 does not have wheels for
  # Python 3.9 and compiling it from source takes a long time.
  "blis<1.2.1; python_version < '3.10'",
]
# Script of the e2e environment: runs pytest against the `e2e` path.
[tool.hatch.envs.e2e.scripts]
test = "pytest e2e"
# Environment used by the documentation scripts in [tool.hatch.envs.readme.scripts].
[tool.hatch.envs.readme]
installer = "uv"
detached = true  # To avoid installing the dependencies from the default environment
dependencies = ["haystack-pydoc-tools"]
# Scripts of the readme environment; both delegate to helpers under .github/utils.
[tool.hatch.envs.readme.scripts]
sync = "./.github/utils/pydoc-markdown.sh"
delete-outdated = "python ./.github/utils/delete_outdated_docs.py {args}"
# Project links. Keys containing spaces or colons must stay quoted.
[project.urls]
"CI: GitHub" = "https://github.com/deepset-ai/haystack/actions"
"Docs: RTD" = "https://haystack.deepset.ai/overview/intro"
"GitHub: issues" = "https://github.com/deepset-ai/haystack/issues"
"GitHub: repo" = "https://github.com/deepset-ai/haystack"
Homepage = "https://github.com/deepset-ai/haystack"
# Source of the dynamic project version: the file at `path` is matched against
# `pattern`, and the named group `version` supplies the version string.
[tool.hatch.version]
path = "VERSION.txt"
pattern = "(?P<version>.+)"
# Permit direct references (`name @ URL`) in dependency specifications.
[tool.hatch.metadata]
allow-direct-references = true
# Source distribution contents: the package directory plus the version file.
[tool.hatch.build.targets.sdist]
include = ["/haystack", "/VERSION.txt"]
# Wheel contents: only the `haystack` package.
[tool.hatch.build.targets.wheel]
packages = ["haystack"]
# Spell-checker configuration.
[tool.codespell]
# Comma-separated words that codespell must not flag.
ignore-words-list = "ans,astroid,nd,ned,nin,ue,rouge,ist, Claus"
quiet-level = 3
# Test directories are not spell-checked.
skip = "./test,./e2e"
# Pylint: line length and the list of disabled checks.
[tool.pylint.'MESSAGES CONTROL']
max-line-length = 120
disable = [

  # To keep
  "fixme",
  "c-extension-no-member",

  # To review:
  "missing-docstring",
  "unused-argument",
  "no-member",
  "line-too-long",
  "protected-access",
  "too-few-public-methods",
  "raise-missing-from",
  "invalid-name",
  "duplicate-code",
  "arguments-differ",
  "consider-using-f-string",
  "no-else-return",
  "attribute-defined-outside-init",
  "super-with-arguments",
  "redefined-builtin",
  "abstract-method",
  "unspecified-encoding",
  "unidiomatic-typecheck",
  "no-name-in-module",
  "consider-using-with",
  "redefined-outer-name",
  "arguments-renamed",
  "unnecessary-pass",
  "broad-except",
  "unnecessary-comprehension",
  "subprocess-run-check",
  "singleton-comparison",
  "consider-iterating-dictionary",
  "undefined-loop-variable",
  "consider-using-in",
  "bare-except",
  "unexpected-keyword-arg",
  "simplifiable-if-expression",
  "use-list-literal",
  "broad-exception-raised",

  # To review later
  "cyclic-import",
  "import-outside-toplevel",
  "deprecated-method",
]
# Pylint design-checker thresholds, each raised above pylint's default.
[tool.pylint.'DESIGN']
max-args = 38            # Default is 5
max-attributes = 28      # Default is 7
max-branches = 34        # Default is 12
max-locals = 45          # Default is 15
max-module-lines = 2468  # Default is 1000
max-nested-blocks = 9    # Default is 5
max-statements = 206     # Default is 50
# Pylint duplicate-code detection: minimum number of similar lines to report.
[tool.pylint.'SIMILARITIES']
min-similarity-lines = 6
# Pytest configuration.
[tool.pytest.ini_options]
minversion = "6.0"
# With --strict-markers, only the markers declared below are accepted.
addopts = "--strict-markers"
markers = [
  "unit: unit tests",
  "integration: integration tests",
  # integration tests that are slow (e.g. model inference on CPU), unstable (e.g. call unstable external services)
  # or require special setup (e.g. installing system dependencies, running Docker containers)
  "slow: slow/unstable integration tests",
]
log_cli = true
# pytest-asyncio settings
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "class"
# Mypy configuration. `python_version` matches the minimum in `requires-python`.
[tool.mypy]
python_version = "3.9"
disallow_incomplete_defs = true
warn_return_any = false
warn_unused_configs = true
ignore_missing_imports = true
check_untyped_defs = true
# Per-module override: relax `disallow_incomplete_defs` for the modules below.
[[tool.mypy.overrides]]
# TODO: Fix component typings
module = ["haystack.components.*", "haystack.testing.*"]
disallow_incomplete_defs = false
# Ruff settings shared by the linter and the formatter.
[tool.ruff]
line-length = 120
exclude = [".github", "proposals"]
# Ruff formatter settings.
[tool.ruff.format]
skip-magic-trailing-comma = true
# Ruff linter settings: enabled rule sets, ignored rules and excluded paths.
[tool.ruff.lint]
isort.split-on-trailing-comma = false
# Test code is not linted.
exclude = ["test/**", "e2e/**"]
select = [
  "ASYNC",  # flake8-async
  "C4",     # flake8-comprehensions
  "C90",    # McCabe cyclomatic complexity
  "E501",   # Long lines
  "EXE",    # flake8-executable
  "F",      # Pyflakes
  "INT",    # flake8-gettext
  "PERF",   # Perflint
  "PL",     # Pylint
  "Q",      # flake8-quotes
  "SIM",    # flake8-simplify
  "SLOT",   # flake8-slots
  "T10",    # flake8-debugger
  "W",      # pycodestyle
  "YTT",    # flake8-2020
  "I",      # isort

  # built-in shadowing
  "A001",   # builtin-variable-shadowing
  "A002",   # builtin-argument-shadowing
  "A003",   # builtin-attribute-shadowing

  # docstring rules
  "D102",   # Missing docstring in public method
  "D103",   # Missing docstring in public function
  "D209",   # Closing triple quotes go to new line
  "D205",   # 1 blank line required between summary line and description
  "D213",   # summary lines must be positioned on the second physical line of the docstring
  "D417",   # undocumented-parameter
  "D419",   # undocumented-returns
]

ignore = [
  "F401",     # unused-import
  "PERF203",  # `try`-`except` within a loop incurs performance overhead
  "PERF401",  # Use a list comprehension to create a transformed list
  "PLR1714",  # repeated-equality-comparison
  "PLR5501",  # collapsible-else-if
  "PLW0603",  # global-statement
  "PLW1510",  # subprocess-run-without-check
  "PLW2901",  # redefined-loop-name
  "SIM108",   # if-else-block-instead-of-if-exp
  "SIM115",   # open-file-with-context-handler
  "SIM118",   # in-dict-keys
]
# Threshold for the McCabe cyclomatic-complexity rule (C90) selected in [tool.ruff.lint].
[tool.ruff.lint.mccabe]
max-complexity = 28
# Thresholds for the Pylint-derived rules (PL) selected in [tool.ruff.lint].
[tool.ruff.lint.pylint]
allow-magic-value-types = ["float", "int", "str"]
max-args = 14            # Default is 5
max-branches = 21        # Default is 12
max-public-methods = 20  # Default is 20
max-returns = 7          # Default is 6
max-statements = 60      # Default is 50
# Coverage measurement: files under haystack/testing are left out.
[tool.coverage.run]
omit = ["haystack/testing/*"]