haystack/pyproject.toml
Michele Pangrazzi 3207a76d50
chore: Update pydoc-markdown.sh (#9547)
* Make config path a $1 param ; Add usage in comment ; Add echo log

* Update sync command
2025-06-24 14:01:51 +02:00

354 lines
11 KiB
TOML

# Build backend declaration: packaging is delegated to Hatchling.
[build-system]
requires = ["hatchling>=1.8.0"]
build-backend = "hatchling.build"
# Distribution metadata for the `haystack-ai` package.
[project]
name = "haystack-ai"
# The version is not written here; it is read from VERSION.txt (see [tool.hatch.version]).
dynamic = ["version"]
description = "LLM framework to build customizable, production-ready LLM applications. Connect components (models, vector DBs, file converters) to pipelines or agents that can interact with your data."
readme = "README.md"
license = "Apache-2.0"
requires-python = ">=3.9"
authors = [{ name = "deepset.ai", email = "malte.pietsch@deepset.ai" }]
keywords = [
    "BERT",
    "QA",
    "Question-Answering",
    "Reader",
    "Retriever",
    "albert",
    "language-model",
    "mrc",
    "roberta",
    "search",
    "semantic-search",
    "squad",
    "transfer-learning",
    "transformer",
]
# NOTE(review): `license` above is already an SPDX expression. PEP 639 deprecates
# `License ::` classifiers when a license expression is present — confirm whether
# the two `License ::` entries below should stay.
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Science/Research",
    "License :: Freely Distributable",
    "License :: OSI Approved :: Apache Software License",
    "Operating System :: OS Independent",
    "Programming Language :: Python",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
# Runtime requirements installed together with the package.
dependencies = [
    "tqdm",
    "tenacity!=8.4.0",
    "lazy-imports",
    "openai>=1.56.1",
    "pydantic",
    "Jinja2",
    # Telemetry. 3.12.0 is excluded because it was problematic:
    # https://github.com/PostHog/posthog-python/issues/187
    "posthog!=3.12.0",
    "pyyaml",
    "more-itertools",  # TextDocumentSplitter
    "networkx",  # Pipeline graphs
    "typing_extensions>=4.7",  # typing support for Python 3.9
    "requests",
    "numpy",
    "python-dateutil",
    "jsonschema",  # JsonSchemaValidator, Tool
    "docstring-parser",  # ComponentTool
    "haystack-experimental",
]
# Default Hatch environment: developer tooling (pre-commit, ruff, reno).
[tool.hatch.envs.default]
installer = "uv"
dependencies = [
    "pre-commit",
    "ruff",
    "toml",
    "reno",
    # reno depends on dulwich and only requires >=0.15.0, which makes pip spend a
    # long time resolving the dependency tree. The range is narrowed here to keep
    # resolution fast.
    # https://opendev.org/openstack/reno/src/branch/master/requirements.txt#L7
    "dulwich>=0.21.0,<1.0.0",
]
# Commands exposed by the default environment.
# `{args}` is the placeholder for arguments passed on the command line.
[tool.hatch.envs.default.scripts]
# Create a new reno release note.
release-note = "reno new {args}"
# Apply ruff lint fixes, then reformat.
fmt = "ruff check --fix {args} && ruff format {args}"
# Same two ruff steps in check-only mode (no `--fix`, `--check` on format).
fmt-check = "ruff check {args} && ruff format --check {args}"
# Test environment. Each entry is annotated with the component or test area that needs it.
[tool.hatch.envs.test]
# This list overrides the dependencies of the default environment.
dependencies = [
    # Haystack is compatible with both numpy 1.x and 2.x, but the tests run against 2.x
    "numpy>=2",
    # This pin helps uv resolve the dependency tree. See https://github.com/astral-sh/uv/issues/7881
    "numba>=0.54.0",
    # AzureOCRDocumentConverter, CSVDocumentCleaner, CSVDocumentSplitter,
    # EvaluationRunResult, XLSXToDocument, and pipeline tests
    "pandas",
    # ExtractiveReader, TransformersSimilarityRanker, LocalWhisperTranscriber, HFGenerators...
    "transformers[torch, sentencepiece]>=4.52.4,<4.53",
    "huggingface_hub>=0.27.0",  # Hugging Face API Generators and Embedders
    "sentence-transformers>=4.1.0",  # Sentence Transformers Embedders, Rankers, and SASEvaluator
    "langdetect",  # TextLanguageRouter and DocumentLanguageClassifier
    "openai-whisper>=20231106",  # LocalWhisperTranscriber
    "arrow>=1.3.0",  # Jinja2TimeExtension

    # Converters
    "pypdf",  # PyPDFToDocument
    "pdfminer.six",  # PDFMinerToDocument
    "markdown-it-py",  # MarkdownToDocument
    "mdit_plain",  # MarkdownToDocument
    "tika",  # TikaDocumentConverter
    "azure-ai-formrecognizer>=3.2.0b2",  # AzureOCRDocumentConverter
    "trafilatura",  # HTMLToDocument
    "python-pptx",  # PPTXToDocument
    "python-docx",  # DocxToDocument
    "jq",  # JSONConverter
    "openpyxl",  # XLSXToDocument
    "tabulate",  # XLSXToDocument
    "python-oxmsg",  # MSGToDocument
    "nltk>=3.9.1",  # NLTKDocumentSplitter, RecursiveDocumentSplitter
    "tiktoken",  # RecursiveDocumentSplitter

    # OpenAPI
    "jsonref",  # OpenAPIServiceConnector, OpenAPIServiceToFunctions
    "openapi3",
    "openapi-llm>=0.4.1",  # OpenAPIConnector

    # Tracing
    "opentelemetry-sdk",
    "ddtrace",

    # Structured logging
    "structlog",

    # Needed by the link content fetcher tests
    "httpx[http2]",

    # Azure Utils
    "azure-identity",

    # Test tooling
    "pytest",
    "pytest-bdd",
    "pytest-cov",
    "pytest-asyncio",
    "pytest-rerunfailures",
    "coverage",
    "mypy",
    "pip",  # mypy needs pip to install missing stub packages
    "pylint",
    "ipython",
]
# Commands exposed by the test environment.
# `{args:test}` / `{args:haystack}` fall back to `test` / `haystack` when no arguments are given.
[tool.hatch.envs.test.scripts]
# Everything not marked `integration`, with an XML coverage report for the `haystack` package.
unit = 'pytest --cov-report xml:coverage.xml --cov="haystack" -m "not integration" {args:test}'
# Integration selections; each run stops after 5 failures.
integration = 'pytest --maxfail=5 -m "integration" {args:test}'
integration-only-fast = 'pytest --maxfail=5 -m "integration and not slow" {args:test}'
integration-only-slow = 'pytest --maxfail=5 -m "integration and slow" {args:test}'
# No marker filter.
all = 'pytest {args:test}'
# Static type check; missing stub packages are installed without prompting.
types = "mypy --install-types --non-interactive --cache-dir=.mypy_cache/ {args:haystack}"
# Pylint run (`-ry`, `-j 0`) over the given paths.
lint = "pylint -ry -j 0 {args:haystack}"
# End-to-end environment: built from the `test` environment plus the extras below.
[tool.hatch.envs.e2e]
template = "test"
extra-dependencies = [
    # NamedEntityExtractor
    "spacy>=3.8,<3.9",
    "en-core-web-trf @ https://github.com/explosion/spacy-models/releases/download/en_core_web_trf-3.8.0/en_core_web_trf-3.8.0-py3-none-any.whl",
    # spacy requires thinc, which in turn depends on blis. blis 1.2.1 ships no wheels
    # for Python 3.9 and building it from source is slow, so it is capped on that
    # interpreter only.
    "blis<1.2.1; python_version < '3.10'",
]
# Commands exposed by the e2e environment.
[tool.hatch.envs.e2e.scripts]
# Run pytest against the `e2e` directory.
test = "pytest e2e"
# Environment used by the documentation helper scripts under .github/utils.
[tool.hatch.envs.readme]
installer = "uv"
detached = true # To avoid installing the dependencies from the default environment
dependencies = ["haystack-pydoc-tools"]
# Commands exposed by the readme environment; `{args}` is forwarded to the underlying script.
[tool.hatch.envs.readme.scripts]
sync = "./.github/utils/pydoc-markdown.sh {args}"
delete-outdated = "python ./.github/utils/delete_outdated_docs.py {args}"
# Project links published with the package metadata.
# The keys are display labels, which is why most of them need quoting.
# NOTE(review): this table belongs to [project] but sits after several [tool.*] tables;
# consider moving it next to [project] for readability.
# NOTE(review): the "Docs: RTD" label presumably refers to Read the Docs, while the URL
# points at haystack.deepset.ai — confirm the label is still accurate.
[project.urls]
"CI: GitHub" = "https://github.com/deepset-ai/haystack/actions"
"Docs: RTD" = "https://haystack.deepset.ai/overview/intro"
"GitHub: issues" = "https://github.com/deepset-ai/haystack/issues"
"GitHub: repo" = "https://github.com/deepset-ai/haystack"
Homepage = "https://github.com/deepset-ai/haystack"
# Version source: read from VERSION.txt; the text matched by `.+` is captured as `version`.
[tool.hatch.version]
path = "VERSION.txt"
pattern = "(?P<version>.+)"
# Permit direct-reference requirements (`name @ URL`), as used by the e2e environment.
[tool.hatch.metadata]
allow-direct-references = true
# Source distribution: only the package directory and the version file are included.
[tool.hatch.build.targets.sdist]
include = ["/haystack", "/VERSION.txt"]
# Wheel: ships the `haystack` package.
[tool.hatch.build.targets.wheel]
packages = ["haystack"]
# Spell-checker configuration.
[tool.codespell]
# Comma-separated words that must not be reported.
# NOTE(review): the last entry is written with a leading space (", Claus") —
# confirm codespell trims entries, otherwise that word is not actually ignored.
ignore-words-list = "ans,astroid,nd,ned,nin,ue,rouge,ist, Claus"
quiet-level = 3
# Paths excluded from spell checking.
skip = "./test,./e2e"
# Pylint: line length and the list of checks that are switched off.
[tool.pylint.'MESSAGES CONTROL']
max-line-length = 120
disable = [
    # Intentionally kept disabled
    "fixme",
    "c-extension-no-member",
    # Symbols are re-exported so that type checkers resolve them correctly:
    # https://typing.python.org/en/latest/spec/distributing.html#import-conventions
    "useless-import-alias",

    # Still to be reviewed
    "missing-docstring",
    "unused-argument",
    "line-too-long",
    "protected-access",
    "too-few-public-methods",
    "raise-missing-from",
    "invalid-name",
    "duplicate-code",
    "consider-using-f-string",
    "no-else-return",
    "attribute-defined-outside-init",
    "super-with-arguments",
    "redefined-builtin",
    "unspecified-encoding",
    "no-name-in-module",
    "redefined-outer-name",
    "arguments-renamed",
    "unnecessary-pass",
    "broad-except",
    "consider-iterating-dictionary",
    "consider-using-in",
    "bare-except",
    "unexpected-keyword-arg",
    "broad-exception-raised",
    "cyclic-import",
    "import-outside-toplevel",
]
# Pylint size/complexity limits, all raised above the defaults noted on each line.
[tool.pylint.'DESIGN']
max-args = 20 # Default is 5
max-attributes = 22 # Default is 7
max-branches = 22 # Default is 12
max-locals = 39 # Default is 15
max-module-lines = 1500 # Default is 1000
max-nested-blocks = 6 # Default is 5
max-statements = 102 # Default is 50
# Threshold for pylint's duplicate-code detection.
[tool.pylint.'SIMILARITIES']
min-similarity-lines = 6
# Pytest configuration.
[tool.pytest.ini_options]
minversion = "6.0"
# Markers must be declared in `markers` below; `--strict-markers` rejects undeclared ones.
addopts = "--strict-markers"
markers = [
    "unit: unit tests",
    "integration: integration tests",
    # `slow` covers integration tests that are slow (e.g. model inference on CPU),
    # unstable (e.g. they call unstable external services), or that need special
    # setup (e.g. installing system dependencies, running Docker containers).
    "slow: slow/unstable integration tests",
]
log_cli = true
# pytest-asyncio settings
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "class"
# Mypy configuration; the target version matches the lowest Python in `requires-python`.
[tool.mypy]
python_version = "3.9"
disallow_incomplete_defs = true
warn_return_any = false
ignore_missing_imports = true
check_untyped_defs = true
# Per-module relaxation: incomplete signatures are tolerated in the modules listed below.
[[tool.mypy.overrides]]
# TODO: Fix component typings
module = ["haystack.components.*", "haystack.testing.*"]
disallow_incomplete_defs = false
# Ruff: settings shared by the linter and the formatter.
[tool.ruff]
line-length = 120
exclude = [".github", "proposals"]
# Formatter: a trailing comma does not force one-item-per-line layout.
[tool.ruff.format]
skip-magic-trailing-comma = true
# Ruff lint rules. The `test` and `e2e` trees are not linted.
[tool.ruff.lint]
isort.split-on-trailing-comma = false
exclude = ["test/**", "e2e/**"]
select = [
    "ASYNC",  # flake8-async
    "C4",  # flake8-comprehensions
    "C90",  # McCabe cyclomatic complexity
    "E501",  # Long lines
    "EXE",  # flake8-executable
    "F",  # Pyflakes
    "INT",  # flake8-gettext
    "PERF",  # Perflint
    "PL",  # Pylint
    "Q",  # flake8-quotes
    "SIM",  # flake8-simplify
    "SLOT",  # flake8-slots
    "T10",  # flake8-debugger
    "W",  # pycodestyle
    "YTT",  # flake8-2020
    "I",  # isort

    # Built-in shadowing
    "A001",  # builtin-variable-shadowing
    "A002",  # builtin-argument-shadowing
    "A003",  # builtin-attribute-shadowing

    # Docstring rules
    "D102",  # Missing docstring in public method
    "D103",  # Missing docstring in public function
    "D209",  # Closing triple quotes go to new line
    "D205",  # 1 blank line required between summary line and description
    "D213",  # Summary line must be on the second physical line of the docstring
    "D417",  # undocumented-param
    "D419",  # empty-docstring
]
ignore = [
    "F401",  # unused-import
    "PERF203",  # `try`-`except` within a loop incurs performance overhead
    "PERF401",  # Use a list comprehension to create a transformed list
    "PLR1714",  # repeated-equality-comparison
    "PLW0603",  # global-statement
    "PLW2901",  # redefined-loop-name
    "SIM108",  # if-else-block-instead-of-if-exp
    "SIM118",  # in-dict-keys
    # Symbols are re-exported so that type checkers resolve them correctly:
    # https://typing.python.org/en/latest/spec/distributing.html#import-conventions
    "PLC0414",  # useless-import-alias
    "PLC0415",  # import-outside-top-level
]
# Complexity ceiling for the C90 (McCabe) rule.
[tool.ruff.lint.mccabe]
max-complexity = 20
# Limits for the Pylint-derived (PL) rules; defaults are noted on each line.
[tool.ruff.lint.pylint]
# Literal types exempt from the magic-value comparison check.
allow-magic-value-types = ["float", "int", "str"]
max-args = 13 # Default is 5
max-branches = 21 # Default is 12
max-public-methods = 20 # Default is 20
max-returns = 7 # Default is 6
max-statements = 56 # Default is 50
# Coverage measurement: files under haystack/testing are left out.
[tool.coverage.run]
omit = ["haystack/testing/*"]