mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-06-26 22:00:13 +00:00

* Add draft of the Excel To Document converter * Add license header * Add release note * Use Union instead of pipe * Add openpyxl as additional dep * Fix zip issue * few updates from Bijay * Update deps * Add markdown test * Adding more example excels and expanding tests * Added more tests * Fix windows test by setting lineterminator * Addressing PR comments * PR comments * Fix linting
355 lines
10 KiB
TOML
355 lines
10 KiB
TOML
# The project is built with Hatchling.
[build-system]
build-backend = "hatchling.build"
requires = ["hatchling>=1.8.0"]
|
|
|
|
# Core package metadata for the `haystack-ai` distribution.
[project]
name = "haystack-ai"
# The version is declared dynamic rather than hard-coded in this table.
dynamic = ["version"]
description = "LLM framework to build customizable, production-ready LLM applications. Connect components (models, vector DBs, file converters) to pipelines or agents that can interact with your data."
readme = "README.md"
license = "Apache-2.0"
requires-python = ">=3.8,<3.13"
authors = [{ name = "deepset.ai", email = "malte.pietsch@deepset.ai" }]
keywords = [
    "BERT",
    "QA",
    "Question-Answering",
    "Reader",
    "Retriever",
    "albert",
    "language-model",
    "mrc",
    "roberta",
    "search",
    "semantic-search",
    "squad",
    "transfer-learning",
    "transformer",
]
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Science/Research",
    "License :: Freely Distributable",
    "License :: OSI Approved :: Apache Software License",
    "Operating System :: OS Independent",
    "Programming Language :: Python",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
# Runtime requirements installed together with the package.
dependencies = [
    "pandas",
    "tqdm",
    "tenacity!=8.4.0",
    "lazy-imports",
    "openai>=1.56.1",
    "pydantic",
    "Jinja2",
    # Telemetry
    "posthog",
    "pyyaml",
    # Used by TextDocumentSplitter
    "more-itertools",
    # Used for Pipeline graphs
    "networkx",
    # Typing support for Python 3.8
    "typing_extensions>=4.7",
    "requests",
    "numpy",
    "python-dateutil",
    "haystack-experimental",
]
|
|
|
|
# Default Hatch environment: pre-commit, ruff and reno tooling.
[tool.hatch.envs.default]
installer = "uv"
dependencies = [
    "pre-commit",
    "ruff",
    "toml",
    "reno",
    # reno depends on dulwich with only a loose lower bound (>=0.15.0), which
    # makes pip spend a long time resolving the dependency tree. The range is
    # narrowed here to keep resolution fast. See:
    # https://opendev.org/openstack/reno/src/branch/master/requirements.txt#L7
    "dulwich>=0.21.0,<1.0.0",
]
|
|
|
|
# Command shortcuts defined for the default environment.
[tool.hatch.envs.default.scripts]
# Release notes (reno)
release-note = "reno new {args}"
# Lint checks (ruff)
check = "ruff check {args}"
fix = "ruff check --fix"
# Code formatting (ruff)
format = "ruff format {args}"
format-check = "ruff format --check {args}"
|
|
|
|
# Test environment: extra packages needed by the test suite, grouped by the
# components or purposes that require them.
[tool.hatch.envs.test]
extra-dependencies = [
    # Haystack is compatible with both numpy 1.x and 2.x, but tests run on 2.x
    "numpy>=2",

    # ExtractiveReader, TransformersSimilarityRanker, LocalWhisperTranscriber, HFGenerators...
    "transformers[torch,sentencepiece]==4.44.2",
    # Hugging Face API Generators and Embedders
    "huggingface_hub>=0.27.0",
    # SentenceTransformersTextEmbedder and SentenceTransformersDocumentEmbedder
    "sentence-transformers>=3.0.0",
    # TextLanguageRouter and DocumentLanguageClassifier
    "langdetect",
    # LocalWhisperTranscriber
    "openai-whisper>=20231106",
    # Jinja2TimeExtension
    "arrow>=1.3.0",

    # NamedEntityExtractor
    "spacy>=3.8,<3.9",
    "spacy-curated-transformers>=0.2,<=0.3",
    "en-core-web-trf @ https://github.com/explosion/spacy-models/releases/download/en_core_web_trf-3.8.0/en_core_web_trf-3.8.0-py3-none-any.whl",

    # Converters
    "pypdf",                            # PyPDFToDocument
    "pdfminer.six",                     # PDFMinerToDocument
    "markdown-it-py",                   # MarkdownToDocument
    "mdit_plain",                       # MarkdownToDocument
    "tika",                             # TikaDocumentConverter
    "azure-ai-formrecognizer>=3.2.0b2", # AzureOCRDocumentConverter
    "trafilatura",                      # HTMLToDocument
    "python-pptx",                      # PPTXToDocument
    "python-docx",                      # DocxToDocument
    "jq",                               # JSONConverter
    "openpyxl",                         # XLSXToDocument
    "tabulate",                         # XLSXToDocument

    # NLTKDocumentSplitter
    "nltk",

    # OpenAPI
    "jsonref", # OpenAPIServiceConnector, OpenAPIServiceToFunctions
    "openapi3",

    # JsonSchemaValidator, Tool
    "jsonschema",

    # Tracing
    "opentelemetry-sdk",
    "ddtrace",

    # Structured logging
    "structlog",

    # Test
    "pytest",
    "pytest-bdd",
    "pytest-cov",
    # Used in the CI
    "pytest-custom_exit_code",
    "pytest-asyncio",
    "pytest-rerunfailures",
    "responses",
    "tox",
    "coverage",
    "python-multipart",
    "psutil",
    "mypy",
    # mypy needs pip to install missing stub packages
    "pip",
    "pylint",
    "ipython",
]
|
|
|
|
# Command shortcuts defined for the test environment.
[tool.hatch.envs.test.scripts]
# Test runs (pytest)
e2e = "pytest e2e"
unit = 'pytest --cov-report xml:coverage.xml --cov="haystack" -m "not integration" {args:test}'
integration = 'pytest --maxfail=5 -m "integration" {args:test}'
# The macOS and Windows variants deselect tests matching "tika".
integration-mac = 'pytest --maxfail=5 -m "integration" -k "not tika" {args:test}'
integration-windows = 'pytest --maxfail=5 -m "integration" -k "not tika" {args:test}'
# Static analysis (mypy, pylint)
types = "mypy --install-types --non-interactive --cache-dir=.mypy_cache/ {args:haystack}"
lint = "pylint -ry -j 0 {args:haystack}"
|
|
|
|
# Environment for the readme scripts; it only needs haystack-pydoc-tools.
[tool.hatch.envs.readme]
installer = "uv"
# Detached so the dependencies of the default environment are not installed.
detached = true
dependencies = ["haystack-pydoc-tools"]
|
|
|
|
# Command shortcuts defined for the readme environment; both call helpers
# stored under .github/utils.
[tool.hatch.envs.readme.scripts]
sync = "./.github/utils/pydoc-markdown.sh"
delete-outdated = "python ./.github/utils/delete_outdated_docs.py {args}"
|
|
|
|
# Project links. Labels containing spaces or colons must stay quoted.
[project.urls]
"CI: GitHub" = "https://github.com/deepset-ai/haystack/actions"
"Docs: RTD" = "https://haystack.deepset.ai/overview/intro"
"GitHub: issues" = "https://github.com/deepset-ai/haystack/issues"
"GitHub: repo" = "https://github.com/deepset-ai/haystack"
Homepage = "https://github.com/deepset-ai/haystack"
|
|
|
|
# Version source: VERSION.txt, matched by a regex with a named `version` group.
[tool.hatch.version]
path = "VERSION.txt"
pattern = '(?P<version>.+)'
|
|
|
|
# Hatch metadata option: direct references in dependencies are allowed.
[tool.hatch.metadata]
allow-direct-references = true
|
|
|
|
# Source distribution: include the package directory and the version file.
[tool.hatch.build.targets.sdist]
include = [
    "/haystack",
    "/VERSION.txt",
]
|
|
|
|
# Wheel: `haystack` is the only package listed.
[tool.hatch.build.targets.wheel]
packages = ["haystack"]
|
|
|
|
# codespell settings: words to ignore and paths to skip.
[tool.codespell]
skip = "./test,./e2e"
ignore-words-list = "ans,astroid,nd,ned,nin,ue,rouge,ist"
quiet-level = 3
|
|
|
|
# pylint: line length limit and the list of disabled checks.
[tool.pylint.'MESSAGES CONTROL']
max-line-length = 120
disable = [
    # Disabled on purpose; to keep
    "fixme",
    "c-extension-no-member",

    # Disabled for now; to review
    "missing-docstring",
    "unused-argument",
    "no-member",
    "line-too-long",
    "protected-access",
    "too-few-public-methods",
    "raise-missing-from",
    "invalid-name",
    "duplicate-code",
    "arguments-differ",
    "consider-using-f-string",
    "no-else-return",
    "attribute-defined-outside-init",
    "super-with-arguments",
    "redefined-builtin",
    "abstract-method",
    "unspecified-encoding",
    "unidiomatic-typecheck",
    "no-name-in-module",
    "consider-using-with",
    "redefined-outer-name",
    "arguments-renamed",
    "unnecessary-pass",
    "broad-except",
    "unnecessary-comprehension",
    "subprocess-run-check",
    "singleton-comparison",
    "consider-iterating-dictionary",
    "undefined-loop-variable",
    "consider-using-in",
    "bare-except",
    "unexpected-keyword-arg",
    "simplifiable-if-expression",
    "use-list-literal",
    "broad-exception-raised",

    # To review later
    "cyclic-import",
    "import-outside-toplevel",
    "deprecated-method",
]
|
|
# pylint design limits, each raised above the pylint default noted beside it.
[tool.pylint.'DESIGN']
max-args = 38            # pylint default: 5
max-attributes = 28      # pylint default: 7
max-branches = 34        # pylint default: 12
max-locals = 45          # pylint default: 15
max-module-lines = 2468  # pylint default: 1000
max-nested-blocks = 9    # pylint default: 5
max-statements = 206     # pylint default: 50
|
|
|
|
# pylint similarity check: minimum number of similar lines.
[tool.pylint.'SIMILARITIES']
min-similarity-lines = 6
|
|
|
|
# pytest settings. `--strict-markers` is passed by default, so every marker
# used in tests has to be registered in `markers` below.
[tool.pytest.ini_options]
minversion = "6.0"
addopts = "--strict-markers"
log_cli = true
markers = [
    "unit: unit tests",
    "integration: integration tests",

    "generator: generator tests",
    "summarizer: summarizer tests",
    "embedding_dim: uses a document store with non-default embedding dimension (e.g @pytest.mark.embedding_dim(128))",

    "tika: requires Tika container",
    "parsr: requires Parsr container",
    "ocr: requires Tesseract",

    "elasticsearch: requires Elasticsearch container",
    "weaviate: requires Weaviate container",
    "pinecone: requires Pinecone credentials",
    "faiss: uses FAISS",
    "opensearch",
    "document_store",
]
|
|
|
|
# mypy settings.
[tool.mypy]
ignore_missing_imports = true
warn_unused_configs = true
warn_return_any = false
|
|
|
|
# Ruff settings that apply to the tool as a whole.
[tool.ruff]
target-version = "py38"
line-length = 120
exclude = [
    ".github",
    "proposals",
]
|
|
|
|
# Ruff formatter settings.
[tool.ruff.format]
skip-magic-trailing-comma = true
|
|
|
|
# Ruff linter settings: excluded paths, the enabled rule sets (`select`)
# and the individual rules that are switched off (`ignore`).
[tool.ruff.lint]
isort.split-on-trailing-comma = false
exclude = ["test/**", "e2e/**"]
select = [
    "ASYNC", # flake8-async
    "C4",    # flake8-comprehensions
    "C90",   # McCabe cyclomatic complexity
    "E501",  # Long lines
    "EXE",   # flake8-executable
    "F",     # Pyflakes
    "INT",   # flake8-gettext
    "PERF",  # Perflint
    "PL",    # Pylint
    "Q",     # flake8-quotes
    "SIM",   # flake8-simplify
    "SLOT",  # flake8-slots
    "T10",   # flake8-debugger
    "W",     # pycodestyle
    "YTT",   # flake8-2020
    "I",     # isort
    # built-in shadowing
    "A001", # builtin-variable-shadowing
    "A002", # builtin-argument-shadowing
    "A003", # builtin-attribute-shadowing
    # docstring rules
    "D102", # Missing docstring in public method
    "D103", # Missing docstring in public function
    "D209", # Closing triple quotes go to new line
    "D205", # 1 blank line required between summary line and description
    "D213", # summary lines must be positioned on the second physical line of the docstring
    "D417", # undocumented-parameter
    "D419", # empty-docstring
]

ignore = [
    "F401",    # unused-import
    "PERF203", # `try`-`except` within a loop incurs performance overhead
    "PERF401", # Use a list comprehension to create a transformed list
    "PLR1714", # repeated-equality-comparison
    "PLR5501", # collapsible-else-if
    "PLW0603", # global-statement
    "PLW1510", # subprocess-run-without-check
    "PLW2901", # redefined-loop-name
    "SIM108",  # if-else-block-instead-of-if-exp
    "SIM115",  # open-file-with-context-handler
    "SIM118",  # in-dict-keys
]
|
|
|
|
# Complexity threshold for the McCabe (C90) check.
[tool.ruff.lint.mccabe]
max-complexity = 28
|
|
|
|
# Rules switched off for individual files only.
# NOTE(review): the "haystack/preview/..." entry may point at a path that no
# longer exists — confirm against the repository before relying on it.
[tool.ruff.lint.per-file-ignores]
"examples/basic_qa_pipeline.py" = ["C416"]
"haystack/preview/testing/document_store.py" = [
    "C416",
    "F821",
]
"haystack/telemetry.py" = ["F821"]
|
|
|
|
# Limits for Ruff's Pylint-derived rules; the Ruff default is noted beside each.
[tool.ruff.lint.pylint]
allow-magic-value-types = [
    "float",
    "int",
    "str",
]
max-args = 14            # default: 5
max-branches = 21        # default: 12
max-public-methods = 20  # default: 20
max-returns = 7          # default: 6
max-statements = 60      # default: 50
|
|
|
|
# coverage.py: files under haystack/testing/ are omitted.
[tool.coverage.run]
omit = [
    "haystack/testing/*",
]
|