mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-11-01 18:29:32 +00:00
* ci: Use ruff in pre-commit to further limit complexity * Fix invalid escape sequences in Python code * Delete releasenotes/notes/ruff-4d2504d362035166.yaml
429 lines
10 KiB
TOML
429 lines
10 KiB
TOML
[build-system]
|
|
requires = [
|
|
"hatchling>=1.8.0",
|
|
]
|
|
build-backend = "hatchling.build"
|
|
|
|
[project]
|
|
name = "farm-haystack"
|
|
dynamic = [
|
|
"version",
|
|
]
|
|
description = "LLM framework to build customizable, production-ready LLM applications. Connect components (models, vector DBs, file converters) to pipelines or agents that can interact with your data."
|
|
readme = "README.md"
|
|
license = "Apache-2.0"
|
|
requires-python = ">=3.8"
|
|
authors = [
|
|
{ name = "deepset.ai", email = "malte.pietsch@deepset.ai" },
|
|
]
|
|
keywords = [
|
|
"BERT",
|
|
"QA",
|
|
"Question-Answering",
|
|
"Reader",
|
|
"Retriever",
|
|
"albert",
|
|
"language-model",
|
|
"mrc",
|
|
"roberta",
|
|
"search",
|
|
"semantic-search",
|
|
"squad",
|
|
"transfer-learning",
|
|
"transformer",
|
|
]
|
|
classifiers = [
|
|
"Development Status :: 5 - Production/Stable",
|
|
"Intended Audience :: Science/Research",
|
|
"License :: Freely Distributable",
|
|
"License :: OSI Approved :: Apache Software License",
|
|
"Operating System :: OS Independent",
|
|
"Programming Language :: Python",
|
|
"Programming Language :: Python :: 3",
|
|
"Programming Language :: Python :: 3.8",
|
|
"Programming Language :: Python :: 3.9",
|
|
"Programming Language :: Python :: 3.10",
|
|
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
]
|
|
dependencies = [
|
|
"requests",
|
|
"pydantic<2",
|
|
"transformers==4.32.1",
|
|
"pandas",
|
|
"rank_bm25",
|
|
"scikit-learn>=1.3.0", # TF-IDF and metrics
|
|
"lazy-imports==0.3.1", # Optional imports
|
|
"prompthub-py==4.0.0",
|
|
"platformdirs",
|
|
|
|
# Utils
|
|
"tqdm", # progress bars in model download and training scripts
|
|
"networkx", # graphs library
|
|
"quantulum3", # quantities extraction from text
|
|
"posthog", # telemetry
|
|
# NOTE: huggingface-hub is declared in the `inference` extra (>=0.5.0, needed to use create_repo in FARMReader); audio's espnet-model-zoo required huggingface-hub version <0.8
|
|
"tenacity", # retry decorator
|
|
"sseclient-py", # server side events for OpenAI streaming
|
|
"more_itertools", # utilities
|
|
|
|
# Web Retriever
|
|
"boilerpy3",
|
|
|
|
# Multimodal Embedder haystack/nodes/retriever/multimodal/embedder.py
|
|
"Pillow",
|
|
|
|
# OpenAI tokenizer
|
|
"tiktoken>=0.5.1",
|
|
|
|
# Schema validation
|
|
"jsonschema",
|
|
|
|
# Preview
|
|
"canals==0.8.0",
|
|
"openai",
|
|
"Jinja2",
|
|
"openai-whisper", # FIXME https://github.com/deepset-ai/haystack/issues/5731
|
|
|
|
# Agent events
|
|
"events",
|
|
|
|
"requests-cache<1.0.0",
|
|
]
|
|
|
|
[project.optional-dependencies]
|
|
inference = [
|
|
"transformers[torch,sentencepiece]==4.32.1",
|
|
"sentence-transformers>=2.2.0", # See haystack/nodes/retriever/_embedding_encoder.py, _SentenceTransformersEmbeddingEncoder
|
|
"huggingface-hub>=0.5.0",
|
|
]
|
|
elasticsearch = [
|
|
"farm-haystack[elasticsearch7]",
|
|
]
|
|
elasticsearch7 = [
|
|
"elasticsearch>=7.17,<8",
|
|
"elastic_transport<8"
|
|
]
|
|
elasticsearch8 = [
|
|
"elasticsearch>=8,<9",
|
|
"elastic_transport>=8,<9"
|
|
]
|
|
sql = [
|
|
"sqlalchemy>=1.4.2,<2",
|
|
"sqlalchemy_utils",
|
|
"psycopg2-binary; platform_system != 'Windows'",
|
|
]
|
|
only-faiss = [
|
|
"faiss-cpu>=1.6.3,<=1.7.2",
|
|
]
|
|
faiss = [
|
|
"farm-haystack[sql,only-faiss]",
|
|
]
|
|
only-faiss-gpu = [
|
|
"faiss-gpu>=1.6.3,<2",
|
|
]
|
|
faiss-gpu = [
|
|
"farm-haystack[sql,only-faiss-gpu]",
|
|
]
|
|
weaviate = [
|
|
"weaviate-client>2",
|
|
]
|
|
only-pinecone = [
|
|
"pinecone-client>=2.0.11,<3",
|
|
]
|
|
pinecone = [
|
|
"farm-haystack[sql,only-pinecone]",
|
|
]
|
|
opensearch = [
|
|
"opensearch-py>=2",
|
|
]
|
|
docstores = [
|
|
"farm-haystack[elasticsearch,faiss,weaviate,pinecone,opensearch]",
|
|
]
|
|
docstores-gpu = [
|
|
"farm-haystack[elasticsearch,faiss-gpu,weaviate,pinecone,opensearch]",
|
|
]
|
|
audio = [
|
|
"openai-whisper"
|
|
]
|
|
beir = [
|
|
"beir; platform_system != 'Windows'",
|
|
]
|
|
aws = [
|
|
"boto3",
|
|
# Constrain botocore to avoid taking too much time to resolve the dependency tree.
|
|
# boto3 used to constrain it at this version more than a year ago. To avoid breaking
|
|
# people using old versions we use a similar constraint without upper bound.
|
|
# https://github.com/boto/boto3/blob/dae73bef223abbedfa7317a783070831febc0c90/setup.py#L16
|
|
"botocore>=1.27",
|
|
]
|
|
crawler = [
|
|
"selenium>=4.11.0"
|
|
]
|
|
preprocessing = [
|
|
"nltk",
|
|
"langdetect", # for language classification
|
|
]
|
|
file-conversion = [
|
|
"azure-ai-formrecognizer>=3.2.0b2", # Microsoft Azure's Form Recognizer service (text and table exctrator)
|
|
"python-docx",
|
|
"tika", # Apache Tika (text & metadata extractor)
|
|
"beautifulsoup4",
|
|
"markdown",
|
|
"python-frontmatter",
|
|
"python-magic; platform_system != 'Windows'", # Depends on libmagic: https://pypi.org/project/python-magic/
|
|
"python-magic-bin; platform_system == 'Windows'", # Needs to be installed without python-magic, otherwise Windows CI gets stuck.
|
|
]
|
|
pdf = [
|
|
"PyMuPDF>=1.18.16" , # PDF text extraction alternative to xpdf; please check AGPLv3 license
|
|
]
|
|
ocr = [
|
|
"pytesseract>0.3.7",
|
|
"pdf2image>1.14",
|
|
]
|
|
onnx = [
|
|
"onnxruntime",
|
|
"onnxruntime_tools",
|
|
]
|
|
onnx-gpu = [
|
|
"onnxruntime-gpu",
|
|
"onnxruntime_tools",
|
|
]
|
|
metrics = [ # for metrics
|
|
"scipy>=1.3.2",
|
|
"rapidfuzz>=2.0.15,<2.8.0", # FIXME https://github.com/deepset-ai/haystack/pull/3199
|
|
"seqeval",
|
|
"mlflow",
|
|
]
|
|
ray = [
|
|
"ray[serve]>=1.9.1,<2; platform_system != 'Windows'",
|
|
"ray[serve]>=1.9.1,<2,!=1.12.0; platform_system == 'Windows'", # Avoid 1.12.0 due to https://github.com/ray-project/ray/issues/24169 (fails on windows)
|
|
"aiorwlock>=1.3.0,<2",
|
|
]
|
|
colab = [
|
|
"pillow<=9.0.0",
|
|
]
|
|
dev = [
|
|
"pre-commit",
|
|
# Type check
|
|
"mypy",
|
|
# Test
|
|
"pytest",
|
|
"pytest-cov",
|
|
"pytest-custom_exit_code", # used in the CI
|
|
"pytest-asyncio",
|
|
"responses",
|
|
"tox",
|
|
"coverage",
|
|
"python-multipart",
|
|
"psutil",
|
|
# Linting
|
|
"pylint",
|
|
"farm-haystack[formatting]",
|
|
# Documentation
|
|
"pydoc-markdown",
|
|
"mkdocs",
|
|
"jupytercontrib",
|
|
"watchdog",
|
|
"toml",
|
|
"reno",
|
|
# dulwich is a reno dependency. They pin it at >=0.15.0, so pip takes a ton of time to resolve the dependency tree.
|
|
# We pin it here to avoid taking too much time.
|
|
# https://opendev.org/openstack/reno/src/branch/master/requirements.txt#L7
|
|
"dulwich>=0.21.0,<1.0.0",
|
|
]
|
|
|
|
formatting = [
|
|
# Version specified following Black stability policy:
|
|
# https://black.readthedocs.io/en/stable/the_black_code_style/index.html#stability-policy
|
|
"black[jupyter]~=23.0",
|
|
]
|
|
|
|
all = [
|
|
"farm-haystack[inference,docstores,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx,beir,metrics,aws]",
|
|
]
|
|
all-gpu = [
|
|
# beir is incompatible with faiss-gpu: https://github.com/beir-cellar/beir/issues/71
|
|
"farm-haystack[inference,docstores-gpu,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx-gpu,metrics,aws]",
|
|
]
|
|
|
|
[project.scripts]
|
|
haystack = "haystack.cli.entry_point:main"
|
|
|
|
[project.urls]
|
|
"CI: GitHub" = "https://github.com/deepset-ai/haystack/actions"
|
|
"Docs: RTD" = "https://haystack.deepset.ai/overview/intro"
|
|
"GitHub: issues" = "https://github.com/deepset-ai/haystack/issues"
|
|
"GitHub: repo" = "https://github.com/deepset-ai/haystack"
|
|
Homepage = "https://github.com/deepset-ai/haystack"
|
|
|
|
[tool.hatch.version]
|
|
path = "VERSION.txt"
|
|
pattern = "(?P<version>.+)"
|
|
|
|
[tool.hatch.metadata]
|
|
allow-direct-references = true
|
|
|
|
[tool.hatch.build.targets.sdist]
|
|
include = [
|
|
"/haystack",
|
|
"/VERSION.txt",
|
|
]
|
|
|
|
[tool.hatch.build.targets.wheel]
|
|
packages = [
|
|
"haystack",
|
|
]
|
|
|
|
[tool.black]
|
|
line-length = 120
|
|
skip_magic_trailing_comma = true # For compatibility with pydoc>=4.6, check if still needed.
|
|
|
|
[tool.codespell]
|
|
ignore-words-list = "ans,astroid,nd,ned,nin,ue"
|
|
quiet-level = 3
|
|
skip = "test/nodes/*,test/others/*,test/samples/*"
|
|
|
|
[tool.pylint.'MESSAGES CONTROL']
|
|
max-line-length=120
|
|
load-plugins = "haystack_linter"
|
|
disable = [
|
|
|
|
# To keep
|
|
"fixme",
|
|
"c-extension-no-member",
|
|
|
|
# To review:
|
|
"missing-docstring",
|
|
"unused-argument",
|
|
"no-member",
|
|
"line-too-long",
|
|
"protected-access",
|
|
"too-few-public-methods",
|
|
"raise-missing-from",
|
|
"invalid-name",
|
|
"duplicate-code",
|
|
"arguments-differ",
|
|
"consider-using-f-string",
|
|
"no-else-return",
|
|
"attribute-defined-outside-init",
|
|
"super-with-arguments",
|
|
"redefined-builtin",
|
|
"abstract-method",
|
|
"unspecified-encoding",
|
|
"unidiomatic-typecheck",
|
|
"no-name-in-module",
|
|
"consider-using-with",
|
|
"redefined-outer-name",
|
|
"arguments-renamed",
|
|
"unnecessary-pass",
|
|
"broad-except",
|
|
"unnecessary-comprehension",
|
|
"subprocess-run-check",
|
|
"singleton-comparison",
|
|
"consider-iterating-dictionary",
|
|
"undefined-loop-variable",
|
|
"consider-using-in",
|
|
"bare-except",
|
|
"unexpected-keyword-arg",
|
|
"simplifiable-if-expression",
|
|
"use-list-literal",
|
|
"broad-exception-raised",
|
|
|
|
# To review later
|
|
"cyclic-import",
|
|
"import-outside-toplevel",
|
|
"deprecated-method",
|
|
]
|
|
[tool.pylint.'DESIGN']
|
|
max-args = 38 # Default is 5
|
|
max-attributes = 27 # Default is 7
|
|
max-branches = 34 # Default is 12
|
|
max-locals = 45 # Default is 15
|
|
max-module-lines = 2468 # Default is 1000
|
|
max-nested-blocks = 7 # Default is 5
|
|
max-statements = 206 # Default is 50
|
|
[tool.pylint.'SIMILARITIES']
|
|
min-similarity-lines=6
|
|
|
|
[tool.pytest.ini_options]
|
|
minversion = "6.0"
|
|
addopts = "--strict-markers"
|
|
markers = [
|
|
"unit: unit tests",
|
|
"integration: integration tests",
|
|
|
|
"generator: generator tests",
|
|
"summarizer: summarizer tests",
|
|
"embedding_dim: uses a document store with non-default embedding dimension (e.g @pytest.mark.embedding_dim(128))",
|
|
|
|
"tika: requires Tika container",
|
|
"parsr: requires Parsr container",
|
|
"ocr: requires Tesseract",
|
|
|
|
"elasticsearch: requires Elasticsearch container",
|
|
"weaviate: requires Weaviate container",
|
|
"pinecone: requires Pinecone credentials",
|
|
"faiss: uses FAISS",
|
|
"opensearch",
|
|
"document_store",
|
|
]
|
|
log_cli = true
|
|
|
|
[tool.mypy]
|
|
warn_return_any = false
|
|
warn_unused_configs = true
|
|
ignore_missing_imports = true
|
|
plugins = [
|
|
"pydantic.mypy",
|
|
]
|
|
|
|
[tool.ruff]
|
|
select = [
|
|
"AIR", # Airflow
|
|
"ASYNC", # flake8-async
|
|
"C90", # McCabe cyclomatic complexity
|
|
"CPY", # flake8-copyright
|
|
"DJ", # flake8-django
|
|
"E501", # Long lines
|
|
"EXE", # flake8-executable
|
|
"FURB", # refurb
|
|
"INT", # flake8-gettext
|
|
"PL", # Pylint
|
|
"Q", # flake8-quotes
|
|
"SLOT", # flake8-slots
|
|
"T10", # flake8-debugger
|
|
"W", # pycodestyle
|
|
"YTT", # flake8-2020
|
|
# "E", # pycodestyle
|
|
# "F", # Pyflakes
|
|
# "NPY", # NumPy-specific rules
|
|
# "PD", # pandas-vet
|
|
# "PERF", # Perflint
|
|
# "PT", # flake8-pytest-style
|
|
# "UP", # pyupgrade
|
|
]
|
|
line-length = 1486
|
|
target-version = "py38"
|
|
ignore = [
|
|
"PLR1714", # repeated-equality-comparison
|
|
"PLR5501", # collapsible-else-if
|
|
"PLW0603", # global-statement
|
|
"PLW1510", # subprocess-run-without-check
|
|
"PLW2901", # redefined-loop-name
|
|
]
|
|
|
|
[tool.ruff.mccabe]
|
|
max-complexity = 28
|
|
|
|
[tool.ruff.pylint]
|
|
allow-magic-value-types = ["float", "int", "str"]
|
|
max-args = 38 # Default is 5
|
|
max-branches = 32 # Default is 12
|
|
max-returns = 9 # Default is 6
|
|
max-statements = 105 # Default is 50
|
|
|
|
[tool.coverage.run]
|
|
omit = [
|
|
"haystack/testing/*",
|
|
]
|