# Build requirements and backend used to build this project.
# (Stray VCS timestamp lines that made this table unparseable were removed.)
[build-system]
requires = [
    "hatchling>=1.8.0",
]
build-backend = "hatchling.build"
# Core package metadata.
[project]
name = "farm-haystack"
# The version is declared dynamic; see [tool.hatch.version] for where it is read from.
dynamic = ["version"]
description = "Neural Question Answering & Semantic Search at Scale. Use modern transformer based models like BERT to find answers in large document collections"
readme = "README.md"
license = "Apache-2.0"
requires-python = ">=3.7"
authors = [{ name = "deepset.ai", email = "malte.pietsch@deepset.ai" }]
keywords = [
    "BERT",
    "QA",
    "Question-Answering",
    "Reader",
    "Retriever",
    "albert",
    "language-model",
    "mrc",
    "roberta",
    "search",
    "semantic-search",
    "squad",
    "transfer-learning",
    "transformer",
]
# Trove classifiers; the listed Python versions start at 3.7, matching requires-python.
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Science/Research",
    "License :: Freely Distributable",
    "License :: OSI Approved :: Apache Software License",
    "Operating System :: OS Independent",
    "Programming Language :: Python",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.7",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
# Runtime dependencies installed with the base package (PEP 508 requirement strings).
# (Stray VCS timestamp lines that made this array unparseable were removed.)
dependencies = [
    "importlib-metadata; python_version < '3.8'",
    "requests",
    "pydantic",
    "transformers[torch]==4.25.1",
    "protobuf<=3.20.2",  # same version they use in transformers[sentencepiece]
    "nltk",
    "pandas",
    "rank_bm25",

    # Utils
    "dill",  # pickle extension for (de-)serialization
    "tqdm",  # progress bars in model download and training scripts
    "networkx",  # graphs library
    "mmh3",  # fast hashing function (murmurhash3)
    "quantulum3",  # quantities extraction from text
    "posthog",  # telemetry
    "azure-ai-formrecognizer>=3.2.0b2",  # forms reader
    # audio's espnet-model-zoo requires huggingface-hub version <0.8 while we need >=0.5 to be able to use create_repo in FARMReader
    "huggingface-hub>=0.5.0",

    # Preprocessing
    "more_itertools",  # for windowing
    "python-docx",
    "langdetect",  # for PDF conversions
    "tika",  # Apache Tika (text & metadata extractor)

    # See haystack/nodes/retriever/_embedding_encoder.py, _SentenceTransformersEmbeddingEncoder
    "sentence-transformers>=2.2.0",

    # for stats in run_classifier
    "scipy>=1.3.2",
    "scikit-learn>=1.0.0",

    # Metrics and logging
    "seqeval",
    "mlflow",

    # Elasticsearch
    "elasticsearch>=7.7,<8",

    # OpenAI tokenizer
    "tiktoken>=0.1.2; python_version >= '3.8' and (platform_machine == 'AMD64' or platform_machine == 'amd64' or platform_machine == 'x86_64' or (platform_machine == 'arm64' and platform_system == 'Darwin'))",

    # context matching
    "rapidfuzz>=2.0.15,<2.8.0",  # FIXME https://github.com/deepset-ai/haystack/pull/3199

    # Schema validation
    "jsonschema",
]
# Optional extras. The "only-*" extras install just the client library; the
# un-prefixed variants (faiss, milvus, pinecone, ...) additionally pull in the
# "sql" extra through a self-reference to farm-haystack.
# (Stray VCS timestamp lines that made these arrays unparseable were removed.)
[project.optional-dependencies]
sql = [
    "sqlalchemy>=1.4.2,<2",
    "sqlalchemy_utils",
    "psycopg2-binary; platform_system != 'Windows'",
]
only-faiss = [
    "faiss-cpu>=1.6.3,<=1.7.2",
]
faiss = [
    "farm-haystack[sql,only-faiss]",
]
only-faiss-gpu = [
    "faiss-gpu>=1.6.3,<2",
]
faiss-gpu = [
    "farm-haystack[sql,only-faiss-gpu]",
]
only-milvus = [
    "pymilvus>=2.0.0,<3",  # Refer to the Milvus version support matrix at https://github.com/milvus-io/pymilvus#install-pymilvus
]
milvus = [
    "farm-haystack[sql,only-milvus]",
]
weaviate = [
    "weaviate-client==3.10.0",
]
only-pinecone = [
    "pinecone-client>=2.0.11,<3",
]
pinecone = [
    "farm-haystack[sql,only-pinecone]",
]
graphdb = [
    "SPARQLWrapper",
]
inmemorygraph = [
    "SPARQLWrapper",
]
opensearch = [
    "opensearch-py>=2",
]
# Aggregate extras bundling every document store backend (CPU and GPU FAISS variants).
docstores = [
    "farm-haystack[faiss,milvus,weaviate,graphdb,inmemorygraph,pinecone,opensearch]",
]
docstores-gpu = [
    "farm-haystack[faiss-gpu,milvus,weaviate,graphdb,inmemorygraph,pinecone,opensearch]",
]
# Feature extras: audio, BEIR, crawler, preprocessing, OCR, ONNX, Ray, Colab.
# (Stray VCS timestamp lines that made these arrays unparseable were removed.)
audio = [
    # pyworld is split at 0.3.1 depending on the Python version.
    "pyworld>=0.3.1; python_version >= '3.8'",
    "pyworld<0.3.1; python_version < '3.8'",
    "ffmpeg-python==0.2.0",
    "espnet",
    "espnet-model-zoo",
    "pydub",
    "protobuf<=3.20.1",
    "soundfile< 0.12.0",
    "numpy<1.24",  # Keep compatibility with latest numba
]
beir = [
    "beir; platform_system != 'Windows'",
]
crawler = [
    "selenium>=4.0.0,!=4.1.4",  # Avoid 4.1.4 due to https://github.com/SeleniumHQ/selenium/issues/10612
    "webdriver-manager",
]
preprocessing = [
    "beautifulsoup4",
    "markdown",
    "python-frontmatter",
    "python-magic; platform_system != 'Windows'",  # Depends on libmagic: https://pypi.org/project/python-magic/
    "python-magic-bin; platform_system == 'Windows'",  # Needs to be installed without python-magic, otherwise Windows CI gets stuck.
]
ocr = [
    "pytesseract>0.3.7",
    "pdf2image>1.14",
]
onnx = [
    "onnxruntime",
    "onnxruntime_tools",
]
onnx-gpu = [
    "onnxruntime-gpu",
    "onnxruntime_tools",
]
ray = [
    "ray>=1.9.1,<2; platform_system != 'Windows'",
    "ray>=1.9.1,<2,!=1.12.0; platform_system == 'Windows'",  # Avoid 1.12.0 due to https://github.com/ray-project/ray/issues/24169 (fails on windows)
    "aiorwlock>=1.3.0,<2",
]
colab = [
    "pillow<=9.0.0",
]
# Development tooling plus the aggregate "all" / "all-gpu" extras.
# (Stray VCS timestamp lines that made these arrays unparseable were removed.)
dev = [
    "pre-commit",

    # Type check
    "mypy",
    "typing_extensions; python_version < '3.8'",

    # Test
    "pytest",
    "pytest-custom_exit_code",  # used in the CI
    "pytest-asyncio",
    "responses",
    "tox",
    "coverage",
    "python-multipart",
    "psutil",

    # Linting
    "pylint",
    "farm-haystack[formatting]",

    # Documentation
    "pydoc-markdown",
    "mkdocs",
    "jupytercontrib",
    "watchdog",
    "requests-cache",
]
formatting = [
    # Version specified following Black stability policy:
    # https://black.readthedocs.io/en/stable/the_black_code_style/index.html#stability-policy
    "black[jupyter]~=23.0",
]
all = [
    "farm-haystack[docstores,audio,crawler,preprocessing,ocr,ray,dev,onnx,beir]",
]
all-gpu = [
    # beir is incompatible with faiss-gpu: https://github.com/beir-cellar/beir/issues/71
    "farm-haystack[docstores-gpu,audio,crawler,preprocessing,ocr,ray,dev,onnx-gpu]",
]
# Project links. Keys containing ':' or spaces must stay quoted.
[project.urls]
"CI: GitHub" = "https://github.com/deepset-ai/haystack/actions"
"Docs: RTD" = "https://haystack.deepset.ai/overview/intro"
"GitHub: issues" = "https://github.com/deepset-ai/haystack/issues"
"GitHub: repo" = "https://github.com/deepset-ai/haystack"
Homepage = "https://github.com/deepset-ai/haystack"
# Source of the dynamic project version: VERSION.txt, matched by the regex
# below into the named group "version".
[tool.hatch.version]
path = "VERSION.txt"
pattern = '(?P<version>.+)'
# Paths included in the source distribution.
[tool.hatch.build.targets.sdist]
include = ["/haystack", "/VERSION.txt"]
# Packages shipped in the wheel.
# (A stray VCS timestamp line following this table was removed.)
[tool.hatch.build.targets.wheel]
packages = [
    "haystack",
]
# Black formatter settings; line-length matches pylint's max-line-length (120).
# (Stray VCS timestamp lines that made this table unparseable were removed.)
[tool.black]
line-length = 120
skip_magic_trailing_comma = true  # For compatibility with pydoc>=4.6, check if still needed.
# Pylint message configuration. The section name contains a space, so the key
# must remain quoted.
# (Stray VCS timestamp lines that made the array unparseable were removed.)
[tool.pylint.'MESSAGES CONTROL']
max-line-length = 120
disable = [
    # To keep
    "fixme",
    "c-extension-no-member",

    # To review:
    "missing-docstring",
    "unused-argument",
    "no-member",
    "line-too-long",
    "protected-access",
    "too-few-public-methods",
    "raise-missing-from",
    "invalid-name",
    "too-many-locals",
    "duplicate-code",
    "too-many-arguments",
    "arguments-differ",
    "consider-using-f-string",
    "no-else-return",
    "attribute-defined-outside-init",
    "too-many-instance-attributes",
    "super-with-arguments",
    "redefined-builtin",
    "abstract-method",
    "too-many-branches",
    "unspecified-encoding",
    "unidiomatic-typecheck",
    "no-name-in-module",
    "consider-using-with",
    "redefined-outer-name",
    "arguments-renamed",
    "unnecessary-pass",
    "broad-except",
    "unnecessary-comprehension",
    "subprocess-run-check",
    "singleton-comparison",
    "consider-iterating-dictionary",
    "too-many-nested-blocks",
    "undefined-loop-variable",
    "too-many-statements",
    "consider-using-in",
    "bare-except",
    "too-many-lines",
    "unexpected-keyword-arg",
    "simplifiable-if-expression",
    "use-list-literal",
    "broad-exception-raised",

    # To review later
    "cyclic-import",
    "import-outside-toplevel",
    "deprecated-method",
]
# Pylint design checker: maximum number of arguments per function.
[tool.pylint.DESIGN]
max-args = 7
# Pylint similarity checker: minimum number of similar lines to report.
# (A stray VCS timestamp line that made this table unparseable was removed.)
[tool.pylint.SIMILARITIES]
min-similarity-lines = 6
# Pytest configuration.
# (Stray VCS timestamp lines that made the markers array unparseable were removed.)
[tool.pytest.ini_options]
minversion = "6.0"
# --strict-markers makes pytest reject markers that are not registered below.
addopts = "--strict-markers"
# Each entry is "name: description"; the description part is optional.
markers = [
    "unit: unit tests",
    "integration: integration tests",
    "generator: generator tests",
    "summarizer: summarizer tests",
    "embedding_dim: uses a document store with non-default embedding dimension (e.g @pytest.mark.embedding_dim(128))",
    "tika: requires Tika container",
    "parsr: requires Parsr container",
    "ocr: requires Tesseract",
    "elasticsearch: requires Elasticsearch container",
    "graphdb: requires GraphDB container",
    "weaviate: requires Weaviate container",
    "pinecone: requires Pinecone credentials",
    "faiss: uses FAISS",
    "milvus: requires a Milvus 2 setup",
    "opensearch",
    "document_store",
]
log_cli = true
# Mypy type-checker settings, with the pydantic plugin enabled.
[tool.mypy]
warn_return_any = false
warn_unused_configs = true
ignore_missing_imports = true
plugins = ["pydantic.mypy"]