# Distribution metadata (name, version source, URLs, description, classifiers).
# Multi-line values are written as indented continuation lines.
[metadata]
name = farm-haystack
version = file: VERSION.txt
url = https://github.com/deepset-ai/haystack
project_urls =
    Docs: RTD = https://haystack.deepset.ai/overview/intro
    CI: GitHub = https://github.com/deepset-ai/haystack/actions
    GitHub: issues = https://github.com/deepset-ai/haystack/issues
    GitHub: repo = https://github.com/deepset-ai/haystack
description = Neural Question Answering & Semantic Search at Scale. Use modern transformer based models like BERT to find answers in large document collections
long_description = file: README.md
long_description_content_type = text/markdown
keywords =
    QA
    Question-Answering
    Reader
    Retriever
    semantic-search
    search
    BERT
    roberta
    albert
    squad
    mrc
    transfer-learning
    language-model
    transformer
author = deepset.ai
author_email = malte.pietsch@deepset.ai
license = Apache License 2.0
license_file = LICENSE
platforms = any
classifiers =
    Development Status :: 5 - Production/Stable
    Intended Audience :: Science/Research
    License :: Freely Distributable
    License :: OSI Approved :: Apache Software License
    Topic :: Scientific/Engineering :: Artificial Intelligence
    Operating System :: OS Independent
    Programming Language :: Python
    Programming Language :: Python :: 3
    Programming Language :: Python :: 3.7
    Programming Language :: Python :: 3.8
    Programming Language :: Python :: 3.9
    Programming Language :: Python :: 3.10

# Build options: supported Python range, package discovery, build-time requirements.
[options]
use_scm_version = True
python_requires = >=3.7, <4
packages = find:
setup_requires =
    setuptools
    wheel
# Runtime dependencies, one requirement per indented line.
install_requires =
    importlib-metadata; python_version < '3.8'
    torch>1.9,<1.13
    requests
    pydantic
    transformers==4.20.1
    nltk
    pandas
    # Utils
    dill  # pickle extension for (de-)serialization
    tqdm  # progress bars in model download and training scripts
    networkx  # graphs library
    mmh3  # fast hashing function (murmurhash3)
    quantulum3  # quantities extraction from text
    posthog  # telemetry
    azure-ai-formrecognizer==3.2.0b2  # forms reader
    # azure-core is a dependency of azure-ai-formrecognizer
    # In order to stop malicious pip backtracking during pip install farm-haystack[all] documented in https://github.com/deepset-ai/haystack/issues/2280
    # we have to resolve a dependency version conflict ourselves.
    # azure-core>=1.23 conflicts with pydoc-markdown's dependency on databind>=1.5.0 which itself requires typing-extensions<4.0.0
    # azure-core>=1.23 needs typing-extensions>=4.0.1
    # pip unfortunately backtracks into the databind direction ultimately getting lost.
    azure-core<1.23
    # audio's espnet-model-zoo requires huggingface-hub version <0.8 while we need >=0.5 to be able to use create_repo in FARMReader
    huggingface-hub<0.8.0,>=0.5.0
    # Preprocessing
    more_itertools  # for windowing
    python-docx
    langdetect  # for PDF conversions
    tika  # Apache Tika (text & metadata extractor)
    # See haystack/nodes/retriever/_embedding_encoder.py, _SentenceTransformersEmbeddingEncoder
    sentence-transformers>=2.2.0
    # for stats in run_classifier
    scipy>=1.3.2
    scikit-learn>=1.0.0
    # Metrics and logging
    seqeval
    mlflow
    # Elasticsearch
    elasticsearch>=7.7,<7.11
    elastic-apm
    # context matching
    rapidfuzz>=2.0.15,<3
    # Schema validation
    jsonschema

# Glob patterns excluded from automatic package discovery (packages = find:).
[options.packages.find]
exclude =
    rest_api*
    test*
    tutorials*
    ui*

# Non-Python files shipped inside the haystack package.
[options.package_data]
haystack =
    json-schemas/*.schema.json

# Optional dependency groups ("extras").
# The "only-*" extras list just the backend client; the unprefixed variants
# additionally pull in the sql extra via a self-reference to farm-haystack[...].
[options.extras_require]
sql =
    sqlalchemy>=1.4.2,<2
    sqlalchemy_utils
    psycopg2-binary; platform_system != 'Windows'
only-faiss =
    faiss-cpu>=1.6.3,<2
faiss =
    farm-haystack[sql,only-faiss]
only-faiss-gpu =
    faiss-gpu>=1.6.3,<2
faiss-gpu =
    farm-haystack[sql,only-faiss-gpu]
only-milvus1 =
    pymilvus<2.0.0  # Refer milvus version support matrix at https://github.com/milvus-io/pymilvus#install-pymilvus
milvus1 =
    farm-haystack[sql,only-milvus1]
only-milvus =
    pymilvus>=2.0.0,<3  # Refer milvus version support matrix at https://github.com/milvus-io/pymilvus#install-pymilvus
milvus =
    farm-haystack[sql,only-milvus]
weaviate =
    weaviate-client==3.6.0
only-pinecone =
    pinecone-client
pinecone =
    farm-haystack[sql,only-pinecone]
graphdb =
    SPARQLWrapper
inmemorygraph =
    SPARQLWrapper
docstores =
    farm-haystack[faiss,milvus,weaviate,graphdb,inmemorygraph,pinecone]
docstores-gpu =
    farm-haystack[faiss-gpu,milvus,weaviate,graphdb,inmemorygraph,pinecone]
# Feature-specific extras (audio, benchmarking, crawling, file conversion, OCR, ONNX, Ray, Colab).
audio =
    pyworld<=0.2.12; python_version >= '3.10'
    espnet
    espnet-model-zoo
    pydub
beir =
    beir; platform_system != 'Windows'
crawler =
    selenium !=4.1.4  # due to https://github.com/SeleniumHQ/selenium/issues/10612
    webdriver-manager
preprocessing =
    beautifulsoup4
    markdown
    python-magic; platform_system != 'Windows'  # Depends on libmagic: https://pypi.org/project/python-magic/
    python-magic-bin; platform_system == 'Windows'  # Needs to be installed without python-magic, otherwise Windows CI gets stuck.
ocr =
    pytesseract==0.3.7
    pillow
    pdf2image==1.14.0
onnx =
    onnxruntime
    onnxruntime_tools
onnx-gpu =
    onnxruntime-gpu
    onnxruntime_tools
ray =
    ray>=1.9.1,<2; platform_system != 'Windows'
    ray>=1.9.1,<2,!=1.12.0; platform_system == 'Windows'  # Avoid 1.12.0 due to https://github.com/ray-project/ray/issues/24169 (fails on windows)
    aiorwlock>=1.3.0,<2
colab =
    grpcio==1.43.0
# Development tooling, followed by aggregate extras that combine other extras by name.
dev =
    pre-commit
    # Type check
    mypy
    typing_extensions; python_version < '3.8'
    # Test
    pytest
    pytest-custom_exit_code  # used in the CI
    responses
    tox
    coverage
    python-multipart
    psutil
    # Linting
    pylint
    # Code formatting
    black[jupyter]==22.6.0
    # Documentation
    pydoc-markdown==4.5.1  # FIXME Unpin!
    mkdocs
    jupytercontrib
    watchdog  # ==1.0.2
    requests-cache
test =
    farm-haystack[docstores,audio,crawler,preprocessing,ocr,ray,dev]
all =
    farm-haystack[docstores,audio,crawler,preprocessing,ocr,ray,dev,onnx,beir]
all-gpu =
    farm-haystack[docstores-gpu,audio,crawler,preprocessing,ocr,ray,dev,onnx-gpu,beir]

# pytest settings: directories to collect from, test file pattern, default CLI flags.
[tool:pytest]
testpaths =
    test
    rest_api/test
    ui/test
python_files =
    test_*.py
addopts =
    -vv

# mypy type-checker settings.
[mypy]
# Do not report imports that cannot be resolved.
ignore_missing_imports = true
# Load the mypy plugin provided by pydantic.
plugins = pydantic.mypy
warn_return_any = false
warn_unused_configs = true

# tox bootstrap requirements and the interpreter matrix.
[tox]
requires = tox-venv
    setuptools >= 30.0.0
# py36 is not listed: [options] declares python_requires >=3.7, so the package
# cannot be installed there. The list mirrors the Python versions in classifiers.
envlist = py37,py38,py39,py310

# Default tox environment: runs pytest under coverage from the test directory
# and writes all reports below test-reports/.
[testenv]
changedir = test
deps =
    coverage
    pytest
    pandas
setenv =
    COVERAGE_FILE = test-reports/.coverage
    PYTEST_ADDOPTS = --junitxml=test-reports/{envname}/junit.xml -vv
commands =
    coverage run --source haystack --parallel-mode -m pytest {posargs}
    coverage combine
    coverage report -m
    coverage html -d test-reports/coverage-html
    coverage xml -o test-reports/coverage.xml