[metadata]
name = farm-haystack
version = file: VERSION.txt
url = https://github.com/deepset-ai/haystack
project_urls =
    Docs: RTD = https://haystack.deepset.ai/overview/intro
    CI: GitHub = https://github.com/deepset-ai/haystack/actions
    GitHub: issues = https://github.com/deepset-ai/haystack/issues
    GitHub: repo = https://github.com/deepset-ai/haystack
description = Neural Question Answering & Semantic Search at Scale. Use modern transformer based models like BERT to find answers in large document collections
long_description = file: README.md
long_description_content_type = text/markdown
keywords =
    QA
    Question-Answering
    Reader
    Retriever
    semantic-search
    search
    BERT
    roberta
    albert
    squad
    mrc
    transfer-learning
    language-model
    transformer
author = deepset.ai
author_email = malte.pietsch@deepset.ai
license = Apache License 2.0
license_file = LICENSE
platforms = any
classifiers =
    Development Status :: 5 - Production/Stable
    Intended Audience :: Science/Research
    License :: Freely Distributable
    License :: OSI Approved :: Apache Software License
    Topic :: Scientific/Engineering :: Artificial Intelligence
    Operating System :: OS Independent
    Programming Language :: Python
    Programming Language :: Python :: 3
    Programming Language :: Python :: 3.7
    Programming Language :: Python :: 3.8
    Programming Language :: Python :: 3.9
    Programming Language :: Python :: 3.10

[options]
use_scm_version = True
python_requires = >=3.7, <4
packages = find:
setup_requires =
    setuptools
    wheel
install_requires =
    importlib-metadata; python_version < '3.8'
    torch>1.9,<1.13
    requests
    pydantic
    transformers==4.20.1
    nltk
    pandas

    # Utils
    dill  # pickle extension for (de-)serialization
    tqdm  # progress bars in model download and training scripts
    networkx  # graphs library
    mmh3  # fast hashing function (murmurhash3)
    quantulum3  # quantities extraction from text
    posthog  # telemetry
    azure-ai-formrecognizer==3.2.0b2  # forms reader
    # audio's espnet-model-zoo requires huggingface-hub version <0.8 while we need >=0.5 to be able to use create_repo in FARMReader
    huggingface-hub<0.8.0,>=0.5.0

    # Preprocessing
    more_itertools  # for windowing
    python-docx
    langdetect  # for PDF conversions
    tika  # Apache Tika (text & metadata extractor)

    # See haystack/nodes/retriever/_embedding_encoder.py, _SentenceTransformersEmbeddingEncoder
    sentence-transformers>=2.2.0

    # for stats in run_classifier
    scipy>=1.3.2
    scikit-learn>=1.0.0

    # Metrics and logging
    seqeval
    mlflow

    # Elasticsearch
    elasticsearch>=7.7,<7.11
    elastic-apm

    # context matching
    rapidfuzz>=2.0.15,<3

    # Schema validation
    jsonschema

[options.packages.find]
exclude =
    rest_api*
    test*
    tutorials*
    ui*

[options.package_data]
haystack = json-schemas/*.schema.json

[options.extras_require]
sql =
    sqlalchemy>=1.4.2,<2
    sqlalchemy_utils
    psycopg2-binary; platform_system != 'Windows'
only-faiss =
    faiss-cpu>=1.6.3,<2
faiss =
    farm-haystack[sql,only-faiss]
only-faiss-gpu =
    faiss-gpu>=1.6.3,<2
faiss-gpu =
    farm-haystack[sql,only-faiss-gpu]
only-milvus1 =
    pymilvus<2.0.0  # Refer milvus version support matrix at https://github.com/milvus-io/pymilvus#install-pymilvus
milvus1 =
    farm-haystack[sql,only-milvus1]
only-milvus =
    pymilvus>=2.0.0,<3  # Refer milvus version support matrix at https://github.com/milvus-io/pymilvus#install-pymilvus
milvus =
    farm-haystack[sql,only-milvus]
weaviate =
    weaviate-client==3.6.0
only-pinecone =
    pinecone-client
pinecone =
    farm-haystack[sql,only-pinecone]
graphdb =
    SPARQLWrapper
inmemorygraph =
    SPARQLWrapper
opensearch =
    opensearch-py>=2
docstores =
    farm-haystack[faiss,milvus,weaviate,graphdb,inmemorygraph,pinecone,opensearch]
docstores-gpu =
    farm-haystack[faiss-gpu,milvus,weaviate,graphdb,inmemorygraph,pinecone,opensearch]
audio =
    pyworld<=0.2.12; python_version >= '3.10'
    espnet
    espnet-model-zoo
    pydub
beir =
    beir; platform_system != 'Windows'
crawler =
    selenium>=4.0.0,!=4.1.4  # Avoid 4.1.4 due to https://github.com/SeleniumHQ/selenium/issues/10612
    webdriver-manager
preprocessing =
    beautifulsoup4
    markdown
    python-magic; platform_system != 'Windows'  # Depends on libmagic: https://pypi.org/project/python-magic/
    python-magic-bin; platform_system == 'Windows'  # Needs to be installed without python-magic, otherwise Windows CI gets stuck.
ocr =
    pytesseract==0.3.7
    pillow
    pdf2image==1.14.0
onnx =
    onnxruntime
    onnxruntime_tools
onnx-gpu =
    onnxruntime-gpu
    onnxruntime_tools
ray =
    ray>=1.9.1,<2; platform_system != 'Windows'
    ray>=1.9.1,<2,!=1.12.0; platform_system == 'Windows'  # Avoid 1.12.0 due to https://github.com/ray-project/ray/issues/24169 (fails on windows)
    aiorwlock>=1.3.0,<2
colab =
    grpcio==1.47.0
    requests>=2.25  # Needed to avoid dependency conflict with crawler https://github.com/deepset-ai/haystack/pull/2921
dev =
    pre-commit
    # Type check
    mypy
    typing_extensions; python_version < '3.8'
    # Test
    pytest
    pytest-custom_exit_code  # used in the CI
    responses
    tox
    coverage
    python-multipart
    psutil
    # Linting
    pylint
    # Code formatting
    black[jupyter]==22.6.0
    # Documentation
    pydoc-markdown==4.5.1  # FIXME Unpin!
    # azure-core is a dependency of azure-ai-formrecognizer
    # In order to stop malicious pip backtracking during pip install farm-haystack[all] documented in https://github.com/deepset-ai/haystack/issues/2280
    # we have to resolve a dependency version conflict ourselves.
    # azure-core>=1.23 conflicts with pydoc-markdown's dependency on databind>=1.5.0 which itself requires typing-extensions<4.0.0
    # azure-core>=1.23 needs typing-extensions>=4.0.1
    # pip unfortunately backtracks into the databind direction ultimately getting lost.
    azure-core<1.23
    mkdocs
    jupytercontrib
    watchdog
    #==1.0.2
    requests-cache
test =
    farm-haystack[docstores,audio,crawler,preprocessing,ocr,ray,dev]
all =
    farm-haystack[docstores,audio,crawler,preprocessing,ocr,ray,dev,onnx,beir]
all-gpu =
    farm-haystack[docstores-gpu,audio,crawler,preprocessing,ocr,ray,dev,onnx-gpu,beir]

[tool:pytest]
testpaths =
    test
    rest_api/test
    ui/test
python_files =
    test_*.py
addopts =
    -vv

[mypy]
warn_return_any = false
warn_unused_configs = true
ignore_missing_imports = true
plugins = pydantic.mypy

[tox]
requires = tox-venv
           setuptools >= 30.0.0
envlist = py36,py37

[testenv]
changedir = test
deps =
    coverage
    pytest
    pandas
setenv =
    COVERAGE_FILE = test-reports/.coverage
    PYTEST_ADDOPTS = --junitxml=test-reports/{envname}/junit.xml -vv
commands =
    coverage run --source haystack --parallel-mode -m pytest {posargs}
    coverage combine
    coverage report -m
    coverage html -d test-reports/coverage-html
    coverage xml -o test-reports/coverage.xml