# haystack/pyproject.toml

[build-system]
# Packages are built with hatchling (see the [tool.hatch.*] tables for its settings).
requires = ["hatchling>=1.8.0"]
build-backend = "hatchling.build"
[project]
# Core distribution metadata for the `farm-haystack` package.
name = "farm-haystack"
# The version is not hard-coded here; it is resolved at build time (see [tool.hatch.version]).
dynamic = ["version"]
description = "LLM framework to build customizable, production-ready LLM applications. Connect components (models, vector DBs, file converters) to pipelines or agents that can interact with your data."
readme = "README.md"
license = "Apache-2.0"
requires-python = ">=3.8"
authors = [
  { name = "deepset.ai", email = "malte.pietsch@deepset.ai" },
]
keywords = [
  "BERT",
  "QA",
  "Question-Answering",
  "Reader",
  "Retriever",
  "albert",
  "language-model",
  "mrc",
  "roberta",
  "search",
  "semantic-search",
  "squad",
  "transfer-learning",
  "transformer",
]
classifiers = [
  "Development Status :: 5 - Production/Stable",
  "Intended Audience :: Science/Research",
  "License :: Freely Distributable",
  "License :: OSI Approved :: Apache Software License",
  "Operating System :: OS Independent",
  "Programming Language :: Python",
  "Programming Language :: Python :: 3",
  "Programming Language :: Python :: 3.8",
  "Programming Language :: Python :: 3.9",
  "Programming Language :: Python :: 3.10",
  "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
# Runtime requirements installed with every `farm-haystack` install (no extras needed).
dependencies = [
  "requests",
  "pydantic<2",
  "transformers==4.32.1",
  "pandas",
  "rank_bm25",
  "scikit-learn>=1.3.0",  # TF-IDF and metrics
  "lazy-imports==0.3.1",  # Optional imports
  "prompthub-py==4.0.0",
  "platformdirs",

  # Utils
  "tqdm",  # progress bars in model download and training scripts
  "networkx",  # graphs library
  "quantulum3",  # quantities extraction from text
  "posthog",  # telemetry
  # NOTE: huggingface-hub itself is declared in the `inference` extra, not here.
  # audio's espnet-model-zoo requires huggingface-hub version <0.8 while we need >=0.5
  # to be able to use create_repo in FARMReader.

  "tenacity",  # retry decorator
  "sseclient-py",  # server side events for OpenAI streaming
  "more_itertools",  # utilities

  # Web Retriever
  "boilerpy3",

  # Multimodal Embedder haystack/nodes/retriever/multimodal/embedder.py
  "Pillow",

  # OpenAI tokenizer
  "tiktoken>=0.5.1",

  # Schema validation
  "jsonschema",

  # Preview
  "canals==0.8.0",
  "openai",
  "Jinja2",
  "openai-whisper",  # FIXME https://github.com/deepset-ai/haystack/issues/5731

  # Agent events
  "events",
  "requests-cache<1.0.0",
]
[project.optional-dependencies]
# Extras that reference "farm-haystack[...]" are aliases composing other extras of this package.
inference = [
  "transformers[torch,sentencepiece]==4.32.1",
  # See haystack/nodes/retriever/_embedding_encoder.py, _SentenceTransformersEmbeddingEncoder
  "sentence-transformers>=2.2.0",
  "huggingface-hub>=0.5.0",
]

# The un-versioned `elasticsearch` extra resolves to the 7.x client.
elasticsearch = ["farm-haystack[elasticsearch7]"]
elasticsearch7 = [
  "elasticsearch>=7.17,<8",
  "elastic_transport<8",
]
elasticsearch8 = [
  "elasticsearch>=8,<9",
  "elastic_transport>=8,<9",
]

sql = [
  "sqlalchemy>=1.4.2,<2",
  "sqlalchemy_utils",
  "psycopg2-binary; platform_system != 'Windows'",
]

# `only-faiss*` installs just the FAISS library; `faiss*` additionally pulls in the `sql` extra.
only-faiss = ["faiss-cpu>=1.6.3,<=1.7.2"]
faiss = ["farm-haystack[sql,only-faiss]"]
only-faiss-gpu = ["faiss-gpu>=1.6.3,<2"]
faiss-gpu = ["farm-haystack[sql,only-faiss-gpu]"]

weaviate = ["weaviate-client>2"]
# Pinecone client on its own; the `pinecone` extra combines this with the `sql` extra.
only-pinecone = [
  "pinecone-client>=2.0.11,<3",
]
pinecone = ["farm-haystack[sql,only-pinecone]"]
opensearch = ["opensearch-py>=2"]

# Aggregates of the document store extras; the `-gpu` variant swaps `faiss` for `faiss-gpu`.
docstores = ["farm-haystack[elasticsearch,faiss,weaviate,pinecone,opensearch]"]
docstores-gpu = ["farm-haystack[elasticsearch,faiss-gpu,weaviate,pinecone,opensearch]"]

audio = ["openai-whisper"]
beir = ["beir; platform_system != 'Windows'"]
aws = [
  "boto3",
  # Constrain botocore to avoid taking too much time to resolve the dependency tree.
  # boto3 used to constrain it at this version more than a year ago. To avoid breaking
  # people using old versions we use a similar constraint without upper bound.
  # https://github.com/boto/boto3/blob/dae73bef223abbedfa7317a783070831febc0c90/setup.py#L16
  "botocore>=1.27",
]
crawler = ["selenium>=4.11.0"]
preprocessing = [
  "nltk",
  "langdetect",  # for language classification
]
file-conversion = [
  # Microsoft Azure's Form Recognizer service (text and table extractor)
  "azure-ai-formrecognizer>=3.2.0b2",
  "python-docx",
  "tika",  # Apache Tika (text & metadata extractor)
  "beautifulsoup4",
  "markdown",
  "python-frontmatter",
  # Depends on libmagic: https://pypi.org/project/python-magic/
  "python-magic; platform_system != 'Windows'",
  # Needs to be installed without python-magic, otherwise Windows CI gets stuck.
  "python-magic-bin; platform_system == 'Windows'",
]
pdf = [
  # PDF text extraction alternative to xpdf; please check AGPLv3 license
  "PyMuPDF>=1.18.16",
]
ocr = [
  "pytesseract>0.3.7",
  "pdf2image>1.14",
]
onnx = [
  "onnxruntime",
  "onnxruntime_tools",
]
onnx-gpu = [
  "onnxruntime-gpu",
  "onnxruntime_tools",
]
# for metrics
metrics = [
  "scipy>=1.3.2",
  "rapidfuzz>=2.0.15,<2.8.0",  # FIXME https://github.com/deepset-ai/haystack/pull/3199
  "seqeval",
  "mlflow",
]
ray = [
  "ray[serve]>=1.9.1,<2; platform_system != 'Windows'",
  # Avoid 1.12.0 due to https://github.com/ray-project/ray/issues/24169 (fails on windows)
  "ray[serve]>=1.9.1,<2,!=1.12.0; platform_system == 'Windows'",
  "aiorwlock>=1.3.0,<2",
]
# Caps Pillow at 9.0.0 for this extra (the core dependencies list "Pillow" without a bound).
colab = [
  "pillow<=9.0.0",
]
# Tooling for contributors: type checking, tests, linting and documentation.
dev = [
  "pre-commit",

  # Type check
  "mypy",

  # Test
  "pytest",
  "pytest-cov",
  "pytest-custom_exit_code",  # used in the CI
  "pytest-asyncio",
  "responses",
  "tox",
  "coverage",
  "python-multipart",
  "psutil",

  # Linting
  "pylint",
  "farm-haystack[formatting]",

  # Documentation
  "pydoc-markdown",
  "mkdocs",
  "jupytercontrib",
  "watchdog",
  "toml",
  "reno",
  # dulwich is a reno dependency, they pin it at >=0.15.0 so pip takes ton of time to resolve the dependency tree.
  # We pin it here to avoid taking too much time.
  # https://opendev.org/openstack/reno/src/branch/master/requirements.txt#L7
  "dulwich>=0.21.0,<1.0.0",
]

formatting = [
  # Version specified following Black stability policy:
  # https://black.readthedocs.io/en/stable/the_black_code_style/index.html#stability-policy
  "black[jupyter]~=23.0",
]

# Aggregate of the CPU-oriented extras.
all = [
  "farm-haystack[inference,docstores,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx,beir,metrics,aws]",
]
# GPU counterpart of `all`: uses `docstores-gpu` and `onnx-gpu`, and leaves out `beir`.
all-gpu = [
  # beir is incompatible with faiss-gpu: https://github.com/beir-cellar/beir/issues/71
  "farm-haystack[inference,docstores-gpu,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx-gpu,metrics,aws]",
]
[project.scripts]
# Installs a `haystack` command that points at haystack.cli.entry_point:main.
haystack = "haystack.cli.entry_point:main"

[project.urls]
"CI: GitHub" = "https://github.com/deepset-ai/haystack/actions"
"Docs: RTD" = "https://haystack.deepset.ai/overview/intro"
"GitHub: issues" = "https://github.com/deepset-ai/haystack/issues"
"GitHub: repo" = "https://github.com/deepset-ai/haystack"
Homepage = "https://github.com/deepset-ai/haystack"

[tool.hatch.version]
# The dynamic version is read from VERSION.txt via the named group `version`.
path = "VERSION.txt"
pattern = "(?P<version>.+)"

[tool.hatch.metadata]
allow-direct-references = true

[tool.hatch.build.targets.sdist]
include = ["/haystack", "/VERSION.txt"]

[tool.hatch.build.targets.wheel]
packages = ["haystack"]
[tool.black]
line-length = 120
# For compatibility with pydoc>=4.6, check if still needed.
skip_magic_trailing_comma = true

[tool.codespell]
# Words codespell should not flag, and test fixture paths it should not scan.
ignore-words-list = "ans,astroid,nd,ned,nin,ue"
quiet-level = 3
skip = "test/nodes/*,test/others/*,test/samples/*"
[tool.pylint.'MESSAGES CONTROL']
max-line-length = 120
load-plugins = "haystack_linter"
# Checks switched off project-wide, grouped by how permanent the exemption is meant to be.
disable = [
  # To keep
  "fixme",
  "c-extension-no-member",

  # To review:
  "missing-docstring",
  "unused-argument",
  "no-member",
  "line-too-long",
  "protected-access",
  "too-few-public-methods",
  "raise-missing-from",
  "invalid-name",
  "duplicate-code",
  "arguments-differ",
  "consider-using-f-string",
  "no-else-return",
  "attribute-defined-outside-init",
  "super-with-arguments",
  "redefined-builtin",
  "abstract-method",
  "unspecified-encoding",
  "unidiomatic-typecheck",
  "no-name-in-module",
  "consider-using-with",
  "redefined-outer-name",
  "arguments-renamed",
  "unnecessary-pass",
  "broad-except",
  "unnecessary-comprehension",
  "subprocess-run-check",
  "singleton-comparison",
  "consider-iterating-dictionary",
  "undefined-loop-variable",
  "consider-using-in",
  "bare-except",
  "unexpected-keyword-arg",
  "simplifiable-if-expression",
  "use-list-literal",
  "broad-exception-raised",

  # To review later
  "cyclic-import",
  "import-outside-toplevel",
  "deprecated-method",
]
[tool.pylint.'DESIGN']
# Limits are set above pylint's defaults; each default is noted next to its override.
max-args = 38  # Default is 5
max-attributes = 27  # Default is 7
max-branches = 34  # Default is 12
max-locals = 45  # Default is 15
max-module-lines = 2468  # Default is 1000
max-nested-blocks = 7  # Default is 5
max-statements = 206  # Default is 50

[tool.pylint.'SIMILARITIES']
min-similarity-lines = 6
[tool.pytest.ini_options]
minversion = "6.0"
# With --strict-markers, every marker used in the test suite has to be registered in `markers`.
addopts = "--strict-markers"
markers = [
  "unit: unit tests",
  "integration: integration tests",
  "generator: generator tests",
  "summarizer: summarizer tests",
  "embedding_dim: uses a document store with non-default embedding dimension (e.g @pytest.mark.embedding_dim(128))",
  "tika: requires Tika container",
  "parsr: requires Parsr container",
  "ocr: requires Tesseract",
  "elasticsearch: requires Elasticsearch container",
  "weaviate: requires Weaviate container",
  "pinecone: requires Pinecone credentials",
  "faiss: uses FAISS",
  "opensearch",
  "document_store",
]
log_cli = true
[tool.mypy]
warn_return_any = false
warn_unused_configs = true
ignore_missing_imports = true
# Loads the mypy plugin shipped with pydantic.
plugins = ["pydantic.mypy"]
[tool.ruff]
# Enabled rule families; the commented-out entries at the end are not active.
select = [
  "AIR",  # Airflow
  "ASYNC",  # flake8-async
  "C90",  # McCabe cyclomatic complexity
  "CPY",  # flake8-copyright
  "DJ",  # flake8-django
  "E501",  # Long lines
  "EXE",  # flake8-executable
  "FURB",  # refurb
  "INT",  # flake8-gettext
  "PL",  # Pylint
  "Q",  # flake8-quotes
  "SLOT",  # flake8-slots
  "T10",  # flake8-debugger
  "W",  # pycodestyle
  "YTT",  # flake8-2020
  # "E",  # pycodestyle
  # "F",  # Pyflakes
  # "NPY",  # NumPy-specific rules
  # "PD",  # pandas-vet
  # "PERF",  # Perflint
  # "PT",  # flake8-pytest-style
  # "UP",  # pyupgrade
]
# NOTE(review): far larger than black's 120; presumably sized to the longest existing line
# so that E501 passes - confirm before lowering.
line-length = 1486
target-version = "py38"
ignore = [
  "PLR1714",  # repeated-equality-comparison
  "PLR5501",  # collapsible-else-if
  "PLW0603",  # global-statement
  "PLW1510",  # subprocess-run-without-check
  "PLW2901",  # redefined-loop-name
]

[tool.ruff.mccabe]
max-complexity = 28

[tool.ruff.pylint]
allow-magic-value-types = ["float", "int", "str"]
max-args = 38  # Default is 5
max-branches = 32  # Default is 12
max-returns = 9  # Default is 6
max-statements = 105  # Default is 50
[tool.coverage.run]
# Files under haystack/testing/ are left out of coverage measurement.
omit = ["haystack/testing/*"]