2021-02-02 17:32:17 +01:00
|
|
|
from pathlib import Path
|
|
|
|
|
2021-11-11 11:02:22 +01:00
|
|
|
import os
|
2022-01-28 17:32:56 +01:00
|
|
|
import json
|
2022-01-03 11:38:02 +01:00
|
|
|
from unittest.mock import Mock
|
2020-11-20 17:41:08 +01:00
|
|
|
import pytest
|
2022-01-28 17:32:56 +01:00
|
|
|
import responses
|
2020-11-20 17:41:08 +01:00
|
|
|
|
2022-02-21 12:22:37 +01:00
|
|
|
from haystack import __version__
|
2022-02-22 15:01:07 +01:00
|
|
|
from haystack.document_stores.base import BaseDocumentStore
|
2022-02-21 12:22:37 +01:00
|
|
|
from haystack.document_stores.deepsetcloud import DeepsetCloudDocumentStore
|
Adding yaml functionality to standard pipelines (save/load...) (#1735)
* adding yaml functionality to BaseStandardPipeline
fixes #1681
* Add latest docstring and tutorial changes
* Update API Reference Pages for v1.0 (#1729)
* Create new API pages and update existing ones
* Create query classifier page
* Remove Objects suffix
* Change answer aggregation key to doc_id, query instead of label_id, query (#1726)
* Add debugging example to tutorial (#1731)
* Add debugging example to tutorial
* Add latest docstring and tutorial changes
* Remove Objects suffix
* Add latest docstring and tutorial changes
* Revert "Remove Objects suffix"
This reverts commit 6681cb06510b080775994effe6a50bae42254be4.
* Revert unintentional commit
* Add third debugging option
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Fix another self.device/s typo (#1734)
* Fix yet another self.device(s) typo
* Add typing to 'initialize_device_settings' to try prevent future issues
* Fix bug in Tutorial5
* Fix the same bug in the notebook
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* added test for saving and loading prebuilt pipelines
* fixed typo, changed variable name and added comments
* Add latest docstring and tutorial changes
* Fix a few details of some tutorials (#1733)
* Make Tutorial10 use print instead of logs and fix a typo in Tutoria15
* Add a type check in 'print_answers'
* Add same checks to print_documents and print_questions
* Make RAGenerator return Answers instead of dictionaries
* Fix RAGenerator tests
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Fix `print_answers` (#1743)
* Fix a specific path of print_answers that was assuming answers are dictionaries
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Split pipeline tests into three suites (#1755)
* Split pipeline tests into three suites
* Will this trigger the CI?
* Rename duplicate test into test_most_similar_documents_pipeline
* Fixing a bug that was probably never noticed
* Capitalize starting letter in params (#1750)
* Capitalize starting letter in params
Capitalized the starting letter in code examples for params in keeping with the latest names for nodes where first letter is capitalized.
Refer: https://github.com/deepset-ai/haystack/issues/1748
* Update standard_pipelines.py
Capitalized some starting letters in the docstrings in keeping with the updated node names for standard pipelines
* Multi query eval (#1746)
* add eval() to pipeline
* Add latest docstring and tutorial changes
* support multiple queries in eval()
* Add latest docstring and tutorial changes
* keep single query test
* fix EvaluationResult node_results default
* adjust docstrings
* Add latest docstring and tutorial changes
* minor improvements from comments
* Add latest docstring and tutorial changes
* move EvaluationResult and calculate_metrics to schema
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Split summarizer tests in order to make windows CI work again (#1757)
* separate testfile for summarizer with translation
* Add latest docstring and tutorial changes
* import SPLIT_DOCS from test_summarizer
* add workflow_dispatch to windows_ci
* add workflow_dispatch to linux_ci
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* fix import of EvaluationResult in test case
* exclude test_summarizer_translation.py for windows_ci (#1759)
* Pipelines now tolerate custom _debug content (#1756)
* Pipelines now tolerate custom _debug content
* Support Tables in all DocumentStores (#1744)
* Add support for tables in SQLDocumentStore, FAISSDocumentStore and MilvusDocumentStore
* Add support for WeaviateDocumentStore
* Make sure that embedded meta fields are strings + add embedding_dim to WeaviateDocStore in test config
* Add latest docstring and tutorial changes
* Represent tables in WeaviateDocumentStore as nested lists
* Fix mypy
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Allow TableReader models without aggregation classifier (#1772)
* Fix usage of filters in `/query` endpoint in REST API (#1774)
* WIP filter refactoring
* fix filter formatting
* remove inplace modification of filters
* Public demo (#1747)
* Queries now run only when pressing RUN. File upload hidden. Question is not sent if the textbox is empty.
* Add latest docstring and tutorial changes
* Tidy up: remove needless state, add comments, fix minor bugs
* Had to add results to the status to avoid some bugs in eval mode
* Added 'credits'
* Add footers, update requirements, some random questions for the evaluation
* Add requested changes
* Temporary rollback the UI to the old GoT dataset
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Facilitate concurrent query / indexing in Elasticsearch with dense retrievers (new `skip_missing_embeddings` param) (#1762)
* Filtering records not having embeddings
* Added support for skip_missing_embeddings Flag. Default behavior is throw error when embeddings are missing. If skip_missing_embeddings=True then documents without embeddings are ignored for vector similarity
* Fix for below error:
haystack/document_stores/elasticsearch.py:852: error: Need type annotation for "script_score_query"
* docstring for skip_missing_embeddings parameter
* Raise exception where no documents with embeddings is found for Embedding retriever.
* Default skip_missing_embeddings to True
* Explicitly check if embeddings are present if no results are returned by EmbeddingRetriever for Elasticsearch
* Added test case for based on Julian's input
* Added test case for based on Julian's input. Fix pytest error on the testcase
* Added test case for based on Julian's input. Fix pytest error on the testcase
* Added test case for based on Julian's input. Fix pytest error on the testcase
* Simplify code by using get_embed_count
* Adjust docstring & error msg slightly
* Revert error msg
Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai>
* Huggingface private model support via API tokens (FARMReader) (#1775)
* passed kwargs to model loading
* Pass Auth token explicitly
* add use_auth_token to get_language_model_class
* added use_auth_token parameter at FARMReader
* Add latest docstring and tutorial changes
* added docs for parameter `use_auth_token`
* Add latest docstring and tutorial changes
* adding docs link
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* private hugging face models for retrievers (#1785)
* private dpr
* Add latest docstring and tutorial changes
* added parameters to child functions
* Add latest docstring and tutorial changes
* added tableextractor
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* ignore empty filters parameter (#1783)
* ignore empty filters parameter
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* initialize doc store with doc and label index in tutorial 5 (#1730)
* initialize doc store with doc and label index
* change ipynb according to py for tutorial 5
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Small fixes to the public demo (#1781)
* Make Streamlit tolerant to haystack not knowing its version, and adding special error for docstore issues
* Add workaround for a Streamlit bug
* Make default filters value an empty dict
* Return more context for each answer in the rest api
* Make the hs_version call not-blocking by adding a very quick timeout
* Add disclaimer on low confidence answer
* Use the no-answer feature of the reader to highlight questions with no good answer
* Upgrade torch to v1.10.0 (#1789)
* Upgrade torch to v1.10.0
* Adapt torch version for torch-scatter in TableQA tutorial
* Add latest docstring and tutorial changes
* Make torch version more flexible
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* adding yaml functionality to BaseStandardPipeline
fixes #1681
* Add latest docstring and tutorial changes
* added test for saving and loading prebuilt pipelines
* fixed typo, changed variable name and added comments
* Add latest docstring and tutorial changes
* fix code rendering for example
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Branden Chan <33759007+brandenchan@users.noreply.github.com>
Co-authored-by: Julian Risch <julian.risch@deepset.ai>
Co-authored-by: Sara Zan <sara.zanzottera@deepset.ai>
Co-authored-by: nishanthcgit <5066268+nishanthcgit@users.noreply.github.com>
Co-authored-by: tstadel <60758086+tstadel@users.noreply.github.com>
Co-authored-by: bogdankostic <bogdankostic@web.de>
Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai>
Co-authored-by: C V Goudar <cvgoudar@users.noreply.github.com>
Co-authored-by: Kristof Herrmann <37148029+ArzelaAscoIi@users.noreply.github.com>
2021-11-23 17:01:39 +01:00
|
|
|
from haystack.document_stores.elasticsearch import ElasticsearchDocumentStore
|
2022-02-22 20:33:21 +01:00
|
|
|
from haystack.nodes.base import BaseComponent
|
2022-02-22 15:01:07 +01:00
|
|
|
from haystack.nodes.retriever.base import BaseRetriever
|
2022-01-28 17:32:56 +01:00
|
|
|
from haystack.nodes.retriever.sparse import ElasticsearchRetriever
|
2022-01-26 18:12:55 +01:00
|
|
|
from haystack.pipelines import (
|
Adding yaml functionality to standard pipelines (save/load...) (#1735)
* adding yaml functionality to BaseStandardPipeline
fixes #1681
* Add latest docstring and tutorial changes
* Update API Reference Pages for v1.0 (#1729)
* Create new API pages and update existing ones
* Create query classifier page
* Remove Objects suffix
* Change answer aggregation key to doc_id, query instead of label_id, query (#1726)
* Add debugging example to tutorial (#1731)
* Add debugging example to tutorial
* Add latest docstring and tutorial changes
* Remove Objects suffix
* Add latest docstring and tutorial changes
* Revert "Remove Objects suffix"
This reverts commit 6681cb06510b080775994effe6a50bae42254be4.
* Revert unintentional commit
* Add third debugging option
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Fix another self.device/s typo (#1734)
* Fix yet another self.device(s) typo
* Add typing to 'initialize_device_settings' to try prevent future issues
* Fix bug in Tutorial5
* Fix the same bug in the notebook
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* added test for saving and loading prebuilt pipelines
* fixed typo, changed variable name and added comments
* Add latest docstring and tutorial changes
* Fix a few details of some tutorials (#1733)
* Make Tutorial10 use print instead of logs and fix a typo in Tutoria15
* Add a type check in 'print_answers'
* Add same checks to print_documents and print_questions
* Make RAGenerator return Answers instead of dictionaries
* Fix RAGenerator tests
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Fix `print_answers` (#1743)
* Fix a specific path of print_answers that was assuming answers are dictionaries
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Split pipeline tests into three suites (#1755)
* Split pipeline tests into three suites
* Will this trigger the CI?
* Rename duplicate test into test_most_similar_documents_pipeline
* Fixing a bug that was probably never noticed
* Capitalize starting letter in params (#1750)
* Capitalize starting letter in params
Capitalized the starting letter in code examples for params in keeping with the latest names for nodes where first letter is capitalized.
Refer: https://github.com/deepset-ai/haystack/issues/1748
* Update standard_pipelines.py
Capitalized some starting letters in the docstrings in keeping with the updated node names for standard pipelines
* Multi query eval (#1746)
* add eval() to pipeline
* Add latest docstring and tutorial changes
* support multiple queries in eval()
* Add latest docstring and tutorial changes
* keep single query test
* fix EvaluationResult node_results default
* adjust docstrings
* Add latest docstring and tutorial changes
* minor improvements from comments
* Add latest docstring and tutorial changes
* move EvaluationResult and calculate_metrics to schema
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Split summarizer tests in order to make windows CI work again (#1757)
* separate testfile for summarizer with translation
* Add latest docstring and tutorial changes
* import SPLIT_DOCS from test_summarizer
* add workflow_dispatch to windows_ci
* add workflow_dispatch to linux_ci
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* fix import of EvaluationResult in test case
* exclude test_summarizer_translation.py for windows_ci (#1759)
* Pipelines now tolerate custom _debug content (#1756)
* Pipelines now tolerate custom _debug content
* Support Tables in all DocumentStores (#1744)
* Add support for tables in SQLDocumentStore, FAISSDocumentStore and MilvusDocumentStore
* Add support for WeaviateDocumentStore
* Make sure that embedded meta fields are strings + add embedding_dim to WeaviateDocStore in test config
* Add latest docstring and tutorial changes
* Represent tables in WeaviateDocumentStore as nested lists
* Fix mypy
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Allow TableReader models without aggregation classifier (#1772)
* Fix usage of filters in `/query` endpoint in REST API (#1774)
* WIP filter refactoring
* fix filter formatting
* remove inplace modification of filters
* Public demo (#1747)
* Queries now run only when pressing RUN. File upload hidden. Question is not sent if the textbox is empty.
* Add latest docstring and tutorial changes
* Tidy up: remove needless state, add comments, fix minor bugs
* Had to add results to the status to avoid some bugs in eval mode
* Added 'credits'
* Add footers, update requirements, some random questions for the evaluation
* Add requested changes
* Temporary rollback the UI to the old GoT dataset
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Facilitate concurrent query / indexing in Elasticsearch with dense retrievers (new `skip_missing_embeddings` param) (#1762)
* Filtering records not having embeddings
* Added support for skip_missing_embeddings Flag. Default behavior is throw error when embeddings are missing. If skip_missing_embeddings=True then documents without embeddings are ignored for vector similarity
* Fix for below error:
haystack/document_stores/elasticsearch.py:852: error: Need type annotation for "script_score_query"
* docstring for skip_missing_embeddings parameter
* Raise exception where no documents with embeddings is found for Embedding retriever.
* Default skip_missing_embeddings to True
* Explicitly check if embeddings are present if no results are returned by EmbeddingRetriever for Elasticsearch
* Added test case for based on Julian's input
* Added test case for based on Julian's input. Fix pytest error on the testcase
* Added test case for based on Julian's input. Fix pytest error on the testcase
* Added test case for based on Julian's input. Fix pytest error on the testcase
* Simplify code by using get_embed_count
* Adjust docstring & error msg slightly
* Revert error msg
Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai>
* Huggingface private model support via API tokens (FARMReader) (#1775)
* passed kwargs to model loading
* Pass Auth token explicitly
* add use_auth_token to get_language_model_class
* added use_auth_token parameter at FARMReader
* Add latest docstring and tutorial changes
* added docs for parameter `use_auth_token`
* Add latest docstring and tutorial changes
* adding docs link
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* private hugging face models for retrievers (#1785)
* private dpr
* Add latest docstring and tutorial changes
* added parameters to child functions
* Add latest docstring and tutorial changes
* added tableextractor
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* ignore empty filters parameter (#1783)
* ignore empty filters parameter
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* initialize doc store with doc and label index in tutorial 5 (#1730)
* initialize doc store with doc and label index
* change ipynb according to py for tutorial 5
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Small fixes to the public demo (#1781)
* Make Streamlit tolerant to haystack not knowing its version, and adding special error for docstore issues
* Add workaround for a Streamlit bug
* Make default filters value an empty dict
* Return more context for each answer in the rest api
* Make the hs_version call not-blocking by adding a very quick timeout
* Add disclaimer on low confidence answer
* Use the no-answer feature of the reader to highlight questions with no good answer
* Upgrade torch to v1.10.0 (#1789)
* Upgrade torch to v1.10.0
* Adapt torch version for torch-scatter in TableQA tutorial
* Add latest docstring and tutorial changes
* Make torch version more flexible
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* adding yaml functionality to BaseStandardPipeline
fixes #1681
* Add latest docstring and tutorial changes
* added test for saving and loading prebuilt pipelines
* fixed typo, changed variable name and added comments
* Add latest docstring and tutorial changes
* fix code rendering for example
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Branden Chan <33759007+brandenchan@users.noreply.github.com>
Co-authored-by: Julian Risch <julian.risch@deepset.ai>
Co-authored-by: Sara Zan <sara.zanzottera@deepset.ai>
Co-authored-by: nishanthcgit <5066268+nishanthcgit@users.noreply.github.com>
Co-authored-by: tstadel <60758086+tstadel@users.noreply.github.com>
Co-authored-by: bogdankostic <bogdankostic@web.de>
Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai>
Co-authored-by: C V Goudar <cvgoudar@users.noreply.github.com>
Co-authored-by: Kristof Herrmann <37148029+ArzelaAscoIi@users.noreply.github.com>
2021-11-23 17:01:39 +01:00
|
|
|
Pipeline,
|
|
|
|
DocumentSearchPipeline,
|
|
|
|
RootNode,
|
|
|
|
)
|
|
|
|
from haystack.pipelines import ExtractiveQAPipeline
|
2022-01-26 18:12:55 +01:00
|
|
|
from haystack.nodes import DensePassageRetriever, EmbeddingRetriever
|
|
|
|
|
2022-01-28 17:32:56 +01:00
|
|
|
from conftest import MOCK_DC, DC_API_ENDPOINT, DC_API_KEY, DC_TEST_INDEX, SAMPLES_PATH, deepset_cloud_fixture
|
2022-01-26 18:12:55 +01:00
|
|
|
|
2020-11-20 17:41:08 +01:00
|
|
|
|
2021-09-27 10:52:07 +02:00
|
|
|
@pytest.mark.elasticsearch
@pytest.mark.parametrize("document_store", ["elasticsearch"], indirect=True)
def test_load_and_save_yaml(document_store, tmp_path):
    """Load pipelines from YAML, run them, and check the re-exported config.

    Steps, in order:
      1. Load ``indexing_pipeline`` from the sample YAML and run it on a sample PDF.
      2. Load ``query_pipeline`` from the same YAML, run a query, and check the
         returned query, the top answer, and the absence of a ``_debug`` key.
      3. Check that loading an unknown pipeline name raises.
      4. Save the query pipeline to ``tmp_path / "test.yaml"`` and compare the
         file content with the expected YAML, ignoring spaces and newlines.

    ``document_store`` is requested as a fixture but never referenced in the
    body (presumably it prepares the Elasticsearch index -- confirm in conftest).
    """

    def _strip_layout(text):
        # The comparison below is insensitive to spaces and line breaks.
        return text.replace(" ", "").replace("\n", "")

    yaml_config = SAMPLES_PATH / "pipeline" / "test_pipeline.yaml"

    # Indexing pipeline: must load from YAML and accept a sample PDF.
    indexing_pipeline = Pipeline.load_from_yaml(yaml_config, pipeline_name="indexing_pipeline")
    indexing_pipeline.run(file_paths=SAMPLES_PATH / "pdf" / "sample_pdf_1.pdf")

    # Query pipeline: must load from YAML and answer a question.
    query_pipeline = Pipeline.load_from_yaml(yaml_config, pipeline_name="query_pipeline")
    run_params = {"ESRetriever": {"top_k": 10}, "Reader": {"top_k": 3}}
    result = query_pipeline.run(query="Who made the PDF specification?", params=run_params)

    assert result["query"] == "Who made the PDF specification?"
    top_answer = result["answers"][0]
    assert top_answer.answer == "Adobe Systems"
    assert "_debug" not in result

    # An unknown pipeline name must be rejected.
    with pytest.raises(Exception):
        Pipeline.load_from_yaml(path=yaml_config, pipeline_name="invalid")

    # Config export: write the query pipeline back out and read the file.
    export_path = tmp_path / "test.yaml"
    query_pipeline.save_to_yaml(export_path)
    saved_yaml = export_path.read_text(encoding="utf-8")

    # Indentation here is for readability only; _strip_layout removes it.
    expected_yaml = f"""
        components:
        - name: ESRetriever
          params:
            document_store: ElasticsearchDocumentStore
          type: ElasticsearchRetriever
        - name: ElasticsearchDocumentStore
          params:
            index: haystack_test
            label_index: haystack_test_label
          type: ElasticsearchDocumentStore
        - name: Reader
          params:
            model_name_or_path: deepset/roberta-base-squad2
            no_ans_boost: -10
            num_processes: 0
          type: FARMReader
        pipelines:
        - name: query
          nodes:
          - inputs:
            - Query
            name: ESRetriever
          - inputs:
            - ESRetriever
            name: Reader
          type: Pipeline
        version: {__version__}
    """

    assert _strip_layout(saved_yaml) == _strip_layout(expected_yaml)
|
|
|
|
|
2021-04-30 12:23:29 +02:00
|
|
|
|
Adding yaml functionality to standard pipelines (save/load...) (#1735)
* adding yaml functionality to BaseStandardPipeline
fixes #1681
* Add latest docstring and tutorial changes
* Update API Reference Pages for v1.0 (#1729)
* Create new API pages and update existing ones
* Create query classifier page
* Remove Objects suffix
* Change answer aggregation key to doc_id, query instead of label_id, query (#1726)
* Add debugging example to tutorial (#1731)
* Add debugging example to tutorial
* Add latest docstring and tutorial changes
* Remove Objects suffix
* Add latest docstring and tutorial changes
* Revert "Remove Objects suffix"
This reverts commit 6681cb06510b080775994effe6a50bae42254be4.
* Revert unintentional commit
* Add third debugging option
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Fix another self.device/s typo (#1734)
* Fix yet another self.device(s) typo
* Add typing to 'initialize_device_settings' to try prevent future issues
* Fix bug in Tutorial5
* Fix the same bug in the notebook
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* added test for saving and loading prebuilt pipelines
* fixed typo, changed variable name and added comments
* Add latest docstring and tutorial changes
* Fix a few details of some tutorials (#1733)
* Make Tutorial10 use print instead of logs and fix a typo in Tutoria15
* Add a type check in 'print_answers'
* Add same checks to print_documents and print_questions
* Make RAGenerator return Answers instead of dictionaries
* Fix RAGenerator tests
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Fix `print_answers` (#1743)
* Fix a specific path of print_answers that was assuming answers are dictionaries
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Split pipeline tests into three suites (#1755)
* Split pipeline tests into three suites
* Will this trigger the CI?
* Rename duplicate test into test_most_similar_documents_pipeline
* Fixing a bug that was probably never noticed
* Capitalize starting letter in params (#1750)
* Capitalize starting letter in params
Capitalized the starting letter in code examples for params in keeping with the latest names for nodes where first letter is capitalized.
Refer: https://github.com/deepset-ai/haystack/issues/1748
* Update standard_pipelines.py
Capitalized some starting letters in the docstrings in keeping with the updated node names for standard pipelines
* Multi query eval (#1746)
* add eval() to pipeline
* Add latest docstring and tutorial changes
* support multiple queries in eval()
* Add latest docstring and tutorial changes
* keep single query test
* fix EvaluationResult node_results default
* adjust docstrings
* Add latest docstring and tutorial changes
* minor improvements from comments
* Add latest docstring and tutorial changes
* move EvaluationResult and calculate_metrics to schema
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Split summarizer tests in order to make windows CI work again (#1757)
* separate testfile for summarizer with translation
* Add latest docstring and tutorial changes
* import SPLIT_DOCS from test_summarizer
* add workflow_dispatch to windows_ci
* add workflow_dispatch to linux_ci
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* fix import of EvaluationResult in test case
* exclude test_summarizer_translation.py for windows_ci (#1759)
* Pipelines now tolerate custom _debug content (#1756)
* Pipelines now tolerate custom _debug content
* Support Tables in all DocumentStores (#1744)
* Add support for tables in SQLDocumentStore, FAISSDocumentStore and MilvusDocumentStore
* Add support for WeaviateDocumentStore
* Make sure that embedded meta fields are strings + add embedding_dim to WeaviateDocStore in test config
* Add latest docstring and tutorial changes
* Represent tables in WeaviateDocumentStore as nested lists
* Fix mypy
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Allow TableReader models without aggregation classifier (#1772)
* Fix usage of filters in `/query` endpoint in REST API (#1774)
* WIP filter refactoring
* fix filter formatting
* remove inplace modification of filters
* Public demo (#1747)
* Queries now run only when pressing RUN. File upload hidden. Question is not sent if the textbox is empty.
* Add latest docstring and tutorial changes
* Tidy up: remove needless state, add comments, fix minor bugs
* Had to add results to the status to avoid some bugs in eval mode
* Added 'credits'
* Add footers, update requirements, some random questions for the evaluation
* Add requested changes
* Temporary rollback the UI to the old GoT dataset
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Facilitate concurrent query / indexing in Elasticsearch with dense retrievers (new `skip_missing_embeddings` param) (#1762)
* Filtering records not having embeddings
* Added support for skip_missing_embeddings Flag. Default behavior is throw error when embeddings are missing. If skip_missing_embeddings=True then documents without embeddings are ignored for vector similarity
* Fix for below error:
haystack/document_stores/elasticsearch.py:852: error: Need type annotation for "script_score_query"
* docstring for skip_missing_embeddings parameter
* Raise exception where no documents with embeddings is found for Embedding retriever.
* Default skip_missing_embeddings to True
* Explicitly check if embeddings are present if no results are returned by EmbeddingRetriever for Elasticsearch
* Added test case for based on Julian's input
* Added test case for based on Julian's input. Fix pytest error on the testcase
* Added test case for based on Julian's input. Fix pytest error on the testcase
* Added test case for based on Julian's input. Fix pytest error on the testcase
* Simplify code by using get_embed_count
* Adjust docstring & error msg slightly
* Revert error msg
Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai>
* Huggingface private model support via API tokens (FARMReader) (#1775)
* passed kwargs to model loading
* Pass Auth token explicitly
* add use_auth_token to get_language_model_class
* added use_auth_token parameter at FARMReader
* Add latest docstring and tutorial changes
* added docs for parameter `use_auth_token`
* Add latest docstring and tutorial changes
* adding docs link
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* private hugging face models for retrievers (#1785)
* private dpr
* Add latest docstring and tutorial changes
* added parameters to child functions
* Add latest docstring and tutorial changes
* added tableextractor
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* ignore empty filters parameter (#1783)
* ignore empty filters parameter
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* initialize doc store with doc and label index in tutorial 5 (#1730)
* initialize doc store with doc and label index
* change ipynb according to py for tutorial 5
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Small fixes to the public demo (#1781)
* Make Streamlit tolerant to haystack not knowing its version, and adding special error for docstore issues
* Add workaround for a Streamlit bug
* Make default filters value an empty dict
* Return more context for each answer in the rest api
* Make the hs_version call not-blocking by adding a very quick timeout
* Add disclaimer on low confidence answer
* Use the no-answer feature of the reader to highlight questions with no good answer
* Upgrade torch to v1.10.0 (#1789)
* Upgrade torch to v1.10.0
* Adapt torch version for torch-scatter in TableQA tutorial
* Add latest docstring and tutorial changes
* Make torch version more flexible
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* adding yaml functionality to BaseStandardPipeline
fixes #1681
* Add latest docstring and tutorial changes
* added test for saving and loading prebuilt pipelines
* fixed typo, changed variable name and added comments
* Add latest docstring and tutorial changes
* fix code rendering for example
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Branden Chan <33759007+brandenchan@users.noreply.github.com>
Co-authored-by: Julian Risch <julian.risch@deepset.ai>
Co-authored-by: Sara Zan <sara.zanzottera@deepset.ai>
Co-authored-by: nishanthcgit <5066268+nishanthcgit@users.noreply.github.com>
Co-authored-by: tstadel <60758086+tstadel@users.noreply.github.com>
Co-authored-by: bogdankostic <bogdankostic@web.de>
Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai>
Co-authored-by: C V Goudar <cvgoudar@users.noreply.github.com>
Co-authored-by: Kristof Herrmann <37148029+ArzelaAscoIi@users.noreply.github.com>
2021-11-23 17:01:39 +01:00
|
|
|
@pytest.mark.elasticsearch
|
|
|
|
@pytest.mark.parametrize("document_store", ["elasticsearch"], indirect=True)
|
|
|
|
def test_load_and_save_yaml_prebuilt_pipelines(document_store, tmp_path):
|
|
|
|
# populating index
|
|
|
|
pipeline = Pipeline.load_from_yaml(
|
2022-02-03 13:43:18 +01:00
|
|
|
SAMPLES_PATH / "pipeline" / "test_pipeline.yaml", pipeline_name="indexing_pipeline"
|
Adding yaml functionality to standard pipelines (save/load...) (#1735)
* adding yaml functionality to BaseStandardPipeline
fixes #1681
* Add latest docstring and tutorial changes
* Update API Reference Pages for v1.0 (#1729)
* Create new API pages and update existing ones
* Create query classifier page
* Remove Objects suffix
* Change answer aggregation key to doc_id, query instead of label_id, query (#1726)
* Add debugging example to tutorial (#1731)
* Add debugging example to tutorial
* Add latest docstring and tutorial changes
* Remove Objects suffix
* Add latest docstring and tutorial changes
* Revert "Remove Objects suffix"
This reverts commit 6681cb06510b080775994effe6a50bae42254be4.
* Revert unintentional commit
* Add third debugging option
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Fix another self.device/s typo (#1734)
* Fix yet another self.device(s) typo
* Add typing to 'initialize_device_settings' to try prevent future issues
* Fix bug in Tutorial5
* Fix the same bug in the notebook
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* added test for saving and loading prebuilt pipelines
* fixed typo, changed variable name and added comments
* Add latest docstring and tutorial changes
* Fix a few details of some tutorials (#1733)
* Make Tutorial10 use print instead of logs and fix a typo in Tutorial15
* Add a type check in 'print_answers'
* Add same checks to print_documents and print_questions
* Make RAGenerator return Answers instead of dictionaries
* Fix RAGenerator tests
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Fix `print_answers` (#1743)
* Fix a specific path of print_answers that was assuming answers are dictionaries
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Split pipeline tests into three suites (#1755)
* Split pipeline tests into three suites
* Will this trigger the CI?
* Rename duplicate test into test_most_similar_documents_pipeline
* Fixing a bug that was probably never noticed
* Capitalize starting letter in params (#1750)
* Capitalize starting letter in params
Capitalized the starting letter in code examples for params in keeping with the latest names for nodes where first letter is capitalized.
Refer: https://github.com/deepset-ai/haystack/issues/1748
* Update standard_pipelines.py
Capitalized some starting letters in the docstrings in keeping with the updated node names for standard pipelines
* Multi query eval (#1746)
* add eval() to pipeline
* Add latest docstring and tutorial changes
* support multiple queries in eval()
* Add latest docstring and tutorial changes
* keep single query test
* fix EvaluationResult node_results default
* adjust docstrings
* Add latest docstring and tutorial changes
* minor improvements from comments
* Add latest docstring and tutorial changes
* move EvaluationResult and calculate_metrics to schema
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Split summarizer tests in order to make windows CI work again (#1757)
* separate testfile for summarizer with translation
* Add latest docstring and tutorial changes
* import SPLIT_DOCS from test_summarizer
* add workflow_dispatch to windows_ci
* add workflow_dispatch to linux_ci
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* fix import of EvaluationResult in test case
* exclude test_summarizer_translation.py for windows_ci (#1759)
* Pipelines now tolerate custom _debug content (#1756)
* Pipelines now tolerate custom _debug content
* Support Tables in all DocumentStores (#1744)
* Add support for tables in SQLDocumentStore, FAISSDocumentStore and MilvusDocumentStore
* Add support for WeaviateDocumentStore
* Make sure that embedded meta fields are strings + add embedding_dim to WeaviateDocStore in test config
* Add latest docstring and tutorial changes
* Represent tables in WeaviateDocumentStore as nested lists
* Fix mypy
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Allow TableReader models without aggregation classifier (#1772)
* Fix usage of filters in `/query` endpoint in REST API (#1774)
* WIP filter refactoring
* fix filter formatting
* remove inplace modification of filters
* Public demo (#1747)
* Queries now run only when pressing RUN. File upload hidden. Question is not sent if the textbox is empty.
* Add latest docstring and tutorial changes
* Tidy up: remove needless state, add comments, fix minor bugs
* Had to add results to the status to avoid some bugs in eval mode
* Added 'credits'
* Add footers, update requirements, some random questions for the evaluation
* Add requested changes
* Temporary rollback the UI to the old GoT dataset
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Facilitate concurrent query / indexing in Elasticsearch with dense retrievers (new `skip_missing_embeddings` param) (#1762)
* Filtering records not having embeddings
* Added support for the skip_missing_embeddings flag. The default behavior is to throw an error when embeddings are missing. If skip_missing_embeddings=True, then documents without embeddings are ignored for vector similarity
* Fix for below error:
haystack/document_stores/elasticsearch.py:852: error: Need type annotation for "script_score_query"
* docstring for skip_missing_embeddings parameter
* Raise an exception when no documents with embeddings are found for the Embedding retriever.
* Default skip_missing_embeddings to True
* Explicitly check if embeddings are present if no results are returned by EmbeddingRetriever for Elasticsearch
* Added test case based on Julian's input
* Added test case based on Julian's input. Fix pytest error on the testcase
* Added test case based on Julian's input. Fix pytest error on the testcase
* Added test case based on Julian's input. Fix pytest error on the testcase
* Simplify code by using get_embed_count
* Adjust docstring & error msg slightly
* Revert error msg
Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai>
* Huggingface private model support via API tokens (FARMReader) (#1775)
* passed kwargs to model loading
* Pass Auth token explicitly
* add use_auth_token to get_language_model_class
* added use_auth_token parameter at FARMReader
* Add latest docstring and tutorial changes
* added docs for parameter `use_auth_token`
* Add latest docstring and tutorial changes
* adding docs link
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* private hugging face models for retrievers (#1785)
* private dpr
* Add latest docstring and tutorial changes
* added parameters to child functions
* Add latest docstring and tutorial changes
* added tableextractor
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* ignore empty filters parameter (#1783)
* ignore empty filters parameter
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* initialize doc store with doc and label index in tutorial 5 (#1730)
* initialize doc store with doc and label index
* change ipynb according to py for tutorial 5
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Small fixes to the public demo (#1781)
* Make Streamlit tolerant to haystack not knowing its version, and add a special error for docstore issues
* Add workaround for a Streamlit bug
* Make default filters value an empty dict
* Return more context for each answer in the rest api
* Make the hs_version call not-blocking by adding a very quick timeout
* Add disclaimer on low confidence answer
* Use the no-answer feature of the reader to highlight questions with no good answer
* Upgrade torch to v1.10.0 (#1789)
* Upgrade torch to v1.10.0
* Adapt torch version for torch-scatter in TableQA tutorial
* Add latest docstring and tutorial changes
* Make torch version more flexible
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* adding yaml functionality to BaseStandardPipeline
fixes #1681
* Add latest docstring and tutorial changes
* added test for saving and loading prebuilt pipelines
* fixed typo, changed variable name and added comments
* Add latest docstring and tutorial changes
* fix code rendering for example
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Branden Chan <33759007+brandenchan@users.noreply.github.com>
Co-authored-by: Julian Risch <julian.risch@deepset.ai>
Co-authored-by: Sara Zan <sara.zanzottera@deepset.ai>
Co-authored-by: nishanthcgit <5066268+nishanthcgit@users.noreply.github.com>
Co-authored-by: tstadel <60758086+tstadel@users.noreply.github.com>
Co-authored-by: bogdankostic <bogdankostic@web.de>
Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai>
Co-authored-by: C V Goudar <cvgoudar@users.noreply.github.com>
Co-authored-by: Kristof Herrmann <37148029+ArzelaAscoIi@users.noreply.github.com>
2021-11-23 17:01:39 +01:00
|
|
|
)
|
2022-02-03 13:43:18 +01:00
|
|
|
pipeline.run(file_paths=SAMPLES_PATH / "pdf" / "sample_pdf_1.pdf")
|
Adding yaml functionality to standard pipelines (save/load...) (#1735)
* adding yaml functionality to BaseStandardPipeline
fixes #1681
* Add latest docstring and tutorial changes
* Update API Reference Pages for v1.0 (#1729)
* Create new API pages and update existing ones
* Create query classifier page
* Remove Objects suffix
* Change answer aggregation key to doc_id, query instead of label_id, query (#1726)
* Add debugging example to tutorial (#1731)
* Add debugging example to tutorial
* Add latest docstring and tutorial changes
* Remove Objects suffix
* Add latest docstring and tutorial changes
* Revert "Remove Objects suffix"
This reverts commit 6681cb06510b080775994effe6a50bae42254be4.
* Revert unintentional commit
* Add third debugging option
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Fix another self.device/s typo (#1734)
* Fix yet another self.device(s) typo
* Add typing to 'initialize_device_settings' to try prevent future issues
* Fix bug in Tutorial5
* Fix the same bug in the notebook
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* added test for saving and loading prebuilt pipelines
* fixed typo, changed variable name and added comments
* Add latest docstring and tutorial changes
* Fix a few details of some tutorials (#1733)
* Make Tutorial10 use print instead of logs and fix a typo in Tutorial15
* Add a type check in 'print_answers'
* Add same checks to print_documents and print_questions
* Make RAGenerator return Answers instead of dictionaries
* Fix RAGenerator tests
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Fix `print_answers` (#1743)
* Fix a specific path of print_answers that was assuming answers are dictionaries
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Split pipeline tests into three suites (#1755)
* Split pipeline tests into three suites
* Will this trigger the CI?
* Rename duplicate test into test_most_similar_documents_pipeline
* Fixing a bug that was probably never noticed
* Capitalize starting letter in params (#1750)
* Capitalize starting letter in params
Capitalized the starting letter in code examples for params in keeping with the latest names for nodes where first letter is capitalized.
Refer: https://github.com/deepset-ai/haystack/issues/1748
* Update standard_pipelines.py
Capitalized some starting letters in the docstrings in keeping with the updated node names for standard pipelines
* Multi query eval (#1746)
* add eval() to pipeline
* Add latest docstring and tutorial changes
* support multiple queries in eval()
* Add latest docstring and tutorial changes
* keep single query test
* fix EvaluationResult node_results default
* adjust docstrings
* Add latest docstring and tutorial changes
* minor improvements from comments
* Add latest docstring and tutorial changes
* move EvaluationResult and calculate_metrics to schema
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Split summarizer tests in order to make windows CI work again (#1757)
* separate testfile for summarizer with translation
* Add latest docstring and tutorial changes
* import SPLIT_DOCS from test_summarizer
* add workflow_dispatch to windows_ci
* add workflow_dispatch to linux_ci
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* fix import of EvaluationResult in test case
* exclude test_summarizer_translation.py for windows_ci (#1759)
* Pipelines now tolerate custom _debug content (#1756)
* Pipelines now tolerate custom _debug content
* Support Tables in all DocumentStores (#1744)
* Add support for tables in SQLDocumentStore, FAISSDocumentStore and MilvusDocumentStore
* Add support for WeaviateDocumentStore
* Make sure that embedded meta fields are strings + add embedding_dim to WeaviateDocStore in test config
* Add latest docstring and tutorial changes
* Represent tables in WeaviateDocumentStore as nested lists
* Fix mypy
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Allow TableReader models without aggregation classifier (#1772)
* Fix usage of filters in `/query` endpoint in REST API (#1774)
* WIP filter refactoring
* fix filter formatting
* remove inplace modification of filters
* Public demo (#1747)
* Queries now run only when pressing RUN. File upload hidden. Question is not sent if the textbox is empty.
* Add latest docstring and tutorial changes
* Tidy up: remove needless state, add comments, fix minor bugs
* Had to add results to the status to avoid some bugs in eval mode
* Added 'credits'
* Add footers, update requirements, some random questions for the evaluation
* Add requested changes
* Temporary rollback the UI to the old GoT dataset
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Facilitate concurrent query / indexing in Elasticsearch with dense retrievers (new `skip_missing_embeddings` param) (#1762)
* Filtering records not having embeddings
* Added support for the skip_missing_embeddings flag. The default behavior is to throw an error when embeddings are missing. If skip_missing_embeddings=True, then documents without embeddings are ignored for vector similarity
* Fix for below error:
haystack/document_stores/elasticsearch.py:852: error: Need type annotation for "script_score_query"
* docstring for skip_missing_embeddings parameter
* Raise an exception when no documents with embeddings are found for the Embedding retriever.
* Default skip_missing_embeddings to True
* Explicitly check if embeddings are present if no results are returned by EmbeddingRetriever for Elasticsearch
* Added test case based on Julian's input
* Added test case based on Julian's input. Fix pytest error on the testcase
* Added test case based on Julian's input. Fix pytest error on the testcase
* Added test case based on Julian's input. Fix pytest error on the testcase
* Simplify code by using get_embed_count
* Adjust docstring & error msg slightly
* Revert error msg
Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai>
* Huggingface private model support via API tokens (FARMReader) (#1775)
* passed kwargs to model loading
* Pass Auth token explicitly
* add use_auth_token to get_language_model_class
* added use_auth_token parameter at FARMReader
* Add latest docstring and tutorial changes
* added docs for parameter `use_auth_token`
* Add latest docstring and tutorial changes
* adding docs link
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* private hugging face models for retrievers (#1785)
* private dpr
* Add latest docstring and tutorial changes
* added parameters to child functions
* Add latest docstring and tutorial changes
* added tableextractor
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* ignore empty filters parameter (#1783)
* ignore empty filters parameter
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* initialize doc store with doc and label index in tutorial 5 (#1730)
* initialize doc store with doc and label index
* change ipynb according to py for tutorial 5
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Small fixes to the public demo (#1781)
* Make Streamlit tolerant to haystack not knowing its version, and add a special error for docstore issues
* Add workaround for a Streamlit bug
* Make default filters value an empty dict
* Return more context for each answer in the rest api
* Make the hs_version call not-blocking by adding a very quick timeout
* Add disclaimer on low confidence answer
* Use the no-answer feature of the reader to highlight questions with no good answer
* Upgrade torch to v1.10.0 (#1789)
* Upgrade torch to v1.10.0
* Adapt torch version for torch-scatter in TableQA tutorial
* Add latest docstring and tutorial changes
* Make torch version more flexible
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* adding yaml functionality to BaseStandardPipeline
fixes #1681
* Add latest docstring and tutorial changes
* added test for saving and loading prebuilt pipelines
* fixed typo, changed variable name and added comments
* Add latest docstring and tutorial changes
* fix code rendering for example
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Branden Chan <33759007+brandenchan@users.noreply.github.com>
Co-authored-by: Julian Risch <julian.risch@deepset.ai>
Co-authored-by: Sara Zan <sara.zanzottera@deepset.ai>
Co-authored-by: nishanthcgit <5066268+nishanthcgit@users.noreply.github.com>
Co-authored-by: tstadel <60758086+tstadel@users.noreply.github.com>
Co-authored-by: bogdankostic <bogdankostic@web.de>
Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai>
Co-authored-by: C V Goudar <cvgoudar@users.noreply.github.com>
Co-authored-by: Kristof Herrmann <37148029+ArzelaAscoIi@users.noreply.github.com>
2021-11-23 17:01:39 +01:00
|
|
|
# test correct load of query pipeline from yaml
|
|
|
|
pipeline = ExtractiveQAPipeline.load_from_yaml(
|
2022-02-03 13:43:18 +01:00
|
|
|
SAMPLES_PATH / "pipeline" / "test_pipeline.yaml", pipeline_name="query_pipeline"
|
Adding yaml functionality to standard pipelines (save/load...) (#1735)
* adding yaml functionality to BaseStandardPipeline
fixes #1681
* Add latest docstring and tutorial changes
* Update API Reference Pages for v1.0 (#1729)
* Create new API pages and update existing ones
* Create query classifier page
* Remove Objects suffix
* Change answer aggregation key to doc_id, query instead of label_id, query (#1726)
* Add debugging example to tutorial (#1731)
* Add debugging example to tutorial
* Add latest docstring and tutorial changes
* Remove Objects suffix
* Add latest docstring and tutorial changes
* Revert "Remove Objects suffix"
This reverts commit 6681cb06510b080775994effe6a50bae42254be4.
* Revert unintentional commit
* Add third debugging option
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Fix another self.device/s typo (#1734)
* Fix yet another self.device(s) typo
* Add typing to 'initialize_device_settings' to try prevent future issues
* Fix bug in Tutorial5
* Fix the same bug in the notebook
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* added test for saving and loading prebuilt pipelines
* fixed typo, changed variable name and added comments
* Add latest docstring and tutorial changes
* Fix a few details of some tutorials (#1733)
* Make Tutorial10 use print instead of logs and fix a typo in Tutorial15
* Add a type check in 'print_answers'
* Add same checks to print_documents and print_questions
* Make RAGenerator return Answers instead of dictionaries
* Fix RAGenerator tests
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Fix `print_answers` (#1743)
* Fix a specific path of print_answers that was assuming answers are dictionaries
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Split pipeline tests into three suites (#1755)
* Split pipeline tests into three suites
* Will this trigger the CI?
* Rename duplicate test into test_most_similar_documents_pipeline
* Fixing a bug that was probably never noticed
* Capitalize starting letter in params (#1750)
* Capitalize starting letter in params
Capitalized the starting letter in code examples for params in keeping with the latest names for nodes where first letter is capitalized.
Refer: https://github.com/deepset-ai/haystack/issues/1748
* Update standard_pipelines.py
Capitalized some starting letters in the docstrings in keeping with the updated node names for standard pipelines
* Multi query eval (#1746)
* add eval() to pipeline
* Add latest docstring and tutorial changes
* support multiple queries in eval()
* Add latest docstring and tutorial changes
* keep single query test
* fix EvaluationResult node_results default
* adjust docstrings
* Add latest docstring and tutorial changes
* minor improvements from comments
* Add latest docstring and tutorial changes
* move EvaluationResult and calculate_metrics to schema
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Split summarizer tests in order to make windows CI work again (#1757)
* separate testfile for summarizer with translation
* Add latest docstring and tutorial changes
* import SPLIT_DOCS from test_summarizer
* add workflow_dispatch to windows_ci
* add workflow_dispatch to linux_ci
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* fix import of EvaluationResult in test case
* exclude test_summarizer_translation.py for windows_ci (#1759)
* Pipelines now tolerate custom _debug content (#1756)
* Pipelines now tolerate custom _debug content
* Support Tables in all DocumentStores (#1744)
* Add support for tables in SQLDocumentStore, FAISSDocumentStore and MilvusDocumentStore
* Add support for WeaviateDocumentStore
* Make sure that embedded meta fields are strings + add embedding_dim to WeaviateDocStore in test config
* Add latest docstring and tutorial changes
* Represent tables in WeaviateDocumentStore as nested lists
* Fix mypy
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Allow TableReader models without aggregation classifier (#1772)
* Fix usage of filters in `/query` endpoint in REST API (#1774)
* WIP filter refactoring
* fix filter formatting
* remove inplace modification of filters
* Public demo (#1747)
* Queries now run only when pressing RUN. File upload hidden. Question is not sent if the textbox is empty.
* Add latest docstring and tutorial changes
* Tidy up: remove needless state, add comments, fix minor bugs
* Had to add results to the status to avoid some bugs in eval mode
* Added 'credits'
* Add footers, update requirements, some random questions for the evaluation
* Add requested changes
* Temporary rollback the UI to the old GoT dataset
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Facilitate concurrent query / indexing in Elasticsearch with dense retrievers (new `skip_missing_embeddings` param) (#1762)
* Filtering records not having embeddings
* Added support for the skip_missing_embeddings flag. The default behavior is to throw an error when embeddings are missing. If skip_missing_embeddings=True, then documents without embeddings are ignored for vector similarity
* Fix for below error:
haystack/document_stores/elasticsearch.py:852: error: Need type annotation for "script_score_query"
* docstring for skip_missing_embeddings parameter
* Raise an exception when no documents with embeddings are found for the Embedding retriever.
* Default skip_missing_embeddings to True
* Explicitly check if embeddings are present if no results are returned by EmbeddingRetriever for Elasticsearch
* Added test case based on Julian's input
* Added test case based on Julian's input. Fix pytest error on the testcase
* Added test case based on Julian's input. Fix pytest error on the testcase
* Added test case based on Julian's input. Fix pytest error on the testcase
* Simplify code by using get_embed_count
* Adjust docstring & error msg slightly
* Revert error msg
Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai>
* Huggingface private model support via API tokens (FARMReader) (#1775)
* passed kwargs to model loading
* Pass Auth token explicitly
* add use_auth_token to get_language_model_class
* added use_auth_token parameter at FARMReader
* Add latest docstring and tutorial changes
* added docs for parameter `use_auth_token`
* Add latest docstring and tutorial changes
* adding docs link
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* private hugging face models for retrievers (#1785)
* private dpr
* Add latest docstring and tutorial changes
* added parameters to child functions
* Add latest docstring and tutorial changes
* added tableextractor
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* ignore empty filters parameter (#1783)
* ignore empty filters parameter
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* initialize doc store with doc and label index in tutorial 5 (#1730)
* initialize doc store with doc and label index
* change ipynb according to py for tutorial 5
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Small fixes to the public demo (#1781)
* Make Streamlit tolerant to haystack not knowing its version, and add a special error for docstore issues
* Add workaround for a Streamlit bug
* Make default filters value an empty dict
* Return more context for each answer in the rest api
* Make the hs_version call not-blocking by adding a very quick timeout
* Add disclaimer on low confidence answer
* Use the no-answer feature of the reader to highlight questions with no good answer
* Upgrade torch to v1.10.0 (#1789)
* Upgrade torch to v1.10.0
* Adapt torch version for torch-scatter in TableQA tutorial
* Add latest docstring and tutorial changes
* Make torch version more flexible
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* adding yaml functionality to BaseStandardPipeline
fixes #1681
* Add latest docstring and tutorial changes
* added test for saving and loading prebuilt pipelines
* fixed typo, changed variable name and added comments
* Add latest docstring and tutorial changes
* fix code rendering for example
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Branden Chan <33759007+brandenchan@users.noreply.github.com>
Co-authored-by: Julian Risch <julian.risch@deepset.ai>
Co-authored-by: Sara Zan <sara.zanzottera@deepset.ai>
Co-authored-by: nishanthcgit <5066268+nishanthcgit@users.noreply.github.com>
Co-authored-by: tstadel <60758086+tstadel@users.noreply.github.com>
Co-authored-by: bogdankostic <bogdankostic@web.de>
Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai>
Co-authored-by: C V Goudar <cvgoudar@users.noreply.github.com>
Co-authored-by: Kristof Herrmann <37148029+ArzelaAscoIi@users.noreply.github.com>
2021-11-23 17:01:39 +01:00
|
|
|
)
|
|
|
|
prediction = pipeline.run(
|
|
|
|
query="Who made the PDF specification?", params={"ESRetriever": {"top_k": 10}, "Reader": {"top_k": 3}}
|
|
|
|
)
|
|
|
|
assert prediction["query"] == "Who made the PDF specification?"
|
|
|
|
assert prediction["answers"][0].answer == "Adobe Systems"
|
|
|
|
assert "_debug" not in prediction.keys()
|
|
|
|
|
|
|
|
# test invalid pipeline name
|
|
|
|
with pytest.raises(Exception):
|
|
|
|
ExtractiveQAPipeline.load_from_yaml(
|
2022-02-03 13:43:18 +01:00
|
|
|
path=SAMPLES_PATH / "pipeline" / "test_pipeline.yaml", pipeline_name="invalid"
|
Adding yaml functionality to standard pipelines (save/load...) (#1735)
* adding yaml functionality to BaseStandardPipeline
fixes #1681
* Add latest docstring and tutorial changes
* Update API Reference Pages for v1.0 (#1729)
* Create new API pages and update existing ones
* Create query classifier page
* Remove Objects suffix
* Change answer aggregation key to doc_id, query instead of label_id, query (#1726)
* Add debugging example to tutorial (#1731)
* Add debugging example to tutorial
* Add latest docstring and tutorial changes
* Remove Objects suffix
* Add latest docstring and tutorial changes
* Revert "Remove Objects suffix"
This reverts commit 6681cb06510b080775994effe6a50bae42254be4.
* Revert unintentional commit
* Add third debugging option
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Fix another self.device/s typo (#1734)
* Fix yet another self.device(s) typo
* Add typing to 'initialize_device_settings' to try prevent future issues
* Fix bug in Tutorial5
* Fix the same bug in the notebook
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* added test for saving and loading prebuilt pipelines
* fixed typo, changed variable name and added comments
* Add latest docstring and tutorial changes
* Fix a few details of some tutorials (#1733)
* Make Tutorial10 use print instead of logs and fix a typo in Tutorial15
* Add a type check in 'print_answers'
* Add same checks to print_documents and print_questions
* Make RAGenerator return Answers instead of dictionaries
* Fix RAGenerator tests
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Fix `print_answers` (#1743)
* Fix a specific path of print_answers that was assuming answers are dictionaries
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Split pipeline tests into three suites (#1755)
* Split pipeline tests into three suites
* Will this trigger the CI?
* Rename duplicate test into test_most_similar_documents_pipeline
* Fixing a bug that was probably never noticed
* Capitalize starting letter in params (#1750)
* Capitalize starting letter in params
Capitalized the starting letter in code examples for params in keeping with the latest names for nodes where first letter is capitalized.
Refer: https://github.com/deepset-ai/haystack/issues/1748
* Update standard_pipelines.py
Capitalized some starting letters in the docstrings in keeping with the updated node names for standard pipelines
* Multi query eval (#1746)
* add eval() to pipeline
* Add latest docstring and tutorial changes
* support multiple queries in eval()
* Add latest docstring and tutorial changes
* keep single query test
* fix EvaluationResult node_results default
* adjust docstrings
* Add latest docstring and tutorial changes
* minor improvements from comments
* Add latest docstring and tutorial changes
* move EvaluationResult and calculate_metrics to schema
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Split summarizer tests in order to make windows CI work again (#1757)
* separate testfile for summarizer with translation
* Add latest docstring and tutorial changes
* import SPLIT_DOCS from test_summarizer
* add workflow_dispatch to windows_ci
* add worflow_dispatch to linux_ci
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* fix import of EvaluationResult in test case
* exclude test_summarizer_translation.py for windows_ci (#1759)
* Pipelines now tolerate custom _debug content (#1756)
* Pipelines now tolerate custom _debug content
* Support Tables in all DocumentStores (#1744)
* Add support for tables in SQLDocumentStore, FAISSDocumentStore and MilvuDocumentStore
* Add support for WeaviateDocumentStore
* Make sure that embedded meta fields are strings + add embedding_dim to WeaviateDocStore in test config
* Add latest docstring and tutorial changes
* Represent tables in WeaviateDocumentStore as nested lists
* Fix mypy
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Allow TableReader models without aggregation classifier (#1772)
* Fix usage of filters in `/query` endpoint in REST API (#1774)
* WIP filter refactoring
* fix filter formatting
* remove inplace modification of filters
* Public demo (#1747)
* Queries now run only when pressing RUN. File upload hidden. Question is not sent if the textbox is empty.
* Add latest docstring and tutorial changes
* Tidy up: remove needless state, add comments, fix minor bugs
* Had to add results to the status to avoid some bugs in eval mode
* Added 'credits'
* Add footers, update requirements, some random questions for the evaluation
* Add requested changes
* Temporary rollback the UI to the old GoT dataset
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Facilitate concurrent query / indexing in Elasticsearch with dense retrievers (new `skip_missing_embeddings` param) (#1762)
* Filtering records not having embeddings
* Added support for skip_missing_embeddings Flag. Default behavior is throw error when embeddings are missing. If skip_missing_embeddings=True then documents without embeddings are ignored for vector similarity
* Fix for below error:
haystack/document_stores/elasticsearch.py:852: error: Need type annotation for "script_score_query"
* docstring for skip_missing_embeddings parameter
* Raise exception where no documents with embeddings is found for Embedding retriever.
* Default skip_missing_embeddings to True
* Explicitly check if embeddings are present if no results are returned by EmbeddingRetriever for Elasticsearch
* Added test case for based on Julian's input
* Added test case for based on Julian's input. Fix pytest error on the testcase
* Added test case for based on Julian's input. Fix pytest error on the testcase
* Added test case for based on Julian's input. Fix pytest error on the testcase
* Simplify code by using get_embed_count
* Adjust docstring & error msg slightly
* Revert error msg
Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai>
* Huggingface private model support via API tokens (FARMReader) (#1775)
* passed kwargs to model loading
* Pass Auth token explicitly
* add use_auth_token to get_language_model_class
* added use_auth_token parameter at FARMReader
* Add latest docstring and tutorial changes
* added docs for parameter `use_auth_token`
* Add latest docstring and tutorial changes
* adding docs link
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* private hugging face models for retrievers (#1785)
* private dpr
* Add latest docstring and tutorial changes
* added parameters to child functions
* Add latest docstring and tutorial changes
* added tableextractor
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* ignore empty filters parameter (#1783)
* ignore empty filters parameter
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* initialize doc store with doc and label index in tutorial 5 (#1730)
* initialize doc store with doc and label index
* change ipynb according to py for tutorial 5
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Small fixes to the public demo (#1781)
* Make strealit tolerant to haystack not knowing its version, and adding special error for docstore issues
* Add workaround for a Streamlit bug
* Make default filters value an empty dict
* Return more context for each answer in the rest api
* Make the hs_version call not-blocking by adding a very quick timeout
* Add disclaimer on low confidence answer
* Use the no-answer feature of the reader to highlight questions with no good answer
* Upgrade torch to v1.10.0 (#1789)
* Upgrade torch to v1.10.0
* Adapt torch version for torch-scatter in TableQA tutorial
* Add latest docstring and tutorial changes
* Make torch version more flexible
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* adding yaml functionality to BaseStandardPipeline
fixes #1681
* Add latest docstring and tutorial changes
* added test for saving and loading prebuilt pipelines
* fixed typo, changed variable name and added comments
* Add latest docstring and tutorial changes
* fix code rendering for example
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Branden Chan <33759007+brandenchan@users.noreply.github.com>
Co-authored-by: Julian Risch <julian.risch@deepset.ai>
Co-authored-by: Sara Zan <sara.zanzottera@deepset.ai>
Co-authored-by: nishanthcgit <5066268+nishanthcgit@users.noreply.github.com>
Co-authored-by: tstadel <60758086+tstadel@users.noreply.github.com>
Co-authored-by: bogdankostic <bogdankostic@web.de>
Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai>
Co-authored-by: C V Goudar <cvgoudar@users.noreply.github.com>
Co-authored-by: Kristof Herrmann <37148029+ArzelaAscoIi@users.noreply.github.com>
2021-11-23 17:01:39 +01:00
|
|
|
)
|
|
|
|
# test config export
|
|
|
|
pipeline.save_to_yaml(tmp_path / "test.yaml")
|
|
|
|
with open(tmp_path / "test.yaml", "r", encoding="utf-8") as stream:
|
|
|
|
saved_yaml = stream.read()
|
2022-02-21 12:22:37 +01:00
|
|
|
expected_yaml = f"""
|
Adding yaml functionality to standard pipelines (save/load...) (#1735)
* adding yaml functionality to BaseStandardPipeline
fixes #1681
* Add latest docstring and tutorial changes
* Update API Reference Pages for v1.0 (#1729)
* Create new API pages and update existing ones
* Create query classifier page
* Remove Objects suffix
* Change answer aggregation key to doc_id, query instead of label_id, query (#1726)
* Add debugging example to tutorial (#1731)
* Add debugging example to tutorial
* Add latest docstring and tutorial changes
* Remove Objects suffix
* Add latest docstring and tutorial changes
* Revert "Remove Objects suffix"
This reverts commit 6681cb06510b080775994effe6a50bae42254be4.
* Revert unintentional commit
* Add third debugging option
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Fix another self.device/s typo (#1734)
* Fix yet another self.device(s) typo
* Add typing to 'initialize_device_settings' to try prevent future issues
* Fix bug in Tutorial5
* Fix the same bug in the notebook
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* added test for saving and loading prebuilt pipelines
* fixed typo, changed variable name and added comments
* Add latest docstring and tutorial changes
* Fix a few details of some tutorials (#1733)
* Make Tutorial10 use print instead of logs and fix a typo in Tutoria15
* Add a type check in 'print_answers'
* Add same checks to print_documents and print_questions
* Make RAGenerator return Answers instead of dictionaries
* Fix RAGenerator tests
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Fix `print_answers` (#1743)
* Fix a specific path of print_answers that was assuming answers are dictionaries
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Split pipeline tests into three suites (#1755)
* Split pipeline tests into three suites
* Will this trigger the CI?
* Rename duplicate test into test_most_similar_documents_pipeline
* Fixing a bug that was probably never noticed
* Capitalize starting letter in params (#1750)
* Capitalize starting letter in params
Capitalized the starting letter in code examples for params in keeping with the latest names for nodes where first letter is capitalized.
Refer: https://github.com/deepset-ai/haystack/issues/1748
* Update standard_pipelines.py
Capitalized some starting letters in the docstrings in keeping with the updated node names for standard pipelines
* Multi query eval (#1746)
* add eval() to pipeline
* Add latest docstring and tutorial changes
* support multiple queries in eval()
* Add latest docstring and tutorial changes
* keep single query test
* fix EvaluationResult node_results default
* adjust docstrings
* Add latest docstring and tutorial changes
* minor improvements from comments
* Add latest docstring and tutorial changes
* move EvaluationResult and calculate_metrics to schema
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Split summarizer tests in order to make windows CI work again (#1757)
* separate testfile for summarizer with translation
* Add latest docstring and tutorial changes
* import SPLIT_DOCS from test_summarizer
* add workflow_dispatch to windows_ci
* add worflow_dispatch to linux_ci
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* fix import of EvaluationResult in test case
* exclude test_summarizer_translation.py for windows_ci (#1759)
* Pipelines now tolerate custom _debug content (#1756)
* Pipelines now tolerate custom _debug content
* Support Tables in all DocumentStores (#1744)
* Add support for tables in SQLDocumentStore, FAISSDocumentStore and MilvuDocumentStore
* Add support for WeaviateDocumentStore
* Make sure that embedded meta fields are strings + add embedding_dim to WeaviateDocStore in test config
* Add latest docstring and tutorial changes
* Represent tables in WeaviateDocumentStore as nested lists
* Fix mypy
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Allow TableReader models without aggregation classifier (#1772)
* Fix usage of filters in `/query` endpoint in REST API (#1774)
* WIP filter refactoring
* fix filter formatting
* remove inplace modification of filters
* Public demo (#1747)
* Queries now run only when pressing RUN. File upload hidden. Question is not sent if the textbox is empty.
* Add latest docstring and tutorial changes
* Tidy up: remove needless state, add comments, fix minor bugs
* Had to add results to the status to avoid some bugs in eval mode
* Added 'credits'
* Add footers, update requirements, some random questions for the evaluation
* Add requested changes
* Temporary rollback the UI to the old GoT dataset
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Facilitate concurrent query / indexing in Elasticsearch with dense retrievers (new `skip_missing_embeddings` param) (#1762)
* Filtering records not having embeddings
* Added support for skip_missing_embeddings Flag. Default behavior is throw error when embeddings are missing. If skip_missing_embeddings=True then documents without embeddings are ignored for vector similarity
* Fix for below error:
haystack/document_stores/elasticsearch.py:852: error: Need type annotation for "script_score_query"
* docstring for skip_missing_embeddings parameter
* Raise exception where no documents with embeddings is found for Embedding retriever.
* Default skip_missing_embeddings to True
* Explicitly check if embeddings are present if no results are returned by EmbeddingRetriever for Elasticsearch
* Added test case for based on Julian's input
* Added test case for based on Julian's input. Fix pytest error on the testcase
* Added test case for based on Julian's input. Fix pytest error on the testcase
* Added test case for based on Julian's input. Fix pytest error on the testcase
* Simplify code by using get_embed_count
* Adjust docstring & error msg slightly
* Revert error msg
Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai>
* Huggingface private model support via API tokens (FARMReader) (#1775)
* passed kwargs to model loading
* Pass Auth token explicitly
* add use_auth_token to get_language_model_class
* added use_auth_token parameter at FARMReader
* Add latest docstring and tutorial changes
* added docs for parameter `use_auth_token`
* Add latest docstring and tutorial changes
* adding docs link
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* private hugging face models for retrievers (#1785)
* private dpr
* Add latest docstring and tutorial changes
* added parameters to child functions
* Add latest docstring and tutorial changes
* added tableextractor
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* ignore empty filters parameter (#1783)
* ignore empty filters parameter
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* initialize doc store with doc and label index in tutorial 5 (#1730)
* initialize doc store with doc and label index
* change ipynb according to py for tutorial 5
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Small fixes to the public demo (#1781)
* Make strealit tolerant to haystack not knowing its version, and adding special error for docstore issues
* Add workaround for a Streamlit bug
* Make default filters value an empty dict
* Return more context for each answer in the rest api
* Make the hs_version call not-blocking by adding a very quick timeout
* Add disclaimer on low confidence answer
* Use the no-answer feature of the reader to highlight questions with no good answer
* Upgrade torch to v1.10.0 (#1789)
* Upgrade torch to v1.10.0
* Adapt torch version for torch-scatter in TableQA tutorial
* Add latest docstring and tutorial changes
* Make torch version more flexible
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* adding yaml functionality to BaseStandardPipeline
fixes #1681
* Add latest docstring and tutorial changes
* added test for saving and loading prebuilt pipelines
* fixed typo, changed variable name and added comments
* Add latest docstring and tutorial changes
* fix code rendering for example
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Branden Chan <33759007+brandenchan@users.noreply.github.com>
Co-authored-by: Julian Risch <julian.risch@deepset.ai>
Co-authored-by: Sara Zan <sara.zanzottera@deepset.ai>
Co-authored-by: nishanthcgit <5066268+nishanthcgit@users.noreply.github.com>
Co-authored-by: tstadel <60758086+tstadel@users.noreply.github.com>
Co-authored-by: bogdankostic <bogdankostic@web.de>
Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai>
Co-authored-by: C V Goudar <cvgoudar@users.noreply.github.com>
Co-authored-by: Kristof Herrmann <37148029+ArzelaAscoIi@users.noreply.github.com>
2021-11-23 17:01:39 +01:00
|
|
|
components:
|
|
|
|
- name: ESRetriever
|
|
|
|
params:
|
|
|
|
document_store: ElasticsearchDocumentStore
|
|
|
|
type: ElasticsearchRetriever
|
|
|
|
- name: ElasticsearchDocumentStore
|
|
|
|
params:
|
|
|
|
index: haystack_test
|
|
|
|
label_index: haystack_test_label
|
|
|
|
type: ElasticsearchDocumentStore
|
|
|
|
- name: Reader
|
|
|
|
params:
|
|
|
|
model_name_or_path: deepset/roberta-base-squad2
|
|
|
|
no_ans_boost: -10
|
2021-12-22 17:20:23 +01:00
|
|
|
num_processes: 0
|
Adding yaml functionality to standard pipelines (save/load...) (#1735)
* adding yaml functionality to BaseStandardPipeline
fixes #1681
* Add latest docstring and tutorial changes
* Update API Reference Pages for v1.0 (#1729)
* Create new API pages and update existing ones
* Create query classifier page
* Remove Objects suffix
* Change answer aggregation key to doc_id, query instead of label_id, query (#1726)
* Add debugging example to tutorial (#1731)
* Add debugging example to tutorial
* Add latest docstring and tutorial changes
* Remove Objects suffix
* Add latest docstring and tutorial changes
* Revert "Remove Objects suffix"
This reverts commit 6681cb06510b080775994effe6a50bae42254be4.
* Revert unintentional commit
* Add third debugging option
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Fix another self.device/s typo (#1734)
* Fix yet another self.device(s) typo
* Add typing to 'initialize_device_settings' to try prevent future issues
* Fix bug in Tutorial5
* Fix the same bug in the notebook
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* added test for saving and loading prebuilt pipelines
* fixed typo, changed variable name and added comments
* Add latest docstring and tutorial changes
* Fix a few details of some tutorials (#1733)
* Make Tutorial10 use print instead of logs and fix a typo in Tutoria15
* Add a type check in 'print_answers'
* Add same checks to print_documents and print_questions
* Make RAGenerator return Answers instead of dictionaries
* Fix RAGenerator tests
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Fix `print_answers` (#1743)
* Fix a specific path of print_answers that was assuming answers are dictionaries
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Split pipeline tests into three suites (#1755)
* Split pipeline tests into three suites
* Will this trigger the CI?
* Rename duplicate test into test_most_similar_documents_pipeline
* Fixing a bug that was probably never noticed
* Capitalize starting letter in params (#1750)
* Capitalize starting letter in params
Capitalized the starting letter in code examples for params in keeping with the latest names for nodes where first letter is capitalized.
Refer: https://github.com/deepset-ai/haystack/issues/1748
* Update standard_pipelines.py
Capitalized some starting letters in the docstrings in keeping with the updated node names for standard pipelines
* Multi query eval (#1746)
* add eval() to pipeline
* Add latest docstring and tutorial changes
* support multiple queries in eval()
* Add latest docstring and tutorial changes
* keep single query test
* fix EvaluationResult node_results default
* adjust docstrings
* Add latest docstring and tutorial changes
* minor improvements from comments
* Add latest docstring and tutorial changes
* move EvaluationResult and calculate_metrics to schema
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Split summarizer tests in order to make windows CI work again (#1757)
* separate testfile for summarizer with translation
* Add latest docstring and tutorial changes
* import SPLIT_DOCS from test_summarizer
* add workflow_dispatch to windows_ci
* add worflow_dispatch to linux_ci
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* fix import of EvaluationResult in test case
* exclude test_summarizer_translation.py for windows_ci (#1759)
* Pipelines now tolerate custom _debug content (#1756)
* Pipelines now tolerate custom _debug content
* Support Tables in all DocumentStores (#1744)
* Add support for tables in SQLDocumentStore, FAISSDocumentStore and MilvuDocumentStore
* Add support for WeaviateDocumentStore
* Make sure that embedded meta fields are strings + add embedding_dim to WeaviateDocStore in test config
* Add latest docstring and tutorial changes
* Represent tables in WeaviateDocumentStore as nested lists
* Fix mypy
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Allow TableReader models without aggregation classifier (#1772)
* Fix usage of filters in `/query` endpoint in REST API (#1774)
* WIP filter refactoring
* fix filter formatting
* remove inplace modification of filters
* Public demo (#1747)
* Queries now run only when pressing RUN. File upload hidden. Question is not sent if the textbox is empty.
* Add latest docstring and tutorial changes
* Tidy up: remove needless state, add comments, fix minor bugs
* Had to add results to the status to avoid some bugs in eval mode
* Added 'credits'
* Add footers, update requirements, some random questions for the evaluation
* Add requested changes
* Temporary rollback the UI to the old GoT dataset
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Facilitate concurrent query / indexing in Elasticsearch with dense retrievers (new `skip_missing_embeddings` param) (#1762)
* Filtering records not having embeddings
* Added support for skip_missing_embeddings Flag. Default behavior is throw error when embeddings are missing. If skip_missing_embeddings=True then documents without embeddings are ignored for vector similarity
* Fix for below error:
haystack/document_stores/elasticsearch.py:852: error: Need type annotation for "script_score_query"
* docstring for skip_missing_embeddings parameter
* Raise exception where no documents with embeddings is found for Embedding retriever.
* Default skip_missing_embeddings to True
* Explicitly check if embeddings are present if no results are returned by EmbeddingRetriever for Elasticsearch
* Added test case for based on Julian's input
* Added test case for based on Julian's input. Fix pytest error on the testcase
* Added test case for based on Julian's input. Fix pytest error on the testcase
* Added test case for based on Julian's input. Fix pytest error on the testcase
* Simplify code by using get_embed_count
* Adjust docstring & error msg slightly
* Revert error msg
Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai>
* Huggingface private model support via API tokens (FARMReader) (#1775)
* passed kwargs to model loading
* Pass Auth token explicitly
* add use_auth_token to get_language_model_class
* added use_auth_token parameter at FARMReader
* Add latest docstring and tutorial changes
* added docs for parameter `use_auth_token`
* Add latest docstring and tutorial changes
* adding docs link
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* private hugging face models for retrievers (#1785)
* private dpr
* Add latest docstring and tutorial changes
* added parameters to child functions
* Add latest docstring and tutorial changes
* added tableextractor
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* ignore empty filters parameter (#1783)
* ignore empty filters parameter
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* initialize doc store with doc and label index in tutorial 5 (#1730)
* initialize doc store with doc and label index
* change ipynb according to py for tutorial 5
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Small fixes to the public demo (#1781)
* Make strealit tolerant to haystack not knowing its version, and adding special error for docstore issues
* Add workaround for a Streamlit bug
* Make default filters value an empty dict
* Return more context for each answer in the rest api
* Make the hs_version call not-blocking by adding a very quick timeout
* Add disclaimer on low confidence answer
* Use the no-answer feature of the reader to highlight questions with no good answer
* Upgrade torch to v1.10.0 (#1789)
* Upgrade torch to v1.10.0
* Adapt torch version for torch-scatter in TableQA tutorial
* Add latest docstring and tutorial changes
* Make torch version more flexible
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* adding yaml functionality to BaseStandardPipeline
fixes #1681
* Add latest docstring and tutorial changes
* added test for saving and loading prebuilt pipelines
* fixed typo, changed variable name and added comments
* Add latest docstring and tutorial changes
* fix code rendering for example
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Branden Chan <33759007+brandenchan@users.noreply.github.com>
Co-authored-by: Julian Risch <julian.risch@deepset.ai>
Co-authored-by: Sara Zan <sara.zanzottera@deepset.ai>
Co-authored-by: nishanthcgit <5066268+nishanthcgit@users.noreply.github.com>
Co-authored-by: tstadel <60758086+tstadel@users.noreply.github.com>
Co-authored-by: bogdankostic <bogdankostic@web.de>
Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai>
Co-authored-by: C V Goudar <cvgoudar@users.noreply.github.com>
Co-authored-by: Kristof Herrmann <37148029+ArzelaAscoIi@users.noreply.github.com>
2021-11-23 17:01:39 +01:00
|
|
|
type: FARMReader
|
|
|
|
pipelines:
|
|
|
|
- name: query
|
|
|
|
nodes:
|
|
|
|
- inputs:
|
|
|
|
- Query
|
|
|
|
name: ESRetriever
|
|
|
|
- inputs:
|
|
|
|
- ESRetriever
|
|
|
|
name: Reader
|
|
|
|
type: Pipeline
|
2022-02-21 12:22:37 +01:00
|
|
|
version: {__version__}
|
Adding yaml functionality to standard pipelines (save/load...) (#1735)
* adding yaml functionality to BaseStandardPipeline
fixes #1681
* Add latest docstring and tutorial changes
* Update API Reference Pages for v1.0 (#1729)
* Create new API pages and update existing ones
* Create query classifier page
* Remove Objects suffix
* Change answer aggregation key to doc_id, query instead of label_id, query (#1726)
* Add debugging example to tutorial (#1731)
* Add debugging example to tutorial
* Add latest docstring and tutorial changes
* Remove Objects suffix
* Add latest docstring and tutorial changes
* Revert "Remove Objects suffix"
This reverts commit 6681cb06510b080775994effe6a50bae42254be4.
* Revert unintentional commit
* Add third debugging option
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Fix another self.device/s typo (#1734)
* Fix yet another self.device(s) typo
* Add typing to 'initialize_device_settings' to try prevent future issues
* Fix bug in Tutorial5
* Fix the same bug in the notebook
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* added test for saving and loading prebuilt pipelines
* fixed typo, changed variable name and added comments
* Add latest docstring and tutorial changes
* Fix a few details of some tutorials (#1733)
* Make Tutorial10 use print instead of logs and fix a typo in Tutoria15
* Add a type check in 'print_answers'
* Add same checks to print_documents and print_questions
* Make RAGenerator return Answers instead of dictionaries
* Fix RAGenerator tests
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Fix `print_answers` (#1743)
* Fix a specific path of print_answers that was assuming answers are dictionaries
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Split pipeline tests into three suites (#1755)
* Split pipeline tests into three suites
* Will this trigger the CI?
* Rename duplicate test into test_most_similar_documents_pipeline
* Fixing a bug that was probably never noticed
* Capitalize starting letter in params (#1750)
* Capitalize starting letter in params
Capitalized the starting letter in code examples for params in keeping with the latest names for nodes where first letter is capitalized.
Refer: https://github.com/deepset-ai/haystack/issues/1748
* Update standard_pipelines.py
Capitalized some starting letters in the docstrings in keeping with the updated node names for standard pipelines
* Multi query eval (#1746)
* add eval() to pipeline
* Add latest docstring and tutorial changes
* support multiple queries in eval()
* Add latest docstring and tutorial changes
* keep single query test
* fix EvaluationResult node_results default
* adjust docstrings
* Add latest docstring and tutorial changes
* minor improvements from comments
* Add latest docstring and tutorial changes
* move EvaluationResult and calculate_metrics to schema
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Split summarizer tests in order to make windows CI work again (#1757)
* separate testfile for summarizer with translation
* Add latest docstring and tutorial changes
* import SPLIT_DOCS from test_summarizer
* add workflow_dispatch to windows_ci
* add worflow_dispatch to linux_ci
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* fix import of EvaluationResult in test case
* exclude test_summarizer_translation.py for windows_ci (#1759)
* Pipelines now tolerate custom _debug content (#1756)
* Pipelines now tolerate custom _debug content
* Support Tables in all DocumentStores (#1744)
* Add support for tables in SQLDocumentStore, FAISSDocumentStore and MilvuDocumentStore
* Add support for WeaviateDocumentStore
* Make sure that embedded meta fields are strings + add embedding_dim to WeaviateDocStore in test config
* Add latest docstring and tutorial changes
* Represent tables in WeaviateDocumentStore as nested lists
* Fix mypy
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Allow TableReader models without aggregation classifier (#1772)
* Fix usage of filters in `/query` endpoint in REST API (#1774)
* WIP filter refactoring
* fix filter formatting
* remove inplace modification of filters
* Public demo (#1747)
* Queries now run only when pressing RUN. File upload hidden. Question is not sent if the textbox is empty.
* Add latest docstring and tutorial changes
* Tidy up: remove needless state, add comments, fix minor bugs
* Had to add results to the status to avoid some bugs in eval mode
* Added 'credits'
* Add footers, update requirements, some random questions for the evaluation
* Add requested changes
* Temporary rollback the UI to the old GoT dataset
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Facilitate concurrent query / indexing in Elasticsearch with dense retrievers (new `skip_missing_embeddings` param) (#1762)
* Filtering records not having embeddings
* Added support for skip_missing_embeddings Flag. Default behavior is throw error when embeddings are missing. If skip_missing_embeddings=True then documents without embeddings are ignored for vector similarity
* Fix for below error:
haystack/document_stores/elasticsearch.py:852: error: Need type annotation for "script_score_query"
* docstring for skip_missing_embeddings parameter
* Raise exception where no documents with embeddings is found for Embedding retriever.
* Default skip_missing_embeddings to True
* Explicitly check if embeddings are present if no results are returned by EmbeddingRetriever for Elasticsearch
* Added test case based on Julian's input
* Added test case based on Julian's input. Fix pytest error on the testcase
* Added test case based on Julian's input. Fix pytest error on the testcase
* Added test case based on Julian's input. Fix pytest error on the testcase
* Simplify code by using get_embed_count
* Adjust docstring & error msg slightly
* Revert error msg
Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai>
* Huggingface private model support via API tokens (FARMReader) (#1775)
* passed kwargs to model loading
* Pass Auth token explicitly
* add use_auth_token to get_language_model_class
* added use_auth_token parameter at FARMReader
* Add latest docstring and tutorial changes
* added docs for parameter `use_auth_token`
* Add latest docstring and tutorial changes
* adding docs link
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* private hugging face models for retrievers (#1785)
* private dpr
* Add latest docstring and tutorial changes
* added parameters to child functions
* Add latest docstring and tutorial changes
* added tableextractor
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* ignore empty filters parameter (#1783)
* ignore empty filters parameter
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* initialize doc store with doc and label index in tutorial 5 (#1730)
* initialize doc store with doc and label index
* change ipynb according to py for tutorial 5
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Small fixes to the public demo (#1781)
* Make streamlit tolerant to haystack not knowing its version, and adding special error for docstore issues
* Add workaround for a Streamlit bug
* Make default filters value an empty dict
* Return more context for each answer in the rest api
* Make the hs_version call not-blocking by adding a very quick timeout
* Add disclaimer on low confidence answer
* Use the no-answer feature of the reader to highlight questions with no good answer
* Upgrade torch to v1.10.0 (#1789)
* Upgrade torch to v1.10.0
* Adapt torch version for torch-scatter in TableQA tutorial
* Add latest docstring and tutorial changes
* Make torch version more flexible
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* adding yaml functionality to BaseStandardPipeline
fixes #1681
* Add latest docstring and tutorial changes
* added test for saving and loading prebuilt pipelines
* fixed typo, changed variable name and added comments
* Add latest docstring and tutorial changes
* fix code rendering for example
* Add latest docstring and tutorial changes
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Branden Chan <33759007+brandenchan@users.noreply.github.com>
Co-authored-by: Julian Risch <julian.risch@deepset.ai>
Co-authored-by: Sara Zan <sara.zanzottera@deepset.ai>
Co-authored-by: nishanthcgit <5066268+nishanthcgit@users.noreply.github.com>
Co-authored-by: tstadel <60758086+tstadel@users.noreply.github.com>
Co-authored-by: bogdankostic <bogdankostic@web.de>
Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai>
Co-authored-by: C V Goudar <cvgoudar@users.noreply.github.com>
Co-authored-by: Kristof Herrmann <37148029+ArzelaAscoIi@users.noreply.github.com>
2021-11-23 17:01:39 +01:00
|
|
|
"""
|
2022-02-03 13:43:18 +01:00
|
|
|
assert saved_yaml.replace(" ", "").replace("\n", "") == expected_yaml.replace(" ", "").replace("\n", "")
|
|
|
|
|
2021-02-02 17:32:17 +01:00
|
|
|
|
2021-10-28 16:48:06 +02:00
|
|
|
def test_load_tfidfretriever_yaml(tmp_path):
    """Run a YAML-defined TF-IDF query pipeline before and after documents are written.

    The first run is expected to raise with a "Retrieval requires dataframe df
    and tf-idf matrix" message; after one document is written to the
    retriever's document store the same query must yield the answer "haystack".
    """
    docs_to_index = [
        {
            "content": "A Doc specifically talking about haystack. Haystack can be used to scale QA models to large document collections."
        }
    ]
    yaml_file = SAMPLES_PATH / "pipeline" / "test_pipeline_tfidfretriever.yaml"
    pipeline = Pipeline.load_from_yaml(yaml_file, pipeline_name="query_pipeline")

    # No documents have been written yet, so this run must fail.
    with pytest.raises(Exception) as raised:
        pipeline.run(
            query="What can be used to scale QA models to large document collections?",
            params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 3}},
        )
    assert "Retrieval requires dataframe df and tf-idf matrix" in str(raised.value)

    retriever_store = pipeline.get_node(name="Retriever").document_store
    retriever_store.write_documents(documents=docs_to_index)
    result = pipeline.run(
        query="What can be used to scale QA models to large document collections?",
        params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 3}},
    )
    assert result["query"] == "What can be used to scale QA models to large document collections?"
    top_answer = result["answers"][0]
    assert top_answer.answer == "haystack"
|
|
|
|
|
2022-02-03 13:43:18 +01:00
|
|
|
|
2022-01-28 17:32:56 +01:00
|
|
|
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_load_from_deepset_cloud_query():
    """Load the default pipeline from deepset Cloud and run one query through it.

    When MOCK_DC is set, the pipeline-config GET and the documents-query POST
    are answered by registered `responses` mocks.
    """
    if MOCK_DC:
        config_file = SAMPLES_PATH / "dc" / "pipeline_config.json"
        with open(config_file, "r") as config_handle:
            mocked_config = json.load(config_handle)

        # Pipeline configuration download.
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/{DC_TEST_INDEX}/json",
            json=mocked_config,
            status=200,
        )

        # Document query issued by the retriever; returns a single document.
        mocked_hits = [{"id": "test_doc", "content": "man on hores"}]
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents-query",
            json=mocked_hits,
            status=200,
        )

    loaded_pipeline = Pipeline.load_from_deepset_cloud(
        pipeline_config_name=DC_TEST_INDEX, api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
    )
    retriever_node = loaded_pipeline.get_node("Retriever")
    store = retriever_node.document_store
    assert isinstance(retriever_node, ElasticsearchRetriever)
    assert isinstance(store, DeepsetCloudDocumentStore)
    assert store == loaded_pipeline.get_document_store()

    result = loaded_pipeline.run(query="man on horse", params={})

    assert result["query"] == "man on horse"
    assert len(result["documents"]) == 1
    assert result["documents"][0].id == "test_doc"
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_load_from_deepset_cloud_indexing():
    """Load the "indexing" pipeline from deepset Cloud and check that running it is rejected.

    The run must raise with a message matching the NotImplementedError text
    about DeepsetCloudDocumentStore not supporting document writes.
    """
    if MOCK_DC:
        config_file = SAMPLES_PATH / "dc" / "pipeline_config.json"
        with open(config_file, "r") as config_handle:
            mocked_config = json.load(config_handle)

        # Pipeline configuration download.
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/{DC_TEST_INDEX}/json",
            json=mocked_config,
            status=200,
        )

    loaded_pipeline = Pipeline.load_from_deepset_cloud(
        pipeline_config_name=DC_TEST_INDEX, api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, pipeline_name="indexing"
    )
    store_node = loaded_pipeline.get_node("DocumentStore")
    assert isinstance(store_node, DeepsetCloudDocumentStore)

    expected_error = ".*NotImplementedError.*DeepsetCloudDocumentStore currently does not support writing documents"
    with pytest.raises(Exception, match=expected_error):
        loaded_pipeline.run(file_paths=[SAMPLES_PATH / "docs" / "doc_1.txt"])
|
2022-01-28 17:32:56 +01:00
|
|
|
|
2021-10-28 16:48:06 +02:00
|
|
|
|
2022-02-08 20:35:25 +01:00
|
|
|
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_list_pipelines_on_deepset_cloud():
    """List the pipelines of the default workspace and expect exactly one entry."""
    if MOCK_DC:
        # Single-pipeline listing returned by the mocked endpoint.
        listed_pipeline = {
            "name": "test_pipeline_config",
            "pipeline_id": "2184e0c1-c6ec-40a1-9b28-5d2768e5efa2",
            "status": "DEPLOYED",
            "created_at": "2022-02-01T09:57:03.803991+00:00",
            "deleted": False,
            "is_default": False,
            "indexing": {"status": "IN_PROGRESS", "pending_file_count": 4, "total_file_count": 33},
        }
        listing_payload = {"data": [listed_pipeline], "has_more": False, "total": 1}
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines",
            json=listing_payload,
            status=200,
        )

    found = Pipeline.list_pipelines_on_deepset_cloud(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY)
    assert len(found) == 1
    first_entry = found[0]
    assert first_entry["name"] == "test_pipeline_config"
|
|
|
|
|
|
|
|
|
2022-02-11 12:50:53 +01:00
|
|
|
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_save_to_deepset_cloud():
    """Exercise Pipeline.save_to_deepset_cloud for four pipeline-config situations.

    Covered cases: saving under a new config name, saving under an existing
    name without `overwrite` (ValueError), saving under an existing name with
    `overwrite=True`, and overwriting a deployed config (ValueError).
    """
    if MOCK_DC:
        # Existing, undeployed config.
        undeployed_info = {
            "name": "test_pipeline_config",
            "pipeline_id": "2184e9c1-c6ec-40a1-9b28-5d2768e5efa2",
            "status": "UNDEPLOYED",
            "created_at": "2022-02-01T09:57:03.803991+00:00",
            "deleted": False,
            "is_default": False,
            "indexing": {"status": "IN_PROGRESS", "pending_file_count": 4, "total_file_count": 33},
        }
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_pipeline_config",
            json=undeployed_info,
            status=200,
        )

        # Existing, deployed config.
        deployed_info = {
            "name": "test_pipeline_config_deployed",
            "pipeline_id": "8184e0c1-c6ec-40a1-9b28-5d2768e5efa3",
            "status": "DEPLOYED",
            "created_at": "2022-02-09T09:57:03.803991+00:00",
            "deleted": False,
            "is_default": False,
            "indexing": {"status": "INDEXED", "pending_file_count": 0, "total_file_count": 33},
        }
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_pipeline_config_deployed",
            json=deployed_info,
            status=200,
        )

        # Config name that does not exist yet.
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_pipeline_config_copy",
            json={"errors": ["Pipeline with the name test_pipeline_config_copy does not exists."]},
            status=404,
        )

        config_file = SAMPLES_PATH / "dc" / "pipeline_config.json"
        with open(config_file, "r") as config_handle:
            mocked_config = json.load(config_handle)

        # Configuration download used to load the two pipelines below.
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/{DC_TEST_INDEX}/json",
            json=mocked_config,
            status=200,
        )

        # Creation of a new config.
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines",
            json={"name": "test_pipeline_config_copy"},
            status=200,
        )

        # YAML update of the undeployed config succeeds.
        responses.add(
            method=responses.PUT,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_pipeline_config/yaml",
            json={"name": "test_pipeline_config"},
            status=200,
        )

        # YAML update of the deployed config is refused.
        responses.add(
            method=responses.PUT,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_pipeline_config_deployed/yaml",
            json={"errors": ["Updating the pipeline yaml is not allowed for pipelines with status: 'DEPLOYED'"]},
            status=406,
        )

    query_pipeline = Pipeline.load_from_deepset_cloud(
        pipeline_config_name=DC_TEST_INDEX, api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
    )

    index_pipeline = Pipeline.load_from_deepset_cloud(
        pipeline_config_name=DC_TEST_INDEX, api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, pipeline_name="indexing"
    )

    # Keyword arguments that are the same for every save call below.
    shared_save_args = {
        "query_pipeline": query_pipeline,
        "index_pipeline": index_pipeline,
        "api_endpoint": DC_API_ENDPOINT,
        "api_key": DC_API_KEY,
    }

    # New config name: plain save.
    Pipeline.save_to_deepset_cloud(pipeline_config_name="test_pipeline_config_copy", **shared_save_args)

    # Existing config name without overwrite: rejected.
    with pytest.raises(
        ValueError,
        match="Pipeline config 'test_pipeline_config' already exists. Set `overwrite=True` to overwrite pipeline config",
    ):
        Pipeline.save_to_deepset_cloud(pipeline_config_name="test_pipeline_config", **shared_save_args)

    # Existing config name with overwrite: accepted.
    Pipeline.save_to_deepset_cloud(pipeline_config_name="test_pipeline_config", overwrite=True, **shared_save_args)

    # Deployed config: rejected even with overwrite.
    with pytest.raises(
        ValueError,
        match="Deployed pipeline configs are not allowed to be updated. Please undeploy pipeline config 'test_pipeline_config_deployed' first",
    ):
        Pipeline.save_to_deepset_cloud(
            pipeline_config_name="test_pipeline_config_deployed", overwrite=True, **shared_save_args
        )
|
|
|
|
|
|
|
|
|
2021-09-27 10:52:07 +02:00
|
|
|
# @pytest.mark.slow
|
|
|
|
# @pytest.mark.elasticsearch
|
|
|
|
# @pytest.mark.parametrize(
|
|
|
|
# "retriever_with_docs, document_store_with_docs",
|
|
|
|
# [("elasticsearch", "elasticsearch")],
|
|
|
|
# indirect=True,
|
|
|
|
# )
|
2021-04-21 12:18:33 +02:00
|
|
|
@pytest.mark.parametrize(
    "retriever_with_docs,document_store_with_docs",
    [
        ("dpr", "elasticsearch"),
        ("dpr", "faiss"),
        ("dpr", "memory"),
        ("dpr", "milvus"),
        ("embedding", "elasticsearch"),
        ("embedding", "faiss"),
        ("embedding", "memory"),
        ("embedding", "milvus"),
        ("elasticsearch", "elasticsearch"),
        ("es_filter_only", "elasticsearch"),
        ("tfidf", "memory"),
    ],
    indirect=True,
)
def test_graph_creation(retriever_with_docs, document_store_with_docs):
    """Check that Pipeline.add_node rejects invalid `inputs` entries.

    Covered cases: an "ES.output_2" edge, an unknown edge label, an unknown
    input node on a pipeline that already has a node, and an unknown input
    node on an empty pipeline.
    """
    pipeline = Pipeline()
    pipeline.add_node(name="ES", component=retriever_with_docs, inputs=["Query"])

    with pytest.raises(AssertionError):
        pipeline.add_node(name="Reader", component=retriever_with_docs, inputs=["ES.output_2"])

    with pytest.raises(AssertionError):
        pipeline.add_node(name="Reader", component=retriever_with_docs, inputs=["ES.wrong_edge_label"])

    with pytest.raises(Exception):
        pipeline.add_node(name="Reader", component=retriever_with_docs, inputs=["InvalidNode"])

    # Construct the pipeline outside pytest.raises: only add_node is expected
    # to fail, and a broken constructor must not make this check pass.
    empty_pipeline = Pipeline()
    with pytest.raises(Exception):
        empty_pipeline.add_node(name="ES", component=retriever_with_docs, inputs=["InvalidNode"])
|
2021-04-21 12:18:33 +02:00
|
|
|
|
2020-11-20 17:41:08 +01:00
|
|
|
|
2021-03-10 18:17:23 +01:00
|
|
|
def test_parallel_paths_in_pipeline_graph():
    """Run two pipelines whose paths fork after node B and meet again in a join node.

    Every plain node appends its own letter to the "test" value; the join node
    concatenates the "test" values of its two inputs.
    """

    class A(RootNode):
        def run(self):
            return {"test": "A"}, "output_1"

    class B(RootNode):
        def run(self, test):
            return {"test": test + "B"}, "output_1"

    class C(RootNode):
        def run(self, test):
            return {"test": test + "C"}, "output_1"

    class D(RootNode):
        def run(self, test):
            return {"test": test + "D"}, "output_1"

    class E(RootNode):
        def run(self, test):
            return {"test": test + "E"}, "output_1"

    class JoinNode(RootNode):
        def run(self, inputs):
            first, second = inputs[0], inputs[1]
            return {"test": first["test"] + second["test"]}, "output_1"

    def build_and_run(layout):
        # layout: (node name, component class, input names) in insertion order.
        graph = Pipeline()
        for node_name, node_cls, node_inputs in layout:
            graph.add_node(name=node_name, component=node_cls(), inputs=node_inputs)
        return graph.run(query="test")

    # A -> B -> C -> E and A -> B -> D, joined in F.
    uneven_branches = [
        ("A", A, ["Query"]),
        ("B", B, ["A"]),
        ("C", C, ["B"]),
        ("E", E, ["C"]),
        ("D", D, ["B"]),
        ("F", JoinNode, ["D", "E"]),
    ]
    result = build_and_run(uneven_branches)
    assert result["test"] == "ABDABCE"

    # A -> B -> C and A -> B -> D, joined in E.
    even_branches = [
        ("A", A, ["Query"]),
        ("B", B, ["A"]),
        ("C", C, ["B"]),
        ("D", D, ["B"]),
        ("E", JoinNode, ["C", "D"]),
    ]
    result = build_and_run(even_branches)
    assert result["test"] == "ABCABD"
|
2021-03-18 12:41:30 +01:00
|
|
|
|
|
|
|
|
|
|
|
def test_parallel_paths_in_pipeline_graph_with_branching():
    """Run the same forked graph with three root nodes that differ in the edge they emit on.

    The root node returns "output_1", "output_2" or "output_all"; the expected
    joined output differs for each of the three.
    """

    class AWithOutput1(RootNode):
        outgoing_edges = 2

        def run(self):
            return {"output": "A"}, "output_1"

    class AWithOutput2(RootNode):
        outgoing_edges = 2

        def run(self):
            return {"output": "A"}, "output_2"

    class AWithOutputAll(RootNode):
        outgoing_edges = 2

        def run(self):
            return {"output": "A"}, "output_all"

    class B(RootNode):
        def run(self, output):
            return {"output": output + "B"}, "output_1"

    class C(RootNode):
        def run(self, output):
            return {"output": output + "C"}, "output_1"

    class D(RootNode):
        def run(self, output):
            return {"output": output + "D"}, "output_1"

    class E(RootNode):
        def run(self, output):
            return {"output": output + "E"}, "output_1"

    class JoinNode(RootNode):
        def run(self, output=None, inputs=None):
            if inputs:
                output = "".join(input_dict["output"] for input_dict in inputs)
            return {"output": output}, "output_1"

    scenarios = (
        (AWithOutput1, "ABEABD"),
        (AWithOutput2, "AC"),
        (AWithOutputAll, "ACABEABD"),
    )
    for root_cls, expected_output in scenarios:
        pipeline = Pipeline()
        pipeline.add_node(name="A", component=root_cls(), inputs=["Query"])
        pipeline.add_node(name="B", component=B(), inputs=["A.output_1"])
        pipeline.add_node(name="C", component=C(), inputs=["A.output_2"])
        # Note: the node named "D" wraps component E and the node named "E"
        # wraps component D; the expected strings rely on this pairing.
        pipeline.add_node(name="D", component=E(), inputs=["B"])
        pipeline.add_node(name="E", component=D(), inputs=["B"])
        pipeline.add_node(name="F", component=JoinNode(), inputs=["D", "E", "C"])
        result = pipeline.run(query="test")
        assert result["output"] == expected_output
|
|
|
|
|
|
|
|
|
2022-02-22 15:01:07 +01:00
|
|
|
def test_pipeline_components():
    """Check that `pipeline.components` maps every node name to the component added under it."""

    class Node(BaseComponent):
        outgoing_edges = 1

        def run(self):
            return {"test": "test"}, "output_1"

    node_names = ["A", "B", "C", "D", "E"]
    nodes_by_name = {node_name: Node() for node_name in node_names}

    # Chain the nodes: Query -> A -> B -> C -> D -> E.
    pipeline = Pipeline()
    upstream = "Query"
    for node_name in node_names:
        pipeline.add_node(name=node_name, component=nodes_by_name[node_name], inputs=[upstream])
        upstream = node_name

    assert len(pipeline.components) == 5
    for node_name in node_names:
        assert pipeline.components[node_name] == nodes_by_name[node_name]
|
|
|
|
|
|
|
|
|
|
|
|
def test_pipeline_get_document_store_from_components():
    """A document store added directly as a node is what get_document_store() returns."""

    class DummyDocumentStore(BaseDocumentStore):
        """Document store stand-in without any behaviour of its own."""

    doc_store = DummyDocumentStore()
    indexing_pipeline = Pipeline()
    indexing_pipeline.add_node(name="A", component=doc_store, inputs=["File"])

    found_store = indexing_pipeline.get_document_store()
    assert doc_store == found_store
|
|
|
|
|
|
|
|
|
|
|
|
def test_pipeline_get_document_store_from_components_multiple_doc_stores():
    """Two distinct document stores added as nodes make get_document_store() raise."""

    class DummyDocumentStore(BaseDocumentStore):
        """Document store stand-in without any behaviour of its own."""

    first_store = DummyDocumentStore()
    second_store = DummyDocumentStore()
    indexing_pipeline = Pipeline()
    for node_name, store in (("A", first_store), ("B", second_store)):
        indexing_pipeline.add_node(name=node_name, component=store, inputs=["File"])

    with pytest.raises(Exception, match="Multiple Document Stores found in Pipeline"):
        indexing_pipeline.get_document_store()
|
|
|
|
|
|
|
|
|
|
|
|
def test_pipeline_get_document_store_from_retriever():
    """get_document_store() returns the store held by the pipeline's only retriever node."""

    class DummyRetriever(BaseRetriever):
        def __init__(self, document_store):
            self.document_store = document_store

        def run(self):
            return {"test": "test"}, "output_1"

    class DummyDocumentStore(BaseDocumentStore):
        """Document store stand-in without any behaviour of its own."""

    doc_store = DummyDocumentStore()
    query_pipeline = Pipeline()
    query_pipeline.add_node(name="A", component=DummyRetriever(document_store=doc_store), inputs=["Query"])

    found_store = query_pipeline.get_document_store()
    assert doc_store == found_store
|
|
|
|
|
|
|
|
|
|
|
|
def test_pipeline_get_document_store_from_dual_retriever():
    """Two retrievers that share one document store still resolve to that single store."""

    class DummyRetriever(BaseRetriever):
        def __init__(self, document_store):
            self.document_store = document_store

        def run(self):
            return {"test": "test"}, "output_1"

    class DummyDocumentStore(BaseDocumentStore):
        """Document store stand-in without any behaviour of its own."""

    class JoinNode(RootNode):
        def run(self, output=None, inputs=None):
            if inputs:
                output = "".join(input_dict["output"] for input_dict in inputs)
            return {"output": output}, "output_1"

    doc_store = DummyDocumentStore()
    first_retriever = DummyRetriever(document_store=doc_store)
    second_retriever = DummyRetriever(document_store=doc_store)

    query_pipeline = Pipeline()
    for node_name, retriever in (("A", first_retriever), ("B", second_retriever)):
        query_pipeline.add_node(name=node_name, component=retriever, inputs=["Query"])
    query_pipeline.add_node(name="C", component=JoinNode(), inputs=["A", "B"])

    found_store = query_pipeline.get_document_store()
    assert doc_store == found_store
|
|
|
|
|
|
|
|
|
|
|
|
def test_pipeline_get_document_store_multiple_doc_stores_from_dual_retriever():
    """Two retrievers with different document stores make get_document_store() raise."""

    class DummyRetriever(BaseRetriever):
        def __init__(self, document_store):
            self.document_store = document_store

        def run(self):
            return {"test": "test"}, "output_1"

    class DummyDocumentStore(BaseDocumentStore):
        """Document store stand-in without any behaviour of its own."""

    class JoinNode(RootNode):
        def run(self, output=None, inputs=None):
            if inputs:
                output = "".join(input_dict["output"] for input_dict in inputs)
            return {"output": output}, "output_1"

    first_store = DummyDocumentStore()
    second_store = DummyDocumentStore()
    first_retriever = DummyRetriever(document_store=first_store)
    second_retriever = DummyRetriever(document_store=second_store)

    query_pipeline = Pipeline()
    for node_name, retriever in (("A", first_retriever), ("B", second_retriever)):
        query_pipeline.add_node(name=node_name, component=retriever, inputs=["Query"])
    query_pipeline.add_node(name="C", component=JoinNode(), inputs=["A", "B"])

    with pytest.raises(Exception, match="Multiple Document Stores found in Pipeline"):
        query_pipeline.get_document_store()
|
|
|
|
|
|
|
|
|
2021-11-11 11:02:22 +01:00
|
|
|
def test_existing_faiss_document_store():
    """Index a PDF into FAISS, save the store, then query it through a second YAML pipeline.

    The indexing pipeline writes the documents and the store is saved as
    "existing_faiss_document_store"; the query pipeline loaded afterwards must
    see 2 vectors in its "document" FAISS index and return 2 documents.
    """
    clean_faiss_document_store()

    # try/finally so the files created below are removed even when an
    # assertion fails and cannot leak into later tests.
    try:
        pipeline = Pipeline.load_from_yaml(
            SAMPLES_PATH / "pipeline" / "test_pipeline_faiss_indexing.yaml", pipeline_name="indexing_pipeline"
        )
        pipeline.run(file_paths=SAMPLES_PATH / "pdf" / "sample_pdf_1.pdf")

        new_document_store = pipeline.get_document_store()
        new_document_store.save("existing_faiss_document_store")

        # test correct load of query pipeline from yaml
        pipeline = Pipeline.load_from_yaml(
            SAMPLES_PATH / "pipeline" / "test_pipeline_faiss_retrieval.yaml", pipeline_name="query_pipeline"
        )

        retriever = pipeline.get_node("DPRRetriever")
        existing_document_store = retriever.document_store
        faiss_index = existing_document_store.faiss_indexes["document"]
        assert faiss_index.ntotal == 2

        prediction = pipeline.run(query="Who made the PDF specification?", params={"DPRRetriever": {"top_k": 10}})

        assert prediction["query"] == "Who made the PDF specification?"
        assert len(prediction["documents"]) == 2
    finally:
        clean_faiss_document_store()
|
|
|
|
|
|
|
|
|
2022-01-03 11:38:02 +01:00
|
|
|
@pytest.mark.slow
@pytest.mark.parametrize("retriever_with_docs", ["elasticsearch", "dpr", "embedding"], indirect=True)
@pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True)
def test_documentsearch_es_authentication(retriever_with_docs, document_store_with_docs: ElasticsearchDocumentStore):
    """Check that `headers` given as a Retriever param are passed to the client's `search` call."""
    needs_embeddings = isinstance(retriever_with_docs, (DensePassageRetriever, EmbeddingRetriever))
    if needs_embeddings:
        document_store_with_docs.update_embeddings(retriever=retriever_with_docs)

    # Wrap the real client so calls still go through but are recorded.
    mock_client = Mock(wraps=document_store_with_docs.client)
    document_store_with_docs.client = mock_client

    auth_headers = {"Authorization": "Basic YWRtaW46cm9vdA=="}
    search_pipeline = DocumentSearchPipeline(retriever=retriever_with_docs)
    result = search_pipeline.run(
        query="Who lives in Berlin?",
        params={"Retriever": {"top_k": 10, "headers": auth_headers}},
    )
    assert result is not None
    assert len(result["documents"]) == 5

    mock_client.search.assert_called_once()
    _, call_kwargs = mock_client.search.call_args
    assert "headers" in call_kwargs
    assert call_kwargs["headers"] == auth_headers
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.slow
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
def test_documentsearch_document_store_authentication(retriever_with_docs, document_store_with_docs):
    """Run a document search with `headers` in the Retriever params.

    With an ElasticsearchDocumentStore the headers must show up in the
    client's `count` call; with any other store the run is expected to raise.
    """
    mock_client = None
    if isinstance(document_store_with_docs, ElasticsearchDocumentStore):
        es_document_store: ElasticsearchDocumentStore = document_store_with_docs
        # Wrap the real client so calls still go through but are recorded.
        mock_client = Mock(wraps=es_document_store.client)
        es_document_store.client = mock_client

    auth_headers = {"Authorization": "Basic YWRtaW46cm9vdA=="}
    search_pipeline = DocumentSearchPipeline(retriever=retriever_with_docs)

    def run_with_headers():
        return search_pipeline.run(
            query="Who lives in Berlin?",
            params={"Retriever": {"top_k": 10, "headers": auth_headers}},
        )

    if mock_client:
        result = run_with_headers()
        assert result is not None
        assert len(result["documents"]) == 5
        mock_client.count.assert_called_once()
        _, call_kwargs = mock_client.count.call_args
        assert "headers" in call_kwargs
        assert call_kwargs["headers"] == auth_headers
    else:
        with pytest.raises(Exception):
            run_with_headers()
|
|
|
|
|
|
|
|
|
2021-11-11 11:02:22 +01:00
|
|
|
def clean_faiss_document_store():
    """Delete the FAISS test artifacts from the current working directory.

    Removes "existing_faiss_document_store", its ".json" companion and
    "faiss_document_store.db". Files that are not present are skipped, so the
    helper can be called both before and after a test.
    """
    leftovers = (
        "existing_faiss_document_store",
        "existing_faiss_document_store.json",
        "faiss_document_store.db",
    )
    for leftover in leftovers:
        # Remove first and tolerate absence: no gap between an existence
        # check and the deletion.
        try:
            os.remove(leftover)
        except FileNotFoundError:
            pass
|