from copy import deepcopy
from pathlib import Path
import os
import ssl
import json
import platform
import sys
from typing import Tuple

from pyparsing import original_text_for

import pytest
from requests import PreparedRequest
import responses
import logging
from transformers import pipeline
import yaml
import pandas as pd

from haystack import __version__
from haystack.document_stores.deepsetcloud import DeepsetCloudDocumentStore
from haystack.document_stores.elasticsearch import ElasticsearchDocumentStore
from haystack.nodes.other.join_docs import JoinDocuments
from haystack.nodes.base import BaseComponent
from haystack.nodes.retriever.sparse import BM25Retriever
from haystack.pipelines import Pipeline, RootNode
from haystack.pipelines.config import validate_config_strings, get_component_definitions
from haystack.pipelines.utils import generate_code
from haystack.errors import PipelineConfigError
from haystack.nodes import PreProcessor, TextConverter
from haystack.utils.deepsetcloud import DeepsetCloudError
from haystack import Document, Answer
from haystack.nodes.other.route_documents import RouteDocuments
from haystack.nodes.other.join_answers import JoinAnswers

from ..conftest import (
    MOCK_DC,
    DC_API_ENDPOINT,
    DC_API_KEY,
    DC_TEST_INDEX,
    SAMPLES_PATH,
    MockDocumentStore,
    MockRetriever,
    MockNode,
    deepset_cloud_fixture,
)


logger = logging.getLogger(__name__)


@pytest.fixture(scope="function")
def reduce_windows_recursion_limit():
    """
    Prevents the Windows CI from crashing with a stack overflow in situations where we want to provoke a RecursionError
    """
    is_windows = platform.system() == "Windows"
    default_recursion_limit = sys.getrecursionlimit()
    if is_windows:
        reduced_recursion_limit = default_recursion_limit // 2
        logger.warning(f"Reducing recursion limit to {reduced_recursion_limit}")
        sys.setrecursionlimit(reduced_recursion_limit)
    yield
    if is_windows:
        logger.warning(f"Resetting recursion limit to {default_recursion_limit}")
        sys.setrecursionlimit(default_recursion_limit)


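# Minimal dummy components used below to exercise Pipeline.get_config() and generate_code()
# with components that receive other components ("dependents") as init parameters.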
class ParentComponent(BaseComponent):
    outgoing_edges = 1

    def __init__(self, dependent: BaseComponent) -> None:
        super().__init__()

    def run(*args, **kwargs):
        logging.info("ParentComponent run() was called")

    def run_batch(*args, **kwargs):
        pass


class ParentComponent2(BaseComponent):
    outgoing_edges = 1

    def __init__(self, dependent: BaseComponent) -> None:
        super().__init__()

    def run(*args, **kwargs):
        logging.info("ParentComponent2 run() was called")

    def run_batch(*args, **kwargs):
        pass


class ChildComponent(BaseComponent):
    def __init__(self, some_key: str = None) -> None:
        super().__init__()

    def run(*args, **kwargs):
        logging.info("ChildComponent run() was called")

    def run_batch(*args, **kwargs):
        pass


class DummyRetriever(MockRetriever):
    def __init__(self, document_store):
        self.document_store = document_store

    def run(self):
        test = "test"
        return {"test": test}, "output_1"


class JoinNode(RootNode):
    def run(self, output=None, inputs=None):
        if inputs:
            output = ""
            for input_dict in inputs:
                output += input_dict["output"]
        return {"output": output}, "output_1"


#
# Integration tests
#


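# Round-trips the sample YAML pipelines through Pipeline.to_code() and checks that executing
# the generated code reproduces the same pipeline configs.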
@pytest.mark.integration
@pytest.mark.elasticsearch
def test_to_code_creates_same_pipelines():
    index_pipeline = Pipeline.load_from_yaml(
        SAMPLES_PATH / "pipeline" / "test.haystack-pipeline.yml", pipeline_name="indexing_pipeline"
    )
    query_pipeline = Pipeline.load_from_yaml(
        SAMPLES_PATH / "pipeline" / "test.haystack-pipeline.yml", pipeline_name="query_pipeline"
    )
    query_pipeline_code = query_pipeline.to_code(pipeline_variable_name="query_pipeline_from_code")
    index_pipeline_code = index_pipeline.to_code(pipeline_variable_name="index_pipeline_from_code")

    exec(query_pipeline_code)
    exec(index_pipeline_code)
    assert locals()["query_pipeline_from_code"] is not None
    assert locals()["index_pipeline_from_code"] is not None
    assert query_pipeline.get_config() == locals()["query_pipeline_from_code"].get_config()
    assert index_pipeline.get_config() == locals()["index_pipeline_from_code"].get_config()


#
# Unit tests
#


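# The get_config tests below check how Pipeline.get_config() serializes components that are
# passed to other components as init parameters ("dependent" components).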
def test_get_config_creates_dependent_component():
    child = ChildComponent()
    parent = ParentComponent(dependent=child)
    pipeline = Pipeline()
    pipeline.add_node(component=parent, name="parent", inputs=["Query"])

    expected_pipelines = [{"name": "query", "nodes": [{"name": "parent", "inputs": ["Query"]}]}]
    expected_components = [
        {"name": "parent", "type": "ParentComponent", "params": {"dependent": "ChildComponent"}},
        {"name": "ChildComponent", "type": "ChildComponent", "params": {}},
    ]

    config = pipeline.get_config()
    for expected_pipeline in expected_pipelines:
        assert expected_pipeline in config["pipelines"]
    for expected_component in expected_components:
        assert expected_component in config["components"]


def test_get_config_creates_only_one_dependent_component_referenced_by_multiple_parents():
    child = ChildComponent()
    parent = ParentComponent(dependent=child)
    parent2 = ParentComponent2(dependent=child)
    p_ensemble = Pipeline()
    p_ensemble.add_node(component=parent, name="Parent1", inputs=["Query"])
    p_ensemble.add_node(component=parent2, name="Parent2", inputs=["Query"])
    p_ensemble.add_node(component=JoinDocuments(join_mode="merge"), name="JoinResults", inputs=["Parent1", "Parent2"])

    expected_components = [
        {"name": "Parent1", "type": "ParentComponent", "params": {"dependent": "ChildComponent"}},
        {"name": "ChildComponent", "type": "ChildComponent", "params": {}},
        {"name": "Parent2", "type": "ParentComponent2", "params": {"dependent": "ChildComponent"}},
        {"name": "JoinResults", "type": "JoinDocuments", "params": {"join_mode": "merge"}},
    ]

    expected_pipelines = [
        {
            "name": "query",
            "nodes": [
                {"name": "Parent1", "inputs": ["Query"]},
                {"name": "Parent2", "inputs": ["Query"]},
                {"name": "JoinResults", "inputs": ["Parent1", "Parent2"]},
            ],
        }
    ]

    config = p_ensemble.get_config()
    for expected_pipeline in expected_pipelines:
        assert expected_pipeline in config["pipelines"]
    for expected_component in expected_components:
        assert expected_component in config["components"]


def test_get_config_creates_two_different_dependent_components_of_same_type():
    child_a = ChildComponent(some_key="A")
    child_b = ChildComponent(some_key="B")
    parent = ParentComponent(dependent=child_a)
    parent2 = ParentComponent(dependent=child_b)
    p_ensemble = Pipeline()
    p_ensemble.add_node(component=parent, name="ParentA", inputs=["Query"])
    p_ensemble.add_node(component=parent2, name="ParentB", inputs=["Query"])
    p_ensemble.add_node(component=JoinDocuments(join_mode="merge"), name="JoinResults", inputs=["ParentA", "ParentB"])

    expected_components = [
        {"name": "ParentA", "type": "ParentComponent", "params": {"dependent": "ChildComponent"}},
        {"name": "ChildComponent", "type": "ChildComponent", "params": {"some_key": "A"}},
        {"name": "ParentB", "type": "ParentComponent", "params": {"dependent": "ChildComponent_2"}},
        {"name": "ChildComponent_2", "type": "ChildComponent", "params": {"some_key": "B"}},
        {"name": "JoinResults", "type": "JoinDocuments", "params": {"join_mode": "merge"}},
    ]

    expected_pipelines = [
        {
            "name": "query",
            "nodes": [
                {"name": "ParentA", "inputs": ["Query"]},
                {"name": "ParentB", "inputs": ["Query"]},
                {"name": "JoinResults", "inputs": ["ParentA", "ParentB"]},
            ],
        }
    ]

    config = p_ensemble.get_config()
    for expected_pipeline in expected_pipelines:
        assert expected_pipeline in config["pipelines"]
    for expected_component in expected_components:
        assert expected_component in config["components"]


def test_get_config_reuses_same_dependent_components():
    child = ChildComponent()
    parent = ParentComponent(dependent=child)
    pipeline = Pipeline()
    pipeline.add_node(component=parent, name="parent", inputs=["Query"])
    pipeline.add_node(component=child, name="child", inputs=["parent"])
    config = pipeline.get_config()

    expected_pipelines = [
        {"name": "query", "nodes": [{"name": "parent", "inputs": ["Query"]}, {"name": "child", "inputs": ["parent"]}]}
    ]
    expected_components = [
        {"name": "parent", "type": "ParentComponent", "params": {"dependent": "child"}},
        {"name": "child", "type": "ChildComponent", "params": {}},
    ]

    config = pipeline.get_config()
    for expected_pipeline in expected_pipelines:
        assert expected_pipeline in config["pipelines"]
    for expected_component in expected_components:
        assert expected_component in config["components"]


def test_get_config_creates_different_components_if_instances_differ():
    child_a = ChildComponent()
    child_b = ChildComponent()
    child_c = ChildComponent()
    parent = ParentComponent(dependent=child_a)
    parent2 = ParentComponent(dependent=child_b)
    p_ensemble = Pipeline()
    p_ensemble.add_node(component=parent, name="ParentA", inputs=["Query"])
    p_ensemble.add_node(component=parent2, name="ParentB", inputs=["Query"])
    p_ensemble.add_node(component=child_c, name="Child", inputs=["Query"])

    expected_components = [
        {"name": "ParentA", "type": "ParentComponent", "params": {"dependent": "ChildComponent"}},
        {"name": "ChildComponent", "type": "ChildComponent", "params": {}},
        {"name": "ParentB", "type": "ParentComponent", "params": {"dependent": "ChildComponent_2"}},
        {"name": "ChildComponent_2", "type": "ChildComponent", "params": {}},
        {"name": "Child", "type": "ChildComponent", "params": {}},
    ]

    expected_pipelines = [
        {
            "name": "query",
            "nodes": [
                {"name": "ParentA", "inputs": ["Query"]},
                {"name": "ParentB", "inputs": ["Query"]},
                {"name": "Child", "inputs": ["Query"]},
            ],
        }
    ]

    config = p_ensemble.get_config()
    for expected_pipeline in expected_pipelines:
        assert expected_pipeline in config["pipelines"]
    for expected_component in expected_components:
        assert expected_component in config["components"]


def test_get_config_reuses_same_unnamed_dependent_components():
    child = ChildComponent()
    parent = ParentComponent(dependent=child)
    parent2 = ParentComponent(dependent=child)
    p_ensemble = Pipeline()
    p_ensemble.add_node(component=parent, name="ParentA", inputs=["Query"])
    p_ensemble.add_node(component=parent2, name="ParentB", inputs=["Query"])

    expected_components = [
        {"name": "ParentA", "type": "ParentComponent", "params": {"dependent": "ChildComponent"}},
        {"name": "ChildComponent", "type": "ChildComponent", "params": {}},
        {"name": "ParentB", "type": "ParentComponent", "params": {"dependent": "ChildComponent"}},
    ]

    expected_pipelines = [
        {"name": "query", "nodes": [{"name": "ParentA", "inputs": ["Query"]}, {"name": "ParentB", "inputs": ["Query"]}]}
    ]

    config = p_ensemble.get_config()
    for expected_pipeline in expected_pipelines:
        assert expected_pipeline in config["pipelines"]
    for expected_component in expected_components:
        assert expected_component in config["components"]


def test_get_config_multi_level_dependencies():
    child = ChildComponent()
    intermediate = ParentComponent(dependent=child)
    parent = ParentComponent(dependent=intermediate)
    p_ensemble = Pipeline()
    p_ensemble.add_node(component=parent, name="Parent", inputs=["Query"])

    expected_components = [
        {"name": "Parent", "type": "ParentComponent", "params": {"dependent": "ParentComponent"}},
        {"name": "ChildComponent", "type": "ChildComponent", "params": {}},
        {"name": "ParentComponent", "type": "ParentComponent", "params": {"dependent": "ChildComponent"}},
    ]

    expected_pipelines = [{"name": "query", "nodes": [{"name": "Parent", "inputs": ["Query"]}]}]

    config = p_ensemble.get_config()
    for expected_pipeline in expected_pipelines:
        assert expected_pipeline in config["pipelines"]
    for expected_component in expected_components:
        assert expected_component in config["components"]


def test_get_config_multi_level_dependencies_of_same_type():
    child = ChildComponent()
    second_intermediate = ParentComponent(dependent=child)
    intermediate = ParentComponent(dependent=second_intermediate)
    parent = ParentComponent(dependent=intermediate)
    p_ensemble = Pipeline()
    p_ensemble.add_node(component=parent, name="ParentComponent", inputs=["Query"])

    expected_components = [
        {"name": "ParentComponent_3", "type": "ParentComponent", "params": {"dependent": "ChildComponent"}},
        {"name": "ParentComponent_2", "type": "ParentComponent", "params": {"dependent": "ParentComponent_3"}},
        {"name": "ParentComponent", "type": "ParentComponent", "params": {"dependent": "ParentComponent_2"}},
        {"name": "ChildComponent", "type": "ChildComponent", "params": {}},
    ]

    expected_pipelines = [{"name": "query", "nodes": [{"name": "ParentComponent", "inputs": ["Query"]}]}]

    config = p_ensemble.get_config()
    for expected_pipeline in expected_pipelines:
        assert expected_pipeline in config["pipelines"]
    for expected_component in expected_components:
        assert expected_component in config["components"]


def test_get_config_component_with_superclass_arguments():
    class CustomBaseDocumentStore(MockDocumentStore):
        def __init__(self, base_parameter: str):
            self.base_parameter = base_parameter

    class CustomDocumentStore(CustomBaseDocumentStore):
        def __init__(self, sub_parameter: int):
            super().__init__(base_parameter="something")
            self.sub_parameter = sub_parameter

    class CustomRetriever(MockRetriever):
        def __init__(self, document_store):
            super().__init__()
            self.document_store = document_store

    document_store = CustomDocumentStore(sub_parameter=10)
    retriever = CustomRetriever(document_store=document_store)
    pipeline = Pipeline()
    pipeline.add_node(retriever, name="Retriever", inputs=["Query"])

    pipeline.get_config()
    assert pipeline.get_document_store().sub_parameter == 10
    assert pipeline.get_document_store().base_parameter == "something"


def test_get_config_custom_node_with_params():
    class CustomNode(MockNode):
        def __init__(self, param: int):
            super().__init__()
            self.param = param

    pipeline = Pipeline()
    pipeline.add_node(CustomNode(param=10), name="custom_node", inputs=["Query"])

    assert len(pipeline.get_config()["components"]) == 1
    assert pipeline.get_config()["components"][0]["params"] == {"param": 10}


def test_get_config_custom_node_with_positional_params():
    class CustomNode(MockNode):
        def __init__(self, param: int = 1):
            super().__init__()
            self.param = param

    pipeline = Pipeline()
    pipeline.add_node(CustomNode(10), name="custom_node", inputs=["Query"])

    assert len(pipeline.get_config()["components"]) == 1
    assert pipeline.get_config()["components"][0]["params"] == {"param": 10}


def test_get_config_multi_output_node():
    class MultiOutputNode(BaseComponent):
        outgoing_edges = 2

        def run(self, *a, **k):
            pass

        def run_batch(self, *a, **k):
            pass

    pipeline = Pipeline()
    pipeline.add_node(MultiOutputNode(), name="multi_output_node", inputs=["Query"])
    pipeline.add_node(MockNode(), name="fork_1", inputs=["multi_output_node.output_1"])
    pipeline.add_node(MockNode(), name="fork_2", inputs=["multi_output_node.output_2"])
    pipeline.add_node(JoinNode(), name="join_node", inputs=["fork_1", "fork_2"])

    config = pipeline.get_config()
    assert len(config["components"]) == 4
    assert len(config["pipelines"]) == 1
    nodes = config["pipelines"][0]["nodes"]
    assert len(nodes) == 4

    assert nodes[0]["name"] == "multi_output_node"
    assert len(nodes[0]["inputs"]) == 1
    assert "Query" in nodes[0]["inputs"]

    assert nodes[1]["name"] == "fork_1"
    assert len(nodes[1]["inputs"]) == 1
    assert "multi_output_node.output_1" in nodes[1]["inputs"]

    assert nodes[2]["name"] == "fork_2"
    assert len(nodes[2]["inputs"]) == 1
    assert "multi_output_node.output_2" in nodes[2]["inputs"]

    assert nodes[3]["name"] == "join_node"
    assert len(nodes[3]["inputs"]) == 2
    assert "fork_1" in nodes[3]["inputs"]
    assert "fork_2" in nodes[3]["inputs"]


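# The generate_code tests below feed hand-written pipeline configs into
# haystack.pipelines.utils.generate_code() and compare the emitted Python source line by line.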
def test_generate_code_simple_pipeline():
    config = {
        "version": "ignore",
        "components": [
            {
                "name": "retri",
                "type": "BM25Retriever",
                "params": {"document_store": "ElasticsearchDocumentStore", "top_k": 20},
            },
            {
                "name": "ElasticsearchDocumentStore",
                "type": "ElasticsearchDocumentStore",
                "params": {"index": "my-index"},
            },
        ],
        "pipelines": [{"name": "query", "nodes": [{"name": "retri", "inputs": ["Query"]}]}],
    }

    code = generate_code(pipeline_config=config, pipeline_variable_name="p", generate_imports=False)
    assert code == (
        'elasticsearch_document_store = ElasticsearchDocumentStore(index="my-index")\n'
        "retri = BM25Retriever(document_store=elasticsearch_document_store, top_k=20)\n"
        "\n"
        "p = Pipeline()\n"
        'p.add_node(component=retri, name="retri", inputs=["Query"])'
    )


def test_generate_code_imports():
    pipeline_config = {
        "version": "ignore",
        "components": [
            {"name": "DocumentStore", "type": "ElasticsearchDocumentStore"},
            {"name": "retri", "type": "BM25Retriever", "params": {"document_store": "DocumentStore"}},
            {"name": "retri2", "type": "TfidfRetriever", "params": {"document_store": "DocumentStore"}},
        ],
        "pipelines": [
            {
                "name": "Query",
                "nodes": [{"name": "retri", "inputs": ["Query"]}, {"name": "retri2", "inputs": ["Query"]}],
            }
        ],
    }

    code = generate_code(pipeline_config=pipeline_config, pipeline_variable_name="p", generate_imports=True)
    assert code == (
        "from haystack.document_stores import ElasticsearchDocumentStore\n"
        "from haystack.nodes import BM25Retriever, TfidfRetriever\n"
        "from haystack.pipelines import Pipeline\n"
        "\n"
        "document_store = ElasticsearchDocumentStore()\n"
        'document_store.name = "DocumentStore"\n'
        "retri = BM25Retriever(document_store=document_store)\n"
        "retri_2 = TfidfRetriever(document_store=document_store)\n"
        "\n"
        "p = Pipeline()\n"
        'p.add_node(component=retri, name="retri", inputs=["Query"])\n'
        'p.add_node(component=retri_2, name="retri2", inputs=["Query"])'
    )


def test_generate_code_imports_no_pipeline_cls():
    pipeline_config = {
        "version": "ignore",
        "components": [
            {"name": "DocumentStore", "type": "ElasticsearchDocumentStore"},
            {"name": "retri", "type": "BM25Retriever", "params": {"document_store": "DocumentStore"}},
        ],
        "pipelines": [{"name": "Query", "nodes": [{"name": "retri", "inputs": ["Query"]}]}],
    }

    code = generate_code(
        pipeline_config=pipeline_config,
        pipeline_variable_name="p",
        generate_imports=True,
        add_pipeline_cls_import=False,
    )
    assert code == (
        "from haystack.document_stores import ElasticsearchDocumentStore\n"
        "from haystack.nodes import BM25Retriever\n"
        "\n"
        "document_store = ElasticsearchDocumentStore()\n"
        'document_store.name = "DocumentStore"\n'
        "retri = BM25Retriever(document_store=document_store)\n"
        "\n"
        "p = Pipeline()\n"
        'p.add_node(component=retri, name="retri", inputs=["Query"])'
    )


def test_generate_code_comment():
    pipeline_config = {
        "version": "ignore",
        "components": [
            {"name": "DocumentStore", "type": "ElasticsearchDocumentStore"},
            {"name": "retri", "type": "BM25Retriever", "params": {"document_store": "DocumentStore"}},
        ],
        "pipelines": [{"name": "Query", "nodes": [{"name": "retri", "inputs": ["Query"]}]}],
    }

    comment = "This is my comment\n...and here is a new line"
    code = generate_code(pipeline_config=pipeline_config, pipeline_variable_name="p", comment=comment)
    assert code == (
        "# This is my comment\n"
        "# ...and here is a new line\n"
        "from haystack.document_stores import ElasticsearchDocumentStore\n"
        "from haystack.nodes import BM25Retriever\n"
        "from haystack.pipelines import Pipeline\n"
        "\n"
        "document_store = ElasticsearchDocumentStore()\n"
        'document_store.name = "DocumentStore"\n'
        "retri = BM25Retriever(document_store=document_store)\n"
        "\n"
        "p = Pipeline()\n"
        'p.add_node(component=retri, name="retri", inputs=["Query"])'
    )


def test_generate_code_is_component_order_invariant():
    pipeline_config = {
        "version": "ignore",
        "pipelines": [
            {
                "name": "Query",
                "nodes": [
                    {"name": "EsRetriever", "inputs": ["Query"]},
                    {"name": "EmbeddingRetriever", "inputs": ["Query"]},
                    {"name": "JoinResults", "inputs": ["EsRetriever", "EmbeddingRetriever"]},
                ],
            }
        ],
    }

    doc_store = {"name": "ElasticsearchDocumentStore", "type": "ElasticsearchDocumentStore"}
    es_retriever = {
        "name": "EsRetriever",
        "type": "BM25Retriever",
        "params": {"document_store": "ElasticsearchDocumentStore"},
    }
    emb_retriever = {
        "name": "EmbeddingRetriever",
        "type": "EmbeddingRetriever",
        "params": {
            "document_store": "ElasticsearchDocumentStore",
            "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
        },
    }
    join_node = {"name": "JoinResults", "type": "JoinDocuments"}

    component_orders = [
        [doc_store, es_retriever, emb_retriever, join_node],
        [es_retriever, emb_retriever, join_node, doc_store],
        [join_node, es_retriever, emb_retriever, doc_store],
    ]

    expected_code = (
        "elasticsearch_document_store = ElasticsearchDocumentStore()\n"
        "es_retriever = BM25Retriever(document_store=elasticsearch_document_store)\n"
        'embedding_retriever = EmbeddingRetriever(document_store=elasticsearch_document_store, embedding_model="sentence-transformers/all-MiniLM-L6-v2")\n'
        "join_results = JoinDocuments()\n"
        "\n"
        "p = Pipeline()\n"
        'p.add_node(component=es_retriever, name="EsRetriever", inputs=["Query"])\n'
        'p.add_node(component=embedding_retriever, name="EmbeddingRetriever", inputs=["Query"])\n'
        'p.add_node(component=join_results, name="JoinResults", inputs=["EsRetriever", "EmbeddingRetriever"])'
    )

    for components in component_orders:
        pipeline_config["components"] = components

        code = generate_code(pipeline_config=pipeline_config, pipeline_variable_name="p", generate_imports=False)
        assert code == expected_code


def test_generate_code_can_handle_weak_cyclic_pipelines():
    config = {
        "version": "ignore",
        "components": [
            {"name": "parent", "type": "ParentComponent", "params": {"dependent": "child"}},
            {"name": "child", "type": "ChildComponent", "params": {}},
        ],
        "pipelines": [
            {
                "name": "query",
                "nodes": [{"name": "parent", "inputs": ["Query"]}, {"name": "child", "inputs": ["parent"]}],
            }
        ],
    }
    code = generate_code(pipeline_config=config, generate_imports=False)
    assert code == (
        "child = ChildComponent()\n"
        "parent = ParentComponent(dependent=child)\n"
        "\n"
        "pipeline = Pipeline()\n"
        'pipeline.add_node(component=parent, name="parent", inputs=["Query"])\n'
        'pipeline.add_node(component=child, name="child", inputs=["parent"])'
    )


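# validate_config_strings() tests: strings containing control characters, leading whitespace,
# or call syntax must be rejected wherever they appear in a config (names, types, params, inputs).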
@pytest.mark.parametrize("input", ["\btest", " test", "#test", "+test", "\ttest", "\ntest", "test()"])
def test_validate_user_input_invalid(input):
    with pytest.raises(PipelineConfigError, match="is not a valid variable name or value"):
        validate_config_strings(input)


@pytest.mark.parametrize(
    "input",
    [
        "test",
        "testName",
        "test_name",
        "test-name",
        "test-name1234",
        "http://localhost:8000/my-path",
        "C:\\Some\\Windows\\Path\\To\\file.txt",
    ],
)
def test_validate_user_input_valid(input):
    validate_config_strings(input)


def test_validate_pipeline_config_invalid_component_name():
    with pytest.raises(PipelineConfigError, match="is not a valid variable name or value"):
        validate_config_strings({"components": [{"name": "\btest"}]})


def test_validate_pipeline_config_invalid_component_type():
    with pytest.raises(PipelineConfigError, match="is not a valid variable name or value"):
        validate_config_strings({"components": [{"name": "test", "type": "\btest"}]})


def test_validate_pipeline_config_invalid_component_param():
    with pytest.raises(PipelineConfigError, match="is not a valid variable name or value"):
        validate_config_strings({"components": [{"name": "test", "type": "test", "params": {"key": "\btest"}}]})


def test_validate_pipeline_config_invalid_component_param_key():
    with pytest.raises(PipelineConfigError, match="is not a valid variable name or value"):
        validate_config_strings({"components": [{"name": "test", "type": "test", "params": {"\btest": "test"}}]})


def test_validate_pipeline_config_invalid_pipeline_name():
    with pytest.raises(PipelineConfigError, match="is not a valid variable name or value"):
        validate_config_strings({"components": [{"name": "test", "type": "test"}], "pipelines": [{"name": "\btest"}]})


def test_validate_pipeline_config_invalid_pipeline_node_name():
    with pytest.raises(PipelineConfigError, match="is not a valid variable name or value"):
        validate_config_strings(
            {
                "components": [{"name": "test", "type": "test"}],
                "pipelines": [{"name": "test", "type": "test", "nodes": [{"name": "\btest"}]}],
            }
        )


def test_validate_pipeline_config_invalid_pipeline_node_inputs():
    with pytest.raises(PipelineConfigError, match="is not a valid variable name or value"):
        validate_config_strings(
            {
                "components": [{"name": "test", "type": "test"}],
                "pipelines": [{"name": "test", "type": "test", "nodes": [{"name": "test", "inputs": ["\btest"]}]}],
            }
        )


def test_validate_pipeline_config_recursive_config(reduce_windows_recursion_limit):
    pipeline_config = {}
    node = {"config": pipeline_config}
    pipeline_config["node"] = node

    with pytest.raises(PipelineConfigError, match="recursive"):
        validate_config_strings(pipeline_config)


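# deepset Cloud tests: when MOCK_DC is set, the REST API calls are intercepted with the
# `responses` library, so the tests run against canned JSON fixtures instead of a live workspace.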
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_load_from_deepset_cloud_query():
    if MOCK_DC:
        with open(SAMPLES_PATH / "dc" / "pipeline_config.json", "r") as f:
            pipeline_config_yaml_response = json.load(f)

        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/{DC_TEST_INDEX}/json",
            json=pipeline_config_yaml_response,
            status=200,
        )

        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents-query",
            json=[{"id": "test_doc", "content": "man on hores"}],
            status=200,
        )

    query_pipeline = Pipeline.load_from_deepset_cloud(
        pipeline_config_name=DC_TEST_INDEX, api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
    )
    retriever = query_pipeline.get_node("Retriever")
    document_store = retriever.document_store
    assert isinstance(retriever, BM25Retriever)
    assert isinstance(document_store, DeepsetCloudDocumentStore)
    assert document_store == query_pipeline.get_document_store()
    assert document_store.name == "DocumentStore"

    prediction = query_pipeline.run(query="man on horse", params={})

    assert prediction["query"] == "man on horse"
    assert len(prediction["documents"]) == 1
    assert prediction["documents"][0].id == "test_doc"


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_load_from_deepset_cloud_indexing(caplog):
    if MOCK_DC:
        with open(SAMPLES_PATH / "dc" / "pipeline_config.json", "r") as f:
            pipeline_config_yaml_response = json.load(f)

        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/{DC_TEST_INDEX}/json",
            json=pipeline_config_yaml_response,
            status=200,
        )

    indexing_pipeline = Pipeline.load_from_deepset_cloud(
        pipeline_config_name=DC_TEST_INDEX, api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, pipeline_name="indexing"
    )
    document_store = indexing_pipeline.get_node("DocumentStore")
    assert isinstance(document_store, DeepsetCloudDocumentStore)

    with caplog.at_level(logging.INFO):
        indexing_pipeline.run(file_paths=[SAMPLES_PATH / "docs" / "doc_1.txt"])
        assert "Note that DeepsetCloudDocumentStore does not support write operations." in caplog.text
        assert "Input to write_documents: {" in caplog.text


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_list_pipelines_on_deepset_cloud():
    pipelines = Pipeline.list_pipelines_on_deepset_cloud(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY)
    assert len(pipelines) == 1
    assert pipelines[0]["name"] == DC_TEST_INDEX


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_save_to_deepset_cloud():
    if MOCK_DC:
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_pipeline_config",
            json={
                "name": "test_pipeline_config",
                "pipeline_id": "2184e9c1-c6ec-40a1-9b28-5d2768e5efa2",
                "status": "UNDEPLOYED",
                "created_at": "2022-02-01T09:57:03.803991+00:00",
                "deleted": False,
                "is_default": False,
                "indexing": {"status": "IN_PROGRESS", "pending_file_count": 4, "total_file_count": 33},
            },
            status=200,
        )

        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_pipeline_config_deployed",
            json={
                "name": "test_pipeline_config_deployed",
                "pipeline_id": "8184e0c1-c6ec-40a1-9b28-5d2768e5efa3",
                "status": "DEPLOYED",
                "created_at": "2022-02-09T09:57:03.803991+00:00",
                "deleted": False,
                "is_default": False,
                "indexing": {"status": "INDEXED", "pending_file_count": 0, "total_file_count": 33},
            },
            status=200,
        )

        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_pipeline_config_copy",
            json={"errors": ["Pipeline with the name test_pipeline_config_copy does not exists."]},
            status=404,
        )

        with open(SAMPLES_PATH / "dc" / "pipeline_config.json", "r") as f:
            pipeline_config_yaml_response = json.load(f)

        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/{DC_TEST_INDEX}/json",
            json=pipeline_config_yaml_response,
            status=200,
        )

        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines",
            json={"name": "test_pipeline_config_copy"},
            status=200,
        )

        responses.add(
            method=responses.PUT,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_pipeline_config/yaml",
            json={"name": "test_pipeline_config"},
            status=200,
        )

        responses.add(
            method=responses.PUT,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_pipeline_config_deployed/yaml",
            json={"errors": ["Updating the pipeline yaml is not allowed for pipelines with status: 'DEPLOYED'"]},
            status=406,
        )

    query_pipeline = Pipeline.load_from_deepset_cloud(
        pipeline_config_name=DC_TEST_INDEX, api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
    )

    index_pipeline = Pipeline.load_from_deepset_cloud(
        pipeline_config_name=DC_TEST_INDEX, api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, pipeline_name="indexing"
    )

    Pipeline.save_to_deepset_cloud(
        query_pipeline=query_pipeline,
        index_pipeline=index_pipeline,
        pipeline_config_name="test_pipeline_config_copy",
        api_endpoint=DC_API_ENDPOINT,
        api_key=DC_API_KEY,
    )

    with pytest.raises(
        ValueError,
        match="Pipeline config 'test_pipeline_config' already exists. Set `overwrite=True` to overwrite pipeline config",
    ):
        Pipeline.save_to_deepset_cloud(
            query_pipeline=query_pipeline,
            index_pipeline=index_pipeline,
            pipeline_config_name="test_pipeline_config",
            api_endpoint=DC_API_ENDPOINT,
            api_key=DC_API_KEY,
        )

    Pipeline.save_to_deepset_cloud(
        query_pipeline=query_pipeline,
        index_pipeline=index_pipeline,
        pipeline_config_name="test_pipeline_config",
        api_endpoint=DC_API_ENDPOINT,
        api_key=DC_API_KEY,
        overwrite=True,
    )

    with pytest.raises(
        ValueError,
        match="Deployed pipeline configs are not allowed to be updated. Please undeploy pipeline config 'test_pipeline_config_deployed' first",
    ):
        Pipeline.save_to_deepset_cloud(
            query_pipeline=query_pipeline,
            index_pipeline=index_pipeline,
            pipeline_config_name="test_pipeline_config_deployed",
            api_endpoint=DC_API_ENDPOINT,
            api_key=DC_API_KEY,
            overwrite=True,
        )


@pytest.mark.integration
@pytest.mark.elasticsearch
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_save_nonexisting_pipeline_to_deepset_cloud():
    if MOCK_DC:

        def dc_document_store_matcher(request: PreparedRequest) -> Tuple[bool, str]:
            matches = False
            reason = "No DeepsetCloudDocumentStore found."
            request_body = request.body or ""
            json_body = yaml.safe_load(request_body)
            components = json_body["components"]
            for component in components:
                if component["type"].endswith("DocumentStore"):
                    if component["type"] == "DeepsetCloudDocumentStore":
                        matches = True
                    else:
                        matches = False
                        reason = f"Component {component['name']} is of type {component['type']} and not DeepsetCloudDocumentStore"
                    break
            return matches, reason

        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
            json={"errors": ["Pipeline with the name test_pipeline_config_copy does not exists."]},
            status=404,
        )

        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines",
            json={"name": "test_new_non_existing_pipeline"},
            status=201,
            match=[dc_document_store_matcher],
        )

    es_document_store = ElasticsearchDocumentStore()
    es_retriever = BM25Retriever(document_store=es_document_store)
    file_converter = TextConverter()
    preprocessor = PreProcessor()

    query_pipeline = Pipeline()
    query_pipeline.add_node(component=es_retriever, name="Retriever", inputs=["Query"])
    index_pipeline = Pipeline()
    index_pipeline.add_node(component=file_converter, name="FileConverter", inputs=["File"])
    index_pipeline.add_node(component=preprocessor, name="Preprocessor", inputs=["FileConverter"])
    index_pipeline.add_node(component=es_document_store, name="DocumentStore", inputs=["Preprocessor"])

    Pipeline.save_to_deepset_cloud(
        query_pipeline=query_pipeline,
        index_pipeline=index_pipeline,
        pipeline_config_name="test_new_non_existing_pipeline",
        api_endpoint=DC_API_ENDPOINT,
        api_key=DC_API_KEY,
    )


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_deploy_on_deepset_cloud_non_existing_pipeline():
    if MOCK_DC:
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
            json={"errors": ["Pipeline with the name test_pipeline_config_copy does not exists."]},
            status=404,
        )

    with pytest.raises(DeepsetCloudError, match="Pipeline config 'test_new_non_existing_pipeline' does not exist."):
        Pipeline.deploy_on_deepset_cloud(
            pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
        )


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
|
|
|
|
@responses.activate
|
|
|
|
def test_undeploy_on_deepset_cloud_non_existing_pipeline():
|
|
|
|
if MOCK_DC:
|
|
|
|
responses.add(
|
|
|
|
method=responses.GET,
|
|
|
|
url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
|
|
|
|
json={"errors": ["Pipeline with the name test_pipeline_config_copy does not exists."]},
|
|
|
|
status=404,
|
|
|
|
)
|
|
|
|
|
|
|
|
with pytest.raises(DeepsetCloudError, match="Pipeline config 'test_new_non_existing_pipeline' does not exist."):
|
|
|
|
Pipeline.undeploy_on_deepset_cloud(
|
|
|
|
pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
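

# The deploy/undeploy tests below mock the deepset Cloud API with `responses`: a POST to
# .../pipelines/<name>/deploy (or /undeploy) schedules the transition, and repeated GETs to
# .../pipelines/<name> return the queued status values until the target state is reached.
# A minimal call (the config name below is just an example, assuming DC_API_ENDPOINT and
# DC_API_KEY are configured) looks like:
#
#     Pipeline.deploy_on_deepset_cloud(
#         pipeline_config_name="my_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
#     )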


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_deploy_on_deepset_cloud(caplog):
    if MOCK_DC:
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline/deploy",
            json={"status": "DEPLOYMENT_SCHEDULED"},
            status=200,
        )

        # status will be first undeployed, after deploy() it's in progress twice and the third time deployed
        status_flow = ["UNDEPLOYED", "DEPLOYMENT_IN_PROGRESS", "DEPLOYMENT_IN_PROGRESS", "DEPLOYED"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )

    with caplog.at_level(logging.INFO):
        pipeline_url = f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline/search"
        Pipeline.deploy_on_deepset_cloud(
            pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
        )
        assert "Pipeline config 'test_new_non_existing_pipeline' successfully deployed." in caplog.text
        assert pipeline_url in caplog.text
        assert "curl" in caplog.text


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_deploy_on_deepset_cloud_no_curl_message(caplog):
    if MOCK_DC:
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline/deploy",
            json={"status": "DEPLOYMENT_SCHEDULED"},
            status=200,
        )

        # status will be first undeployed, after deploy() it's in progress twice and the third time deployed
        status_flow = ["UNDEPLOYED", "DEPLOYMENT_IN_PROGRESS", "DEPLOYMENT_IN_PROGRESS", "DEPLOYED"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )

    with caplog.at_level(logging.INFO):
        pipeline_url = f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline/search"
        Pipeline.deploy_on_deepset_cloud(
            pipeline_config_name="test_new_non_existing_pipeline",
            api_endpoint=DC_API_ENDPOINT,
            api_key=DC_API_KEY,
            show_curl_message=False,
        )
        assert "Pipeline config 'test_new_non_existing_pipeline' successfully deployed." in caplog.text
        assert pipeline_url in caplog.text
        assert "curl" not in caplog.text


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_undeploy_on_deepset_cloud():
    if MOCK_DC:
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline/undeploy",
            json={"status": "UNDEPLOYMENT_SCHEDULED"},
            status=200,
        )

        # status will be first deployed, after undeploy() it's in progress twice and the third time undeployed
        status_flow = ["DEPLOYED", "UNDEPLOYMENT_IN_PROGRESS", "UNDEPLOYMENT_IN_PROGRESS", "UNDEPLOYED"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )

    Pipeline.undeploy_on_deepset_cloud(
        pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
    )
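

# The two "state already satisfied" tests below only queue a single status response: if the config
# is already in the requested state, deploy()/undeploy() should return without further transitioning
# (no deploy/undeploy POST is mocked, so any such request would fail the test).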


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_deploy_on_deepset_cloud_state_already_satisfied():
    if MOCK_DC:
        # the pipeline is already deployed, so deploy() has nothing to do
        status_flow = ["DEPLOYED"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )

    Pipeline.deploy_on_deepset_cloud(
        pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
    )


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_undeploy_on_deepset_cloud_state_already_satisfied():
    if MOCK_DC:
        # the pipeline is already undeployed, so undeploy() has nothing to do
        status_flow = ["UNDEPLOYED"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )

    Pipeline.undeploy_on_deepset_cloud(
        pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
    )
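

# The "_failed" tests below simulate a transition that is scheduled but ends back in the original
# state, which deploy()/undeploy() reports as a DeepsetCloudError.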


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_deploy_on_deepset_cloud_failed():
    if MOCK_DC:
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline/deploy",
            json={"status": "DEPLOYMENT_SCHEDULED"},
            status=200,
        )

        # status will be first undeployed, after deploy() it's in progress and the third time undeployed
        status_flow = ["UNDEPLOYED", "DEPLOYMENT_IN_PROGRESS", "UNDEPLOYED"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )

    with pytest.raises(
        DeepsetCloudError, match="Deployment of pipeline config 'test_new_non_existing_pipeline' failed."
    ):
        Pipeline.deploy_on_deepset_cloud(
            pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
        )


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_undeploy_on_deepset_cloud_failed():
    if MOCK_DC:
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline/undeploy",
            json={"status": "UNDEPLOYMENT_SCHEDULED"},
            status=200,
        )

        # status will be first deployed, after undeploy() it's in progress and the third time deployed again
        status_flow = ["DEPLOYED", "UNDEPLOYMENT_IN_PROGRESS", "DEPLOYED"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )

    with pytest.raises(
        DeepsetCloudError, match="Undeployment of pipeline config 'test_new_non_existing_pipeline' failed."
    ):
        Pipeline.undeploy_on_deepset_cloud(
            pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
        )


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_deploy_on_deepset_cloud_invalid_initial_state():
    if MOCK_DC:
        status_flow = ["UNDEPLOYMENT_SCHEDULED"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )

    with pytest.raises(
        DeepsetCloudError,
        match="Pipeline config 'test_new_non_existing_pipeline' is in invalid state 'UNDEPLOYMENT_SCHEDULED' to be transitioned to 'DEPLOYED'.",
    ):
        Pipeline.deploy_on_deepset_cloud(
            pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
        )


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_undeploy_on_deepset_cloud_invalid_initial_state():
    if MOCK_DC:
        status_flow = ["DEPLOYMENT_SCHEDULED"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )

    with pytest.raises(
        DeepsetCloudError,
        match="Pipeline config 'test_new_non_existing_pipeline' is in invalid state 'DEPLOYMENT_SCHEDULED' to be transitioned to 'UNDEPLOYED'.",
    ):
        Pipeline.undeploy_on_deepset_cloud(
            pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
        )
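

# The "_in_progress" and "_unknown_state" tests below cover what happens while a transition is being
# awaited: if the opposite transition gets requested in the meantime, the operation is aborted, and a
# status string the client does not recognize is reported as UNKNOWN.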


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_deploy_on_deepset_cloud_invalid_state_in_progress():
    if MOCK_DC:
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline/deploy",
            json={"status": "DEPLOYMENT_SCHEDULED"},
            status=200,
        )

        # status will be first undeployed, then an undeployment gets scheduled while deploy() is waiting
        status_flow = ["UNDEPLOYED", "UNDEPLOYMENT_IN_PROGRESS"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )
    with pytest.raises(
        DeepsetCloudError,
        match="Deployment of pipeline config 'test_new_non_existing_pipeline' aborted. Undeployment was requested.",
    ):
        Pipeline.deploy_on_deepset_cloud(
            pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
        )


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_undeploy_on_deepset_cloud_invalid_state_in_progress():
    if MOCK_DC:
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline/undeploy",
            json={"status": "UNDEPLOYMENT_SCHEDULED"},
            status=200,
        )

        # status will be first deployed, then a deployment gets scheduled while undeploy() is waiting
        status_flow = ["DEPLOYED", "DEPLOYMENT_IN_PROGRESS"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )
    with pytest.raises(
        DeepsetCloudError,
        match="Undeployment of pipeline config 'test_new_non_existing_pipeline' aborted. Deployment was requested.",
    ):
        Pipeline.undeploy_on_deepset_cloud(
            pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
        )


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_deploy_on_deepset_cloud_unknown_state_in_progress():
    if MOCK_DC:
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline/deploy",
            json={"status": "DEPLOYMENT_SCHEDULED"},
            status=200,
        )

        # status will be first undeployed, then an unrecognized status is returned while deploy() is waiting
        status_flow = ["UNDEPLOYED", "ASKDHFASJDF"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )
    with pytest.raises(
        DeepsetCloudError,
        match="Deployment of pipeline config 'test_new_non_existing_pipeline ended in unexpected status: UNKNOWN",
    ):
        Pipeline.deploy_on_deepset_cloud(
            pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
        )


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_undeploy_on_deepset_cloud_unknown_state_in_progress():
    if MOCK_DC:
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline/undeploy",
            json={"status": "UNDEPLOYMENT_SCHEDULED"},
            status=200,
        )

        # status will be first deployed, then an unrecognized status is returned while undeploy() is waiting
        status_flow = ["DEPLOYED", "ASKDHFASJDF"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )
    with pytest.raises(
        DeepsetCloudError,
        match="Undeployment of pipeline config 'test_new_non_existing_pipeline ended in unexpected status: UNKNOWN",
    ):
        Pipeline.undeploy_on_deepset_cloud(
            pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
        )
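

# The timeout tests below pass timeout=5 to deploy()/undeploy(); the mocked transition does not
# complete within that window (presumably because the client waits between status polls), so a
# TimeoutError is expected instead of a completed transition.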


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_deploy_on_deepset_cloud_timeout():
    if MOCK_DC:
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline/deploy",
            json={"status": "DEPLOYMENT_SCHEDULED"},
            status=200,
        )

        # status will be first undeployed, after deploy() it's in progress twice and the third time deployed
        status_flow = ["UNDEPLOYED", "DEPLOYMENT_IN_PROGRESS", "DEPLOYMENT_IN_PROGRESS", "DEPLOYED"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )
    with pytest.raises(
        TimeoutError, match="Transitioning of 'test_new_non_existing_pipeline' to state 'DEPLOYED' timed out."
    ):
        Pipeline.deploy_on_deepset_cloud(
            pipeline_config_name="test_new_non_existing_pipeline",
            api_endpoint=DC_API_ENDPOINT,
            api_key=DC_API_KEY,
            timeout=5,
        )


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_undeploy_on_deepset_cloud_timeout():
    if MOCK_DC:
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline/undeploy",
            json={"status": "UNDEPLOYMENT_SCHEDULED"},
            status=200,
        )

        # status will be first deployed, after undeploy() it's in progress twice and the third time undeployed
        status_flow = ["DEPLOYED", "UNDEPLOYMENT_IN_PROGRESS", "UNDEPLOYMENT_IN_PROGRESS", "UNDEPLOYED"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )
    with pytest.raises(
        TimeoutError, match="Transitioning of 'test_new_non_existing_pipeline' to state 'UNDEPLOYED' timed out."
    ):
        Pipeline.undeploy_on_deepset_cloud(
            pipeline_config_name="test_new_non_existing_pipeline",
            api_endpoint=DC_API_ENDPOINT,
            api_key=DC_API_KEY,
            timeout=5,
        )
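

# The graph validation tests below exercise Pipeline.add_node(): edges must exist on the upstream
# node, referenced input nodes must already be in the graph, every pipeline hangs off a single root
# node (e.g. "Query" or "File"), and neither node names nor node instances may be added twice.
# A minimal valid graph, for reference, is simply:
#
#     pipeline = Pipeline()
#     pipeline.add_node(name="DocStore", component=MockDocumentStore(), inputs=["Query"])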


def test_graph_validation_invalid_edge():
    docstore = MockDocumentStore()
    retriever = DummyRetriever(document_store=docstore)
    pipeline = Pipeline()
    pipeline.add_node(name="DocStore", component=docstore, inputs=["Query"])

    with pytest.raises(PipelineConfigError, match="DocStore has only 1 outgoing edge"):
        pipeline.add_node(name="Retriever", component=retriever, inputs=["DocStore.output_2"])


def test_graph_validation_non_existing_edge():
    docstore = MockDocumentStore()
    retriever = DummyRetriever(document_store=docstore)
    pipeline = Pipeline()
    pipeline.add_node(name="DocStore", component=docstore, inputs=["Query"])

    with pytest.raises(PipelineConfigError, match="'wrong_edge_label' is not a valid edge name"):
        pipeline.add_node(name="Retriever", component=retriever, inputs=["DocStore.wrong_edge_label"])


def test_graph_validation_invalid_node():
    docstore = MockDocumentStore()
    retriever = DummyRetriever(document_store=docstore)
    pipeline = Pipeline()
    pipeline.add_node(name="DocStore", component=docstore, inputs=["Query"])

    with pytest.raises(PipelineConfigError, match="Cannot find node 'InvalidNode'"):
        pipeline.add_node(name="Retriever", component=retriever, inputs=["InvalidNode"])


def test_graph_validation_invalid_root_node():
    docstore = MockDocumentStore()
    pipeline = Pipeline()

    with pytest.raises(PipelineConfigError, match="one single root node"):
        pipeline.add_node(name="DocStore", component=docstore, inputs=["InvalidNode"])


def test_graph_validation_no_root_node():
    docstore = MockNode()
    pipeline = Pipeline()

    with pytest.raises(PipelineConfigError, match="one single root node"):
        pipeline.add_node(name="Node", component=docstore, inputs=[])


def test_graph_validation_two_root_nodes():
    docstore = MockNode()
    pipeline = Pipeline()

    with pytest.raises(PipelineConfigError, match="one single root node"):
        pipeline.add_node(name="Node", component=docstore, inputs=["Query", "File"])

    with pytest.raises(PipelineConfigError, match="one single root node"):
        pipeline.add_node(name="Node", component=docstore, inputs=["Query", "Query"])


def test_graph_validation_duplicate_node_instance():
    node = MockNode()
    pipeline = Pipeline()
    pipeline.add_node(name="node_a", component=node, inputs=["Query"])

    with pytest.raises(PipelineConfigError, match="You have already added the same instance to the pipeline"):
        pipeline.add_node(name="node_b", component=node, inputs=["node_a"])


def test_graph_validation_duplicate_node():
    node = MockNode()
    other_node = MockNode()
    pipeline = Pipeline()
    pipeline.add_node(name="node", component=node, inputs=["Query"])
    with pytest.raises(PipelineConfigError, match="'node' is already in the pipeline"):
        pipeline.add_node(name="node", component=other_node, inputs=["Query"])


# See https://github.com/deepset-ai/haystack/issues/2568
def test_pipeline_nodes_can_have_uncopiable_objects_as_args():
    class DummyNode(MockNode):
        def __init__(self, uncopiable: ssl.SSLContext):
            self.uncopiable = uncopiable

    node = DummyNode(uncopiable=ssl.SSLContext())
    pipeline = Pipeline()
    pipeline.add_node(component=node, name="node", inputs=["Query"])

    # If the object got copied, this would raise TypeError: cannot pickle 'SSLContext' object.
    # `get_component_definitions()` should NOT copy objects, to allow this use case.
    get_component_definitions(pipeline.get_config())
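

# The two env var tests below rely on the override mechanism of get_component_definitions():
# judging by the name used here, the convention appears to be <NODENAME>_PARAMS_<PARAMNAME> in
# upper case (NODE_PARAMS_REPLACEABLE for the node "node" and its "replaceable" param). The override
# only applies when overwrite_with_env_variables=True and must not mutate the node's
# _component_config or the pipeline config itself.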


def test_pipeline_env_vars_do_not_modify__component_config(monkeypatch):
    class DummyNode(MockNode):
        def __init__(self, replaceable: str):
            self.replaceable = replaceable

    monkeypatch.setenv("NODE_PARAMS_REPLACEABLE", "env value")

    node = DummyNode(replaceable="init value")
    pipeline = Pipeline()
    pipeline.add_node(component=node, name="node", inputs=["Query"])

    original_component_config = deepcopy(node._component_config)
    original_pipeline_config = deepcopy(pipeline.get_config())

    no_env_defs = get_component_definitions(pipeline.get_config(), overwrite_with_env_variables=False)
    env_defs = get_component_definitions(pipeline.get_config(), overwrite_with_env_variables=True)

    new_component_config = deepcopy(node._component_config)
    new_pipeline_config = deepcopy(pipeline.get_config())

    assert no_env_defs != env_defs
    assert no_env_defs["node"]["params"]["replaceable"] == "init value"
    assert env_defs["node"]["params"]["replaceable"] == "env value"

    assert original_component_config == new_component_config
    assert original_component_config["params"]["replaceable"] == "init value"
    assert new_component_config["params"]["replaceable"] == "init value"

    assert original_pipeline_config == new_pipeline_config
    assert original_pipeline_config["components"][0]["params"]["replaceable"] == "init value"
    assert new_pipeline_config["components"][0]["params"]["replaceable"] == "init value"


def test_pipeline_env_vars_do_not_modify_pipeline_config(monkeypatch):
    class DummyNode(MockNode):
        def __init__(self, replaceable: str):
            self.replaceable = replaceable

    monkeypatch.setenv("NODE_PARAMS_REPLACEABLE", "env value")

    node = DummyNode(replaceable="init value")
    pipeline = Pipeline()
    pipeline.add_node(component=node, name="node", inputs=["Query"])

    pipeline_config = pipeline.get_config()
    original_pipeline_config = deepcopy(pipeline_config)

    get_component_definitions(pipeline_config, overwrite_with_env_variables=True)

    assert original_pipeline_config == pipeline_config
    assert original_pipeline_config["components"][0]["params"]["replaceable"] == "init value"
    assert pipeline_config["components"][0]["params"]["replaceable"] == "init value"
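

# The two parallel-path tests below build small branching graphs out of toy RootNode subclasses that
# just append a letter to the running value, then assert on the concatenated result to check how
# branches are executed and joined.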


def test_parallel_paths_in_pipeline_graph():
    class A(RootNode):
        def run(self):
            test = "A"
            return {"test": test}, "output_1"

    class B(RootNode):
        def run(self, test):
            test += "B"
            return {"test": test}, "output_1"

    class C(RootNode):
        def run(self, test):
            test += "C"
            return {"test": test}, "output_1"

    class D(RootNode):
        def run(self, test):
            test += "D"
            return {"test": test}, "output_1"

    class E(RootNode):
        def run(self, test):
            test += "E"
            return {"test": test}, "output_1"

    class JoinNode(RootNode):
        def run(self, inputs):
            test = inputs[0]["test"] + inputs[1]["test"]
            return {"test": test}, "output_1"

    pipeline = Pipeline()
    pipeline.add_node(name="A", component=A(), inputs=["Query"])
    pipeline.add_node(name="B", component=B(), inputs=["A"])
    pipeline.add_node(name="C", component=C(), inputs=["B"])
    pipeline.add_node(name="E", component=E(), inputs=["C"])
    pipeline.add_node(name="D", component=D(), inputs=["B"])
    pipeline.add_node(name="F", component=JoinNode(), inputs=["D", "E"])
    output = pipeline.run(query="test")
    assert output["test"] == "ABDABCE"

    pipeline = Pipeline()
    pipeline.add_node(name="A", component=A(), inputs=["Query"])
    pipeline.add_node(name="B", component=B(), inputs=["A"])
    pipeline.add_node(name="C", component=C(), inputs=["B"])
    pipeline.add_node(name="D", component=D(), inputs=["B"])
    pipeline.add_node(name="E", component=JoinNode(), inputs=["C", "D"])
    output = pipeline.run(query="test")
    assert output["test"] == "ABCABD"


def test_parallel_paths_in_pipeline_graph_with_branching():
    class AWithOutput1(RootNode):
        outgoing_edges = 2

        def run(self):
            output = "A"
            return {"output": output}, "output_1"

    class AWithOutput2(RootNode):
        outgoing_edges = 2

        def run(self):
            output = "A"
            return {"output": output}, "output_2"

    class AWithOutputAll(RootNode):
        outgoing_edges = 2

        def run(self):
            output = "A"
            return {"output": output}, "output_all"

    class B(RootNode):
        def run(self, output):
            output += "B"
            return {"output": output}, "output_1"

    class C(RootNode):
        def run(self, output):
            output += "C"
            return {"output": output}, "output_1"

    class D(RootNode):
        def run(self, output):
            output += "D"
            return {"output": output}, "output_1"

    class E(RootNode):
        def run(self, output):
            output += "E"
            return {"output": output}, "output_1"

    class JoinNode(RootNode):
        def run(self, output=None, inputs=None):
            if inputs:
                output = ""
                for input_dict in inputs:
                    output += input_dict["output"]
            return {"output": output}, "output_1"

    pipeline = Pipeline()
    pipeline.add_node(name="A", component=AWithOutput1(), inputs=["Query"])
    pipeline.add_node(name="B", component=B(), inputs=["A.output_1"])
    pipeline.add_node(name="C", component=C(), inputs=["A.output_2"])
    pipeline.add_node(name="D", component=E(), inputs=["B"])
    pipeline.add_node(name="E", component=D(), inputs=["B"])
    pipeline.add_node(name="F", component=JoinNode(), inputs=["D", "E", "C"])
    output = pipeline.run(query="test")
    assert output["output"] == "ABEABD"

    pipeline = Pipeline()
    pipeline.add_node(name="A", component=AWithOutput2(), inputs=["Query"])
    pipeline.add_node(name="B", component=B(), inputs=["A.output_1"])
    pipeline.add_node(name="C", component=C(), inputs=["A.output_2"])
    pipeline.add_node(name="D", component=E(), inputs=["B"])
    pipeline.add_node(name="E", component=D(), inputs=["B"])
    pipeline.add_node(name="F", component=JoinNode(), inputs=["D", "E", "C"])
    output = pipeline.run(query="test")
    assert output["output"] == "AC"

    pipeline = Pipeline()
    pipeline.add_node(name="A", component=AWithOutputAll(), inputs=["Query"])
    pipeline.add_node(name="B", component=B(), inputs=["A.output_1"])
    pipeline.add_node(name="C", component=C(), inputs=["A.output_2"])
    pipeline.add_node(name="D", component=E(), inputs=["B"])
    pipeline.add_node(name="E", component=D(), inputs=["B"])
    pipeline.add_node(name="F", component=JoinNode(), inputs=["D", "E", "C"])
    output = pipeline.run(query="test")
    assert output["output"] == "ACABEABD"


def test_pipeline_components():
    class Node(BaseComponent):
        outgoing_edges = 1

        def run(self):
            test = "test"
            return {"test": test}, "output_1"

        def run_batch(self):
            return

    a = Node()
    b = Node()
    c = Node()
    d = Node()
    e = Node()
    pipeline = Pipeline()
    pipeline.add_node(name="A", component=a, inputs=["Query"])
    pipeline.add_node(name="B", component=b, inputs=["A"])
    pipeline.add_node(name="C", component=c, inputs=["B"])
    pipeline.add_node(name="D", component=d, inputs=["C"])
    pipeline.add_node(name="E", component=e, inputs=["D"])
    assert len(pipeline.components) == 5
    assert pipeline.components["A"] == a
    assert pipeline.components["B"] == b
    assert pipeline.components["C"] == c
    assert pipeline.components["D"] == d
    assert pipeline.components["E"] == e
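

# Pipeline.get_document_store() is expected to find the document store either because it was added
# as a node itself or through a retriever's document_store; the tests below also check that an
# ambiguous setup with more than one document store raises instead of guessing.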


def test_pipeline_get_document_store_from_components():
    doc_store = MockDocumentStore()
    pipeline = Pipeline()
    pipeline.add_node(name="A", component=doc_store, inputs=["File"])

    assert doc_store == pipeline.get_document_store()


def test_pipeline_get_document_store_from_components_multiple_doc_stores():
    doc_store_a = MockDocumentStore()
    doc_store_b = MockDocumentStore()
    pipeline = Pipeline()
    pipeline.add_node(name="A", component=doc_store_a, inputs=["File"])
    pipeline.add_node(name="B", component=doc_store_b, inputs=["File"])

    with pytest.raises(Exception, match="Multiple Document Stores found in Pipeline"):
        pipeline.get_document_store()


def test_pipeline_get_document_store_from_retriever():
    doc_store = MockDocumentStore()
    retriever = DummyRetriever(document_store=doc_store)
    pipeline = Pipeline()
    pipeline.add_node(name="A", component=retriever, inputs=["Query"])

    assert doc_store == pipeline.get_document_store()


def test_pipeline_get_document_store_from_dual_retriever():
    doc_store = MockDocumentStore()
    retriever_a = DummyRetriever(document_store=doc_store)
    retriever_b = DummyRetriever(document_store=doc_store)
    join_node = JoinNode()
    pipeline = Pipeline()
    pipeline.add_node(name="A", component=retriever_a, inputs=["Query"])
    pipeline.add_node(name="B", component=retriever_b, inputs=["Query"])
    pipeline.add_node(name="C", component=join_node, inputs=["A", "B"])

    assert doc_store == pipeline.get_document_store()


def test_pipeline_get_document_store_multiple_doc_stores_from_dual_retriever():
    doc_store_a = MockDocumentStore()
    doc_store_b = MockDocumentStore()
    retriever_a = DummyRetriever(document_store=doc_store_a)
    retriever_b = DummyRetriever(document_store=doc_store_b)
    join_node = JoinNode()
    pipeline = Pipeline()
    pipeline.add_node(name="A", component=retriever_a, inputs=["Query"])
    pipeline.add_node(name="B", component=retriever_b, inputs=["Query"])
    pipeline.add_node(name="C", component=join_node, inputs=["A", "B"])

    with pytest.raises(Exception, match="Multiple Document Stores found in Pipeline"):
        pipeline.get_document_store()
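

# run_batch() returns one list of answers per query, so result["answers"] is a list of lists even
# when only a single query is passed in; the two tests below check both the single- and the
# multi-query case against an Elasticsearch-backed pipeline loaded from YAML.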


@pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True)
def test_batch_querying_single_query(document_store_with_docs):
    query_pipeline = Pipeline.load_from_yaml(
        SAMPLES_PATH / "pipeline" / "test.haystack-pipeline.yml", pipeline_name="query_pipeline"
    )
    query_pipeline.components["ESRetriever"].document_store = document_store_with_docs
    result = query_pipeline.run_batch(queries=["Who lives in Berlin?"])
    assert isinstance(result["answers"], list)
    assert isinstance(result["answers"][0], list)
    assert isinstance(result["answers"][0][0], Answer)
    assert len(result["answers"]) == 1  # Predictions for 1 collection of docs (single query)
    assert len(result["answers"][0]) == 5  # Reader top-k set to 5


@pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True)
def test_batch_querying_multiple_queries(document_store_with_docs):
    query_pipeline = Pipeline.load_from_yaml(
        SAMPLES_PATH / "pipeline" / "test.haystack-pipeline.yml", pipeline_name="query_pipeline"
    )
    query_pipeline.components["ESRetriever"].document_store = document_store_with_docs
    result = query_pipeline.run_batch(queries=["Who lives in Berlin?", "Who lives in New York?"])
    # As we have a list of queries as input, this Pipeline will retrieve a list of relevant documents for each of the
    # queries (resulting in a list of lists of documents), apply the reader with each query and their corresponding
    # retrieved documents and return the predicted answers for each document list
    assert isinstance(result["answers"], list)
    assert isinstance(result["answers"][0], list)
    assert isinstance(result["answers"][0][0], Answer)
    assert len(result["answers"]) == 2  # Predictions for 2 collections of documents
    assert len(result["answers"][0]) == 5  # top-k of 5 for collection of docs