from pathlib import Path

import os
import json
import logging
import platform
import sys
from typing import Tuple

import pytest
from requests import PreparedRequest
import responses
import yaml
import pandas as pd

from haystack import __version__
from haystack.document_stores.deepsetcloud import DeepsetCloudDocumentStore
from haystack.document_stores.elasticsearch import ElasticsearchDocumentStore
from haystack.nodes.other.join_docs import JoinDocuments
from haystack.nodes.base import BaseComponent
from haystack.nodes.retriever.sparse import BM25Retriever
from haystack.pipelines import Pipeline, RootNode
from haystack.pipelines.config import validate_config_strings
from haystack.pipelines.utils import generate_code
from haystack.errors import PipelineConfigError
from haystack.nodes import PreProcessor, TextConverter
from haystack.utils.deepsetcloud import DeepsetCloudError
from haystack import Document, Answer
from haystack.nodes.other.route_documents import RouteDocuments
from haystack.nodes.other.join_answers import JoinAnswers

from .conftest import (
    MOCK_DC,
    DC_API_ENDPOINT,
    DC_API_KEY,
    DC_TEST_INDEX,
    SAMPLES_PATH,
    MockDocumentStore,
    MockRetriever,
    MockNode,
    deepset_cloud_fixture,
)


logger = logging.getLogger(__name__)
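# MOCK_DC, the DC_API_* constants, DC_TEST_INDEX, SAMPLES_PATH and the Mock* helper classes
# are provided by this test suite's conftest.py. From how they are used below, MOCK_DC appears
# to toggle whether the deepset Cloud tests register mocked HTTP responses (via the `responses`
# library) or talk to a real deepset Cloud instance.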


@pytest.fixture(scope="function")
def reduce_windows_recursion_limit():
    """
    Prevents the Windows CI from crashing with a stack overflow in situations where we want to provoke a RecursionError.
    """
    is_windows = platform.system() == "Windows"
    default_recursion_limit = sys.getrecursionlimit()
    if is_windows:
        reduced_recursion_limit = default_recursion_limit // 2
        logger.warning(f"Reducing recursion limit to {reduced_recursion_limit}")
        sys.setrecursionlimit(reduced_recursion_limit)
    yield
    if is_windows:
        logger.warning(f"Resetting recursion limit to {default_recursion_limit}")
        sys.setrecursionlimit(default_recursion_limit)


class ParentComponent(BaseComponent):
    outgoing_edges = 1

    def __init__(self, dependent: BaseComponent) -> None:
        super().__init__()

    def run(*args, **kwargs):
        logging.info("ParentComponent run() was called")


class ParentComponent2(BaseComponent):
    outgoing_edges = 1

    def __init__(self, dependent: BaseComponent) -> None:
        super().__init__()

    def run(*args, **kwargs):
        logging.info("ParentComponent2 run() was called")


class ChildComponent(BaseComponent):
    def __init__(self, some_key: str = None) -> None:
        super().__init__()

    def run(*args, **kwargs):
        logging.info("ChildComponent run() was called")


class DummyRetriever(MockRetriever):
    def __init__(self, document_store):
        self.document_store = document_store

    def run(self):
        test = "test"
        return {"test": test}, "output_1"


class JoinNode(RootNode):
    def run(self, output=None, inputs=None):
        if inputs:
            output = ""
            for input_dict in inputs:
                output += input_dict["output"]
        return {"output": output}, "output_1"
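# Note: the classes above are deliberately minimal stand-ins. They only carry the constructor
# signatures and outgoing edges that the get_config()/generate_code() serialization tests below
# need; their run() methods are not exercised by these tests.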


#
# Integration tests
#


@pytest.mark.integration
@pytest.mark.elasticsearch
def test_to_code_creates_same_pipelines():
    index_pipeline = Pipeline.load_from_yaml(
        SAMPLES_PATH / "pipeline" / "test.haystack-pipeline.yml", pipeline_name="indexing_pipeline"
    )
    query_pipeline = Pipeline.load_from_yaml(
        SAMPLES_PATH / "pipeline" / "test.haystack-pipeline.yml", pipeline_name="query_pipeline"
    )
    query_pipeline_code = query_pipeline.to_code(pipeline_variable_name="query_pipeline_from_code")
    index_pipeline_code = index_pipeline.to_code(pipeline_variable_name="index_pipeline_from_code")

    exec(query_pipeline_code)
    exec(index_pipeline_code)
    assert locals()["query_pipeline_from_code"] is not None
    assert locals()["index_pipeline_from_code"] is not None
    assert query_pipeline.get_config() == locals()["query_pipeline_from_code"].get_config()
    assert index_pipeline.get_config() == locals()["index_pipeline_from_code"].get_config()


#
# Unit tests
#


def test_get_config_creates_dependent_component():
    child = ChildComponent()
    parent = ParentComponent(dependent=child)
    pipeline = Pipeline()
    pipeline.add_node(component=parent, name="parent", inputs=["Query"])

    expected_pipelines = [{"name": "query", "nodes": [{"name": "parent", "inputs": ["Query"]}]}]
    expected_components = [
        {"name": "parent", "type": "ParentComponent", "params": {"dependent": "ChildComponent"}},
        {"name": "ChildComponent", "type": "ChildComponent", "params": {}},
    ]

    config = pipeline.get_config()
    for expected_pipeline in expected_pipelines:
        assert expected_pipeline in config["pipelines"]
    for expected_component in expected_components:
        assert expected_component in config["components"]
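# For reference: get_config() returns a dict with the same shape the YAML pipeline files use.
# Roughly (a sketch only; the exact fields depend on the pipeline that was built):
#
#     {
#         "version": "<haystack version>",
#         "components": [{"name": ..., "type": ..., "params": {...}}, ...],
#         "pipelines": [{"name": "query", "nodes": [{"name": ..., "inputs": [...]}, ...]}],
#     }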


def test_get_config_creates_only_one_dependent_component_referenced_by_multiple_parents():
    child = ChildComponent()
    parent = ParentComponent(dependent=child)
    parent2 = ParentComponent2(dependent=child)
    p_ensemble = Pipeline()
    p_ensemble.add_node(component=parent, name="Parent1", inputs=["Query"])
    p_ensemble.add_node(component=parent2, name="Parent2", inputs=["Query"])
    p_ensemble.add_node(component=JoinDocuments(join_mode="merge"), name="JoinResults", inputs=["Parent1", "Parent2"])

    expected_components = [
        {"name": "Parent1", "type": "ParentComponent", "params": {"dependent": "ChildComponent"}},
        {"name": "ChildComponent", "type": "ChildComponent", "params": {}},
        {"name": "Parent2", "type": "ParentComponent2", "params": {"dependent": "ChildComponent"}},
        {"name": "JoinResults", "type": "JoinDocuments", "params": {"join_mode": "merge"}},
    ]

    expected_pipelines = [
        {
            "name": "query",
            "nodes": [
                {"name": "Parent1", "inputs": ["Query"]},
                {"name": "Parent2", "inputs": ["Query"]},
                {"name": "JoinResults", "inputs": ["Parent1", "Parent2"]},
            ],
        }
    ]

    config = p_ensemble.get_config()
    for expected_pipeline in expected_pipelines:
        assert expected_pipeline in config["pipelines"]
    for expected_component in expected_components:
        assert expected_component in config["components"]


def test_get_config_creates_two_different_dependent_components_of_same_type():
    child_a = ChildComponent(some_key="A")
    child_b = ChildComponent(some_key="B")
    parent = ParentComponent(dependent=child_a)
    parent2 = ParentComponent(dependent=child_b)
    p_ensemble = Pipeline()
    p_ensemble.add_node(component=parent, name="ParentA", inputs=["Query"])
    p_ensemble.add_node(component=parent2, name="ParentB", inputs=["Query"])
    p_ensemble.add_node(component=JoinDocuments(join_mode="merge"), name="JoinResults", inputs=["ParentA", "ParentB"])

    expected_components = [
        {"name": "ParentA", "type": "ParentComponent", "params": {"dependent": "ChildComponent"}},
        {"name": "ChildComponent", "type": "ChildComponent", "params": {"some_key": "A"}},
        {"name": "ParentB", "type": "ParentComponent", "params": {"dependent": "ChildComponent_2"}},
        {"name": "ChildComponent_2", "type": "ChildComponent", "params": {"some_key": "B"}},
        {"name": "JoinResults", "type": "JoinDocuments", "params": {"join_mode": "merge"}},
    ]

    expected_pipelines = [
        {
            "name": "query",
            "nodes": [
                {"name": "ParentA", "inputs": ["Query"]},
                {"name": "ParentB", "inputs": ["Query"]},
                {"name": "JoinResults", "inputs": ["ParentA", "ParentB"]},
            ],
        }
    ]

    config = p_ensemble.get_config()
    for expected_pipeline in expected_pipelines:
        assert expected_pipeline in config["pipelines"]
    for expected_component in expected_components:
        assert expected_component in config["components"]


def test_get_config_reuses_same_dependent_components():
    child = ChildComponent()
    parent = ParentComponent(dependent=child)
    pipeline = Pipeline()
    pipeline.add_node(component=parent, name="parent", inputs=["Query"])
    pipeline.add_node(component=child, name="child", inputs=["parent"])
    config = pipeline.get_config()

    expected_pipelines = [
        {"name": "query", "nodes": [{"name": "parent", "inputs": ["Query"]}, {"name": "child", "inputs": ["parent"]}]}
    ]
    expected_components = [
        {"name": "parent", "type": "ParentComponent", "params": {"dependent": "child"}},
        {"name": "child", "type": "ChildComponent", "params": {}},
    ]

    config = pipeline.get_config()
    for expected_pipeline in expected_pipelines:
        assert expected_pipeline in config["pipelines"]
    for expected_component in expected_components:
        assert expected_component in config["components"]


def test_get_config_creates_different_components_if_instances_differ():
    child_a = ChildComponent()
    child_b = ChildComponent()
    child_c = ChildComponent()
    parent = ParentComponent(dependent=child_a)
    parent2 = ParentComponent(dependent=child_b)
    p_ensemble = Pipeline()
    p_ensemble.add_node(component=parent, name="ParentA", inputs=["Query"])
    p_ensemble.add_node(component=parent2, name="ParentB", inputs=["Query"])
    p_ensemble.add_node(component=child_c, name="Child", inputs=["Query"])

    expected_components = [
        {"name": "ParentA", "type": "ParentComponent", "params": {"dependent": "ChildComponent"}},
        {"name": "ChildComponent", "type": "ChildComponent", "params": {}},
        {"name": "ParentB", "type": "ParentComponent", "params": {"dependent": "ChildComponent_2"}},
        {"name": "ChildComponent_2", "type": "ChildComponent", "params": {}},
        {"name": "Child", "type": "ChildComponent", "params": {}},
    ]

    expected_pipelines = [
        {
            "name": "query",
            "nodes": [
                {"name": "ParentA", "inputs": ["Query"]},
                {"name": "ParentB", "inputs": ["Query"]},
                {"name": "Child", "inputs": ["Query"]},
            ],
        }
    ]

    config = p_ensemble.get_config()
    for expected_pipeline in expected_pipelines:
        assert expected_pipeline in config["pipelines"]
    for expected_component in expected_components:
        assert expected_component in config["components"]


def test_get_config_reuses_same_unnamed_dependent_components():
    child = ChildComponent()
    parent = ParentComponent(dependent=child)
    parent2 = ParentComponent(dependent=child)
    p_ensemble = Pipeline()
    p_ensemble.add_node(component=parent, name="ParentA", inputs=["Query"])
    p_ensemble.add_node(component=parent2, name="ParentB", inputs=["Query"])

    expected_components = [
        {"name": "ParentA", "type": "ParentComponent", "params": {"dependent": "ChildComponent"}},
        {"name": "ChildComponent", "type": "ChildComponent", "params": {}},
        {"name": "ParentB", "type": "ParentComponent", "params": {"dependent": "ChildComponent"}},
    ]

    expected_pipelines = [
        {"name": "query", "nodes": [{"name": "ParentA", "inputs": ["Query"]}, {"name": "ParentB", "inputs": ["Query"]}]}
    ]

    config = p_ensemble.get_config()
    for expected_pipeline in expected_pipelines:
        assert expected_pipeline in config["pipelines"]
    for expected_component in expected_components:
        assert expected_component in config["components"]


def test_get_config_multi_level_dependencies():
    child = ChildComponent()
    intermediate = ParentComponent(dependent=child)
    parent = ParentComponent(dependent=intermediate)
    p_ensemble = Pipeline()
    p_ensemble.add_node(component=parent, name="Parent", inputs=["Query"])

    expected_components = [
        {"name": "Parent", "type": "ParentComponent", "params": {"dependent": "ParentComponent"}},
        {"name": "ChildComponent", "type": "ChildComponent", "params": {}},
        {"name": "ParentComponent", "type": "ParentComponent", "params": {"dependent": "ChildComponent"}},
    ]

    expected_pipelines = [{"name": "query", "nodes": [{"name": "Parent", "inputs": ["Query"]}]}]

    config = p_ensemble.get_config()
    for expected_pipeline in expected_pipelines:
        assert expected_pipeline in config["pipelines"]
    for expected_component in expected_components:
        assert expected_component in config["components"]


def test_get_config_multi_level_dependencies_of_same_type():
    child = ChildComponent()
    second_intermediate = ParentComponent(dependent=child)
    intermediate = ParentComponent(dependent=second_intermediate)
    parent = ParentComponent(dependent=intermediate)
    p_ensemble = Pipeline()
    p_ensemble.add_node(component=parent, name="ParentComponent", inputs=["Query"])

    expected_components = [
        {"name": "ParentComponent_3", "type": "ParentComponent", "params": {"dependent": "ChildComponent"}},
        {"name": "ParentComponent_2", "type": "ParentComponent", "params": {"dependent": "ParentComponent_3"}},
        {"name": "ParentComponent", "type": "ParentComponent", "params": {"dependent": "ParentComponent_2"}},
        {"name": "ChildComponent", "type": "ChildComponent", "params": {}},
    ]

    expected_pipelines = [{"name": "query", "nodes": [{"name": "ParentComponent", "inputs": ["Query"]}]}]

    config = p_ensemble.get_config()
    for expected_pipeline in expected_pipelines:
        assert expected_pipeline in config["pipelines"]
    for expected_component in expected_components:
        assert expected_component in config["components"]


def test_get_config_component_with_superclass_arguments():
    class CustomBaseDocumentStore(MockDocumentStore):
        def __init__(self, base_parameter: str):
            self.base_parameter = base_parameter

    class CustomDocumentStore(CustomBaseDocumentStore):
        def __init__(self, sub_parameter: int):
            super().__init__(base_parameter="something")
            self.sub_parameter = sub_parameter

    class CustomRetriever(MockRetriever):
        def __init__(self, document_store):
            super().__init__()
            self.document_store = document_store

    document_store = CustomDocumentStore(sub_parameter=10)
    retriever = CustomRetriever(document_store=document_store)
    pipeline = Pipeline()
    pipeline.add_node(retriever, name="Retriever", inputs=["Query"])

    pipeline.get_config()
    assert pipeline.get_document_store().sub_parameter == 10
    assert pipeline.get_document_store().base_parameter == "something"


def test_get_config_custom_node_with_params():
    class CustomNode(MockNode):
        def __init__(self, param: int):
            super().__init__()
            self.param = param

    pipeline = Pipeline()
    pipeline.add_node(CustomNode(param=10), name="custom_node", inputs=["Query"])

    assert len(pipeline.get_config()["components"]) == 1
    assert pipeline.get_config()["components"][0]["params"] == {"param": 10}


def test_get_config_custom_node_with_positional_params():
    class CustomNode(MockNode):
        def __init__(self, param: int = 1):
            super().__init__()
            self.param = param

    pipeline = Pipeline()
    pipeline.add_node(CustomNode(10), name="custom_node", inputs=["Query"])

    assert len(pipeline.get_config()["components"]) == 1
    assert pipeline.get_config()["components"][0]["params"] == {"param": 10}


def test_generate_code_simple_pipeline():
    config = {
        "version": "ignore",
        "components": [
            {
                "name": "retri",
                "type": "BM25Retriever",
                "params": {"document_store": "ElasticsearchDocumentStore", "top_k": 20},
            },
            {
                "name": "ElasticsearchDocumentStore",
                "type": "ElasticsearchDocumentStore",
                "params": {"index": "my-index"},
            },
        ],
        "pipelines": [{"name": "query", "nodes": [{"name": "retri", "inputs": ["Query"]}]}],
    }

    code = generate_code(pipeline_config=config, pipeline_variable_name="p", generate_imports=False)
    assert code == (
        'elasticsearch_document_store = ElasticsearchDocumentStore(index="my-index")\n'
        "retri = BM25Retriever(document_store=elasticsearch_document_store, top_k=20)\n"
        "\n"
        "p = Pipeline()\n"
        'p.add_node(component=retri, name="retri", inputs=["Query"])'
    )
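# As the expected strings above and below suggest, generate_code() appears to derive snake_case
# variable names from the component names (e.g. "ElasticsearchDocumentStore" becomes
# elasticsearch_document_store, "retri2" becomes retri_2) and to emit document stores before the
# components that depend on them.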


def test_generate_code_imports():
    pipeline_config = {
        "version": "ignore",
        "components": [
            {"name": "DocumentStore", "type": "ElasticsearchDocumentStore"},
            {"name": "retri", "type": "BM25Retriever", "params": {"document_store": "DocumentStore"}},
            {"name": "retri2", "type": "TfidfRetriever", "params": {"document_store": "DocumentStore"}},
        ],
        "pipelines": [
            {
                "name": "Query",
                "nodes": [{"name": "retri", "inputs": ["Query"]}, {"name": "retri2", "inputs": ["Query"]}],
            }
        ],
    }

    code = generate_code(pipeline_config=pipeline_config, pipeline_variable_name="p", generate_imports=True)
    assert code == (
        "from haystack.document_stores import ElasticsearchDocumentStore\n"
        "from haystack.nodes import BM25Retriever, TfidfRetriever\n"
        "from haystack.pipelines import Pipeline\n"
        "\n"
        "document_store = ElasticsearchDocumentStore()\n"
        "retri = BM25Retriever(document_store=document_store)\n"
        "retri_2 = TfidfRetriever(document_store=document_store)\n"
        "\n"
        "p = Pipeline()\n"
        'p.add_node(component=retri, name="retri", inputs=["Query"])\n'
        'p.add_node(component=retri_2, name="retri2", inputs=["Query"])'
    )


def test_generate_code_imports_no_pipeline_cls():
    pipeline_config = {
        "version": "ignore",
        "components": [
            {"name": "DocumentStore", "type": "ElasticsearchDocumentStore"},
            {"name": "retri", "type": "BM25Retriever", "params": {"document_store": "DocumentStore"}},
        ],
        "pipelines": [{"name": "Query", "nodes": [{"name": "retri", "inputs": ["Query"]}]}],
    }

    code = generate_code(
        pipeline_config=pipeline_config,
        pipeline_variable_name="p",
        generate_imports=True,
        add_pipeline_cls_import=False,
    )
    assert code == (
        "from haystack.document_stores import ElasticsearchDocumentStore\n"
        "from haystack.nodes import BM25Retriever\n"
        "\n"
        "document_store = ElasticsearchDocumentStore()\n"
        "retri = BM25Retriever(document_store=document_store)\n"
        "\n"
        "p = Pipeline()\n"
        'p.add_node(component=retri, name="retri", inputs=["Query"])'
    )


def test_generate_code_comment():
    pipeline_config = {
        "version": "ignore",
        "components": [
            {"name": "DocumentStore", "type": "ElasticsearchDocumentStore"},
            {"name": "retri", "type": "BM25Retriever", "params": {"document_store": "DocumentStore"}},
        ],
        "pipelines": [{"name": "Query", "nodes": [{"name": "retri", "inputs": ["Query"]}]}],
    }

    comment = "This is my comment\n...and here is a new line"
    code = generate_code(pipeline_config=pipeline_config, pipeline_variable_name="p", comment=comment)
    assert code == (
        "# This is my comment\n"
        "# ...and here is a new line\n"
        "from haystack.document_stores import ElasticsearchDocumentStore\n"
        "from haystack.nodes import BM25Retriever\n"
        "from haystack.pipelines import Pipeline\n"
        "\n"
        "document_store = ElasticsearchDocumentStore()\n"
        "retri = BM25Retriever(document_store=document_store)\n"
        "\n"
        "p = Pipeline()\n"
        'p.add_node(component=retri, name="retri", inputs=["Query"])'
    )


def test_generate_code_is_component_order_invariant():
    pipeline_config = {
        "version": "ignore",
        "pipelines": [
            {
                "name": "Query",
                "nodes": [
                    {"name": "EsRetriever", "inputs": ["Query"]},
                    {"name": "EmbeddingRetriever", "inputs": ["Query"]},
                    {"name": "JoinResults", "inputs": ["EsRetriever", "EmbeddingRetriever"]},
                ],
            }
        ],
    }

    doc_store = {"name": "ElasticsearchDocumentStore", "type": "ElasticsearchDocumentStore"}
    es_retriever = {
        "name": "EsRetriever",
        "type": "BM25Retriever",
        "params": {"document_store": "ElasticsearchDocumentStore"},
    }
    emb_retriever = {
        "name": "EmbeddingRetriever",
        "type": "EmbeddingRetriever",
        "params": {
            "document_store": "ElasticsearchDocumentStore",
            "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
        },
    }
    join_node = {"name": "JoinResults", "type": "JoinDocuments"}

    component_orders = [
        [doc_store, es_retriever, emb_retriever, join_node],
        [es_retriever, emb_retriever, join_node, doc_store],
        [join_node, es_retriever, emb_retriever, doc_store],
    ]

    expected_code = (
        "elasticsearch_document_store = ElasticsearchDocumentStore()\n"
        "es_retriever = BM25Retriever(document_store=elasticsearch_document_store)\n"
        'embedding_retriever = EmbeddingRetriever(document_store=elasticsearch_document_store, embedding_model="sentence-transformers/all-MiniLM-L6-v2")\n'
        "join_results = JoinDocuments()\n"
        "\n"
        "p = Pipeline()\n"
        'p.add_node(component=es_retriever, name="EsRetriever", inputs=["Query"])\n'
        'p.add_node(component=embedding_retriever, name="EmbeddingRetriever", inputs=["Query"])\n'
        'p.add_node(component=join_results, name="JoinResults", inputs=["EsRetriever", "EmbeddingRetriever"])'
    )

    for components in component_orders:
        pipeline_config["components"] = components

        code = generate_code(pipeline_config=pipeline_config, pipeline_variable_name="p", generate_imports=False)
        assert code == expected_code


def test_generate_code_can_handle_weak_cyclic_pipelines():
    config = {
        "version": "ignore",
        "components": [
            {"name": "parent", "type": "ParentComponent", "params": {"dependent": "child"}},
            {"name": "child", "type": "ChildComponent", "params": {}},
        ],
        "pipelines": [
            {
                "name": "query",
                "nodes": [{"name": "parent", "inputs": ["Query"]}, {"name": "child", "inputs": ["parent"]}],
            }
        ],
    }
    code = generate_code(pipeline_config=config, generate_imports=False)
    assert code == (
        "child = ChildComponent()\n"
        "parent = ParentComponent(dependent=child)\n"
        "\n"
        "pipeline = Pipeline()\n"
        'pipeline.add_node(component=parent, name="parent", inputs=["Query"])\n'
        'pipeline.add_node(component=child, name="child", inputs=["parent"])'
    )


@pytest.mark.parametrize("input", ["\btest", " test", "#test", "+test", "\ttest", "\ntest", "test()"])
def test_validate_user_input_invalid(input):
    with pytest.raises(PipelineConfigError, match="is not a valid variable name or value"):
        validate_config_strings(input)


@pytest.mark.parametrize(
    "input",
    [
        "test",
        "testName",
        "test_name",
        "test-name",
        "test-name1234",
        "http://localhost:8000/my-path",
        "C:\\Some\\Windows\\Path\\To\\file.txt",
    ],
)
def test_validate_user_input_valid(input):
    validate_config_strings(input)
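# The invalid samples above cover control characters, leading whitespace, and comment- or
# call-like syntax ("#test", "test()"). These are exactly the kinds of strings that could alter
# the Python code produced by generate_code(), which is presumably why validate_config_strings()
# rejects them while plain names, URLs, and file paths pass.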


def test_validate_pipeline_config_invalid_component_name():
    with pytest.raises(PipelineConfigError, match="is not a valid variable name or value"):
        validate_config_strings({"components": [{"name": "\btest"}]})


def test_validate_pipeline_config_invalid_component_type():
    with pytest.raises(PipelineConfigError, match="is not a valid variable name or value"):
        validate_config_strings({"components": [{"name": "test", "type": "\btest"}]})


def test_validate_pipeline_config_invalid_component_param():
    with pytest.raises(PipelineConfigError, match="is not a valid variable name or value"):
        validate_config_strings({"components": [{"name": "test", "type": "test", "params": {"key": "\btest"}}]})


def test_validate_pipeline_config_invalid_component_param_key():
    with pytest.raises(PipelineConfigError, match="is not a valid variable name or value"):
        validate_config_strings({"components": [{"name": "test", "type": "test", "params": {"\btest": "test"}}]})


def test_validate_pipeline_config_invalid_pipeline_name():
    with pytest.raises(PipelineConfigError, match="is not a valid variable name or value"):
        validate_config_strings({"components": [{"name": "test", "type": "test"}], "pipelines": [{"name": "\btest"}]})


def test_validate_pipeline_config_invalid_pipeline_node_name():
    with pytest.raises(PipelineConfigError, match="is not a valid variable name or value"):
        validate_config_strings(
            {
                "components": [{"name": "test", "type": "test"}],
                "pipelines": [{"name": "test", "type": "test", "nodes": [{"name": "\btest"}]}],
            }
        )


def test_validate_pipeline_config_invalid_pipeline_node_inputs():
    with pytest.raises(PipelineConfigError, match="is not a valid variable name or value"):
        validate_config_strings(
            {
                "components": [{"name": "test", "type": "test"}],
                "pipelines": [{"name": "test", "type": "test", "nodes": [{"name": "test", "inputs": ["\btest"]}]}],
            }
        )


def test_validate_pipeline_config_recursive_config(reduce_windows_recursion_limit):
    pipeline_config = {}
    node = {"config": pipeline_config}
    pipeline_config["node"] = node

    with pytest.raises(PipelineConfigError, match="recursive"):
        validate_config_strings(pipeline_config)


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_load_from_deepset_cloud_query():
    if MOCK_DC:
        with open(SAMPLES_PATH / "dc" / "pipeline_config.json", "r") as f:
            pipeline_config_yaml_response = json.load(f)

        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/{DC_TEST_INDEX}/json",
            json=pipeline_config_yaml_response,
            status=200,
        )

        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents-query",
            json=[{"id": "test_doc", "content": "man on hores"}],
            status=200,
        )

    query_pipeline = Pipeline.load_from_deepset_cloud(
        pipeline_config_name=DC_TEST_INDEX, api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
    )
    retriever = query_pipeline.get_node("Retriever")
    document_store = retriever.document_store
    assert isinstance(retriever, BM25Retriever)
    assert isinstance(document_store, DeepsetCloudDocumentStore)
    assert document_store == query_pipeline.get_document_store()

    prediction = query_pipeline.run(query="man on horse", params={})

    assert prediction["query"] == "man on horse"
    assert len(prediction["documents"]) == 1
    assert prediction["documents"][0].id == "test_doc"


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_load_from_deepset_cloud_indexing():
    if MOCK_DC:
        with open(SAMPLES_PATH / "dc" / "pipeline_config.json", "r") as f:
            pipeline_config_yaml_response = json.load(f)

        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/{DC_TEST_INDEX}/json",
            json=pipeline_config_yaml_response,
            status=200,
        )

    indexing_pipeline = Pipeline.load_from_deepset_cloud(
        pipeline_config_name=DC_TEST_INDEX, api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, pipeline_name="indexing"
    )
    document_store = indexing_pipeline.get_node("DocumentStore")
    assert isinstance(document_store, DeepsetCloudDocumentStore)

    with pytest.raises(
        Exception, match=".*NotImplementedError.*DeepsetCloudDocumentStore currently does not support writing documents"
    ):
        indexing_pipeline.run(file_paths=[SAMPLES_PATH / "docs" / "doc_1.txt"])


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_list_pipelines_on_deepset_cloud():
    pipelines = Pipeline.list_pipelines_on_deepset_cloud(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY)
    assert len(pipelines) == 1
    assert pipelines[0]["name"] == DC_TEST_INDEX
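# The test above registers no responses of its own, so when MOCK_DC is set the mocked
# list-pipelines response presumably comes from deepset_cloud_fixture in conftest.py.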


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_save_to_deepset_cloud():
    if MOCK_DC:
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_pipeline_config",
            json={
                "name": "test_pipeline_config",
                "pipeline_id": "2184e9c1-c6ec-40a1-9b28-5d2768e5efa2",
                "status": "UNDEPLOYED",
                "created_at": "2022-02-01T09:57:03.803991+00:00",
                "deleted": False,
                "is_default": False,
                "indexing": {"status": "IN_PROGRESS", "pending_file_count": 4, "total_file_count": 33},
            },
            status=200,
        )

        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_pipeline_config_deployed",
            json={
                "name": "test_pipeline_config_deployed",
                "pipeline_id": "8184e0c1-c6ec-40a1-9b28-5d2768e5efa3",
                "status": "DEPLOYED",
                "created_at": "2022-02-09T09:57:03.803991+00:00",
                "deleted": False,
                "is_default": False,
                "indexing": {"status": "INDEXED", "pending_file_count": 0, "total_file_count": 33},
            },
            status=200,
        )

        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_pipeline_config_copy",
            json={"errors": ["Pipeline with the name test_pipeline_config_copy does not exists."]},
            status=404,
        )

        with open(SAMPLES_PATH / "dc" / "pipeline_config.json", "r") as f:
            pipeline_config_yaml_response = json.load(f)

        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/{DC_TEST_INDEX}/json",
            json=pipeline_config_yaml_response,
            status=200,
        )

        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines",
            json={"name": "test_pipeline_config_copy"},
            status=200,
        )

        responses.add(
            method=responses.PUT,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_pipeline_config/yaml",
            json={"name": "test_pipeline_config"},
            status=200,
        )

        responses.add(
            method=responses.PUT,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_pipeline_config_deployed/yaml",
            json={"errors": ["Updating the pipeline yaml is not allowed for pipelines with status: 'DEPLOYED'"]},
            status=406,
        )

    query_pipeline = Pipeline.load_from_deepset_cloud(
        pipeline_config_name=DC_TEST_INDEX, api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
    )

    index_pipeline = Pipeline.load_from_deepset_cloud(
        pipeline_config_name=DC_TEST_INDEX, api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, pipeline_name="indexing"
    )

    Pipeline.save_to_deepset_cloud(
        query_pipeline=query_pipeline,
        index_pipeline=index_pipeline,
        pipeline_config_name="test_pipeline_config_copy",
        api_endpoint=DC_API_ENDPOINT,
        api_key=DC_API_KEY,
    )

    with pytest.raises(
        ValueError,
        match="Pipeline config 'test_pipeline_config' already exists. Set `overwrite=True` to overwrite pipeline config",
    ):
        Pipeline.save_to_deepset_cloud(
            query_pipeline=query_pipeline,
            index_pipeline=index_pipeline,
            pipeline_config_name="test_pipeline_config",
            api_endpoint=DC_API_ENDPOINT,
            api_key=DC_API_KEY,
        )

    Pipeline.save_to_deepset_cloud(
        query_pipeline=query_pipeline,
        index_pipeline=index_pipeline,
        pipeline_config_name="test_pipeline_config",
        api_endpoint=DC_API_ENDPOINT,
        api_key=DC_API_KEY,
        overwrite=True,
    )

    with pytest.raises(
        ValueError,
        match="Deployed pipeline configs are not allowed to be updated. Please undeploy pipeline config 'test_pipeline_config_deployed' first",
    ):
        Pipeline.save_to_deepset_cloud(
            query_pipeline=query_pipeline,
            index_pipeline=index_pipeline,
            pipeline_config_name="test_pipeline_config_deployed",
            api_endpoint=DC_API_ENDPOINT,
            api_key=DC_API_KEY,
            overwrite=True,
        )


@pytest.mark.integration
@pytest.mark.elasticsearch
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_save_nonexisting_pipeline_to_deepset_cloud():
    if MOCK_DC:

        def dc_document_store_matcher(request: PreparedRequest) -> Tuple[bool, str]:
            matches = False
            reason = "No DeepsetCloudDocumentStore found."
            request_body = request.body or ""
            json_body = yaml.safe_load(request_body)
            components = json_body["components"]
            for component in components:
                if component["type"].endswith("DocumentStore"):
                    if component["type"] == "DeepsetCloudDocumentStore":
                        matches = True
                    else:
                        matches = False
                        reason = f"Component {component['name']} is of type {component['type']} and not DeepsetCloudDocumentStore"
                        break
            return matches, reason

        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
            json={"errors": ["Pipeline with the name test_pipeline_config_copy does not exists."]},
            status=404,
        )

        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines",
            json={"name": "test_new_non_existing_pipeline"},
            status=201,
            match=[dc_document_store_matcher],
        )

    es_document_store = ElasticsearchDocumentStore()
    es_retriever = BM25Retriever(document_store=es_document_store)
    file_converter = TextConverter()
    preprocessor = PreProcessor()

    query_pipeline = Pipeline()
    query_pipeline.add_node(component=es_retriever, name="Retriever", inputs=["Query"])
    index_pipeline = Pipeline()
    index_pipeline.add_node(component=file_converter, name="FileConverter", inputs=["File"])
    index_pipeline.add_node(component=preprocessor, name="Preprocessor", inputs=["FileConverter"])
    index_pipeline.add_node(component=es_document_store, name="DocumentStore", inputs=["Preprocessor"])

    Pipeline.save_to_deepset_cloud(
        query_pipeline=query_pipeline,
        index_pipeline=index_pipeline,
        pipeline_config_name="test_new_non_existing_pipeline",
        api_endpoint=DC_API_ENDPOINT,
        api_key=DC_API_KEY,
    )


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_deploy_on_deepset_cloud_non_existing_pipeline():
    if MOCK_DC:
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
            json={"errors": ["Pipeline with the name test_pipeline_config_copy does not exists."]},
            status=404,
        )

    with pytest.raises(DeepsetCloudError, match="Pipeline config 'test_new_non_existing_pipeline' does not exist."):
        Pipeline.deploy_on_deepset_cloud(
            pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
        )


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_undeploy_on_deepset_cloud_non_existing_pipeline():
    if MOCK_DC:
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
            json={"errors": ["Pipeline with the name test_pipeline_config_copy does not exists."]},
            status=404,
        )

    with pytest.raises(DeepsetCloudError, match="Pipeline config 'test_new_non_existing_pipeline' does not exist."):
        Pipeline.undeploy_on_deepset_cloud(
            pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
        )


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_deploy_on_deepset_cloud():
    if MOCK_DC:
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline/deploy",
            json={"status": "DEPLOYMENT_SCHEDULED"},
            status=200,
        )

        # The status is UNDEPLOYED at first; after deploy() it is reported as in progress twice and then as DEPLOYED
        status_flow = ["UNDEPLOYED", "DEPLOYMENT_IN_PROGRESS", "DEPLOYMENT_IN_PROGRESS", "DEPLOYED"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )

    Pipeline.deploy_on_deepset_cloud(
        pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
    )
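# Why one GET response is queued per status: the `responses` library serves registered responses
# for the same URL in registration order, so deploy_on_deepset_cloud() can be observed polling
# the pipeline status until it reaches the expected end state. The same pattern is used in the
# undeploy and failure tests below.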
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
|
|
|
|
@responses.activate
|
|
|
|
def test_undeploy_on_deepset_cloud():
|
|
|
|
if MOCK_DC:
|
|
|
|
responses.add(
|
|
|
|
method=responses.POST,
|
|
|
|
url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline/undeploy",
|
|
|
|
json={"status": "UNDEPLOYMENT_SCHEDULED"},
|
|
|
|
status=200,
|
|
|
|
)
|
|
|
|
|
|
|
|
# status will be first undeployed, after deploy() it's in progress twice and the third time deployed
|
|
|
|
status_flow = ["DEPLOYED", "UNDEPLOYMENT_IN_PROGRESS", "UNDEPLOYMENT_IN_PROGRESS", "UNDEPLOYED"]
|
|
|
|
for status in status_flow:
|
|
|
|
responses.add(
|
|
|
|
method=responses.GET,
|
|
|
|
url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
|
|
|
|
json={"status": status},
|
|
|
|
status=200,
|
|
|
|
)
|
|
|
|
|
|
|
|
Pipeline.undeploy_on_deepset_cloud(
|
|
|
|
pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
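# The deploy/undeploy tests below all register the same kind of GET-status mocks. The function
# below is only an illustrative sketch of how that setup could be factored out; it is not used by
# the tests in this file and its name is hypothetical. It relies only on the `responses` library
# and the DC_API_ENDPOINT constant already used above.
def add_mock_status_flow(pipeline_config_name: str, status_flow: list):
    # Register one mocked GET response per expected status, in the order the client will poll them.
    for status in status_flow:
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/{pipeline_config_name}",
            json={"status": status},
            status=200,
        )

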
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_deploy_on_deepset_cloud_state_already_satisfied():
    if MOCK_DC:
        # the pipeline is already deployed, so deploy() returns without waiting for any transition
        status_flow = ["DEPLOYED"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )

    Pipeline.deploy_on_deepset_cloud(
        pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
    )


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_undeploy_on_deepset_cloud_state_already_satisfied():
    if MOCK_DC:
        # the pipeline is already undeployed, so undeploy() returns without waiting for any transition
        status_flow = ["UNDEPLOYED"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )

    Pipeline.undeploy_on_deepset_cloud(
        pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
    )


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_deploy_on_deepset_cloud_failed():
    if MOCK_DC:
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline/deploy",
            json={"status": "DEPLOYMENT_SCHEDULED"},
            status=200,
        )

        # the pipeline is initially undeployed; deployment goes in progress but then falls back to undeployed, i.e. it failed
        status_flow = ["UNDEPLOYED", "DEPLOYMENT_IN_PROGRESS", "UNDEPLOYED"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )

    with pytest.raises(
        DeepsetCloudError, match="Deployment of pipeline config 'test_new_non_existing_pipeline' failed."
    ):
        Pipeline.deploy_on_deepset_cloud(
            pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
        )


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_undeploy_on_deepset_cloud_failed():
    if MOCK_DC:
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline/undeploy",
            json={"status": "UNDEPLOYMENT_SCHEDULED"},
            status=200,
        )

        # the pipeline is initially deployed; undeployment goes in progress but then falls back to deployed, i.e. it failed
        status_flow = ["DEPLOYED", "UNDEPLOYMENT_IN_PROGRESS", "DEPLOYED"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )

    with pytest.raises(
        DeepsetCloudError, match="Undeployment of pipeline config 'test_new_non_existing_pipeline' failed."
    ):
        Pipeline.undeploy_on_deepset_cloud(
            pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
        )


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_deploy_on_deepset_cloud_invalid_initial_state():
    if MOCK_DC:
        status_flow = ["UNDEPLOYMENT_SCHEDULED"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )

    with pytest.raises(
        DeepsetCloudError,
        match="Pipeline config 'test_new_non_existing_pipeline' is in invalid state 'UNDEPLOYMENT_SCHEDULED' to be transitioned to 'DEPLOYED'.",
    ):
        Pipeline.deploy_on_deepset_cloud(
            pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
        )


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_undeploy_on_deepset_cloud_invalid_initial_state():
    if MOCK_DC:
        status_flow = ["DEPLOYMENT_SCHEDULED"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )

    with pytest.raises(
        DeepsetCloudError,
        match="Pipeline config 'test_new_non_existing_pipeline' is in invalid state 'DEPLOYMENT_SCHEDULED' to be transitioned to 'UNDEPLOYED'.",
    ):
        Pipeline.undeploy_on_deepset_cloud(
            pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
        )


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_deploy_on_deepset_cloud_invalid_state_in_progress():
    if MOCK_DC:
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline/deploy",
            json={"status": "DEPLOYMENT_SCHEDULED"},
            status=200,
        )

        # the pipeline is initially undeployed; while the deployment is awaited, an undeployment is reported to be in progress
        status_flow = ["UNDEPLOYED", "UNDEPLOYMENT_IN_PROGRESS"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )

    with pytest.raises(
        DeepsetCloudError,
        match="Deployment of pipeline config 'test_new_non_existing_pipeline' aborted. Undeployment was requested.",
    ):
        Pipeline.deploy_on_deepset_cloud(
            pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
        )


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_undeploy_on_deepset_cloud_invalid_state_in_progress():
    if MOCK_DC:
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline/undeploy",
            json={"status": "UNDEPLOYMENT_SCHEDULED"},
            status=200,
        )

        # the pipeline is initially deployed; while the undeployment is awaited, a deployment is reported to be in progress
        status_flow = ["DEPLOYED", "DEPLOYMENT_IN_PROGRESS"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )

    with pytest.raises(
        DeepsetCloudError,
        match="Undeployment of pipeline config 'test_new_non_existing_pipeline' aborted. Deployment was requested.",
    ):
        Pipeline.undeploy_on_deepset_cloud(
            pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
        )


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_deploy_on_deepset_cloud_unknown_state_in_progress():
    if MOCK_DC:
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline/deploy",
            json={"status": "DEPLOYMENT_SCHEDULED"},
            status=200,
        )

        # the pipeline is initially undeployed; the second status poll returns an unknown state
        status_flow = ["UNDEPLOYED", "ASKDHFASJDF"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )

    with pytest.raises(
        DeepsetCloudError,
        match="Deployment of pipeline config 'test_new_non_existing_pipeline ended in unexpected status: UNKNOWN",
    ):
        Pipeline.deploy_on_deepset_cloud(
            pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
        )


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_undeploy_on_deepset_cloud_unknown_state_in_progress():
    if MOCK_DC:
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline/undeploy",
            json={"status": "UNDEPLOYMENT_SCHEDULED"},
            status=200,
        )

        # the pipeline is initially deployed; the second status poll returns an unknown state
        status_flow = ["DEPLOYED", "ASKDHFASJDF"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )

    with pytest.raises(
        DeepsetCloudError,
        match="Undeployment of pipeline config 'test_new_non_existing_pipeline ended in unexpected status: UNKNOWN",
    ):
        Pipeline.undeploy_on_deepset_cloud(
            pipeline_config_name="test_new_non_existing_pipeline", api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY
        )


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_deploy_on_deepset_cloud_timeout():
    if MOCK_DC:
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline/deploy",
            json={"status": "DEPLOYMENT_SCHEDULED"},
            status=200,
        )

        # same flow as a successful deployment, but the timeout below is too short to reach the DEPLOYED state
        status_flow = ["UNDEPLOYED", "DEPLOYMENT_IN_PROGRESS", "DEPLOYMENT_IN_PROGRESS", "DEPLOYED"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )

    with pytest.raises(
        TimeoutError, match="Transitioning of 'test_new_non_existing_pipeline' to state 'DEPLOYED' timed out."
    ):
        Pipeline.deploy_on_deepset_cloud(
            pipeline_config_name="test_new_non_existing_pipeline",
            api_endpoint=DC_API_ENDPOINT,
            api_key=DC_API_KEY,
            timeout=5,
        )


@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_undeploy_on_deepset_cloud_timeout():
    if MOCK_DC:
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline/undeploy",
            json={"status": "UNDEPLOYMENT_SCHEDULED"},
            status=200,
        )

        # same flow as a successful undeployment, but the timeout below is too short to reach the UNDEPLOYED state
        status_flow = ["DEPLOYED", "UNDEPLOYMENT_IN_PROGRESS", "UNDEPLOYMENT_IN_PROGRESS", "UNDEPLOYED"]
        for status in status_flow:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
                json={"status": status},
                status=200,
            )

    with pytest.raises(
        TimeoutError, match="Transitioning of 'test_new_non_existing_pipeline' to state 'UNDEPLOYED' timed out."
    ):
        Pipeline.undeploy_on_deepset_cloud(
            pipeline_config_name="test_new_non_existing_pipeline",
            api_endpoint=DC_API_ENDPOINT,
            api_key=DC_API_KEY,
            timeout=5,
        )


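#
# Pipeline graph validation tests
#

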
def test_graph_validation_invalid_edge():
    docstore = MockDocumentStore()
    retriever = DummyRetriever(document_store=docstore)
    pipeline = Pipeline()
    pipeline.add_node(name="DocStore", component=docstore, inputs=["Query"])

    with pytest.raises(PipelineConfigError, match="DocStore has only 1 outgoing edge"):
        pipeline.add_node(name="Retriever", component=retriever, inputs=["DocStore.output_2"])


def test_graph_validation_non_existing_edge():
    docstore = MockDocumentStore()
    retriever = DummyRetriever(document_store=docstore)
    pipeline = Pipeline()
    pipeline.add_node(name="DocStore", component=docstore, inputs=["Query"])

    with pytest.raises(PipelineConfigError, match="'wrong_edge_label' is not a valid edge name"):
        pipeline.add_node(name="Retriever", component=retriever, inputs=["DocStore.wrong_edge_label"])


def test_graph_validation_invalid_node():
    docstore = MockDocumentStore()
    retriever = DummyRetriever(document_store=docstore)
    pipeline = Pipeline()
    pipeline.add_node(name="DocStore", component=docstore, inputs=["Query"])

    with pytest.raises(PipelineConfigError, match="Cannot find node 'InvalidNode'"):
        pipeline.add_node(name="Retriever", component=retriever, inputs=["InvalidNode"])


def test_graph_validation_invalid_root_node():
    docstore = MockDocumentStore()
    pipeline = Pipeline()

    with pytest.raises(PipelineConfigError, match="one single root node"):
        pipeline.add_node(name="DocStore", component=docstore, inputs=["InvalidNode"])


def test_graph_validation_no_root_node():
    docstore = MockNode()
    pipeline = Pipeline()

    with pytest.raises(PipelineConfigError, match="one single root node"):
        pipeline.add_node(name="Node", component=docstore, inputs=[])


def test_graph_validation_two_root_nodes():
    docstore = MockNode()
    pipeline = Pipeline()

    with pytest.raises(PipelineConfigError, match="one single root node"):
        pipeline.add_node(name="Node", component=docstore, inputs=["Query", "File"])

    with pytest.raises(PipelineConfigError, match="one single root node"):
        pipeline.add_node(name="Node", component=docstore, inputs=["Query", "Query"])


def test_graph_validation_duplicate_node_instance():
    node = MockNode()
    pipeline = Pipeline()
    pipeline.add_node(name="node_a", component=node, inputs=["Query"])

    with pytest.raises(PipelineConfigError, match="You have already added the same instance to the pipeline"):
        pipeline.add_node(name="node_b", component=node, inputs=["node_a"])


def test_graph_validation_duplicate_node():
    node = MockNode()
    other_node = MockNode()
    pipeline = Pipeline()
    pipeline.add_node(name="node", component=node, inputs=["Query"])
    with pytest.raises(PipelineConfigError, match="'node' is already in the pipeline"):
        pipeline.add_node(name="node", component=other_node, inputs=["Query"])


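# In the pipelines below, JoinNode concatenates the "test" values of its incoming branches: in the
# first pipeline the branch through D yields "ABD" and the branch through E yields "ABCE", which the
# assertion expects joined as "ABDABCE"; in the second, C yields "ABC" and D yields "ABD", joined as "ABCABD".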
def test_parallel_paths_in_pipeline_graph():
    class A(RootNode):
        def run(self):
            test = "A"
            return {"test": test}, "output_1"

    class B(RootNode):
        def run(self, test):
            test += "B"
            return {"test": test}, "output_1"

    class C(RootNode):
        def run(self, test):
            test += "C"
            return {"test": test}, "output_1"

    class D(RootNode):
        def run(self, test):
            test += "D"
            return {"test": test}, "output_1"

    class E(RootNode):
        def run(self, test):
            test += "E"
            return {"test": test}, "output_1"

    class JoinNode(RootNode):
        def run(self, inputs):
            test = inputs[0]["test"] + inputs[1]["test"]
            return {"test": test}, "output_1"

    pipeline = Pipeline()
    pipeline.add_node(name="A", component=A(), inputs=["Query"])
    pipeline.add_node(name="B", component=B(), inputs=["A"])
    pipeline.add_node(name="C", component=C(), inputs=["B"])
    pipeline.add_node(name="E", component=E(), inputs=["C"])
    pipeline.add_node(name="D", component=D(), inputs=["B"])
    pipeline.add_node(name="F", component=JoinNode(), inputs=["D", "E"])
    output = pipeline.run(query="test")
    assert output["test"] == "ABDABCE"

    pipeline = Pipeline()
    pipeline.add_node(name="A", component=A(), inputs=["Query"])
    pipeline.add_node(name="B", component=B(), inputs=["A"])
    pipeline.add_node(name="C", component=C(), inputs=["B"])
    pipeline.add_node(name="D", component=D(), inputs=["B"])
    pipeline.add_node(name="E", component=JoinNode(), inputs=["C", "D"])
    output = pipeline.run(query="test")
    assert output["test"] == "ABCABD"


def test_parallel_paths_in_pipeline_graph_with_branching():
    class AWithOutput1(RootNode):
        outgoing_edges = 2

        def run(self):
            output = "A"
            return {"output": output}, "output_1"

    class AWithOutput2(RootNode):
        outgoing_edges = 2

        def run(self):
            output = "A"
            return {"output": output}, "output_2"

    class AWithOutputAll(RootNode):
        outgoing_edges = 2

        def run(self):
            output = "A"
            return {"output": output}, "output_all"

    class B(RootNode):
        def run(self, output):
            output += "B"
            return {"output": output}, "output_1"

    class C(RootNode):
        def run(self, output):
            output += "C"
            return {"output": output}, "output_1"

    class D(RootNode):
        def run(self, output):
            output += "D"
            return {"output": output}, "output_1"

    class E(RootNode):
        def run(self, output):
            output += "E"
            return {"output": output}, "output_1"

    class JoinNode(RootNode):
        def run(self, output=None, inputs=None):
            if inputs:
                output = ""
                for input_dict in inputs:
                    output += input_dict["output"]
            return {"output": output}, "output_1"

    pipeline = Pipeline()
    pipeline.add_node(name="A", component=AWithOutput1(), inputs=["Query"])
    pipeline.add_node(name="B", component=B(), inputs=["A.output_1"])
    pipeline.add_node(name="C", component=C(), inputs=["A.output_2"])
    pipeline.add_node(name="D", component=E(), inputs=["B"])
    pipeline.add_node(name="E", component=D(), inputs=["B"])
    pipeline.add_node(name="F", component=JoinNode(), inputs=["D", "E", "C"])
    output = pipeline.run(query="test")
    assert output["output"] == "ABEABD"

    pipeline = Pipeline()
    pipeline.add_node(name="A", component=AWithOutput2(), inputs=["Query"])
    pipeline.add_node(name="B", component=B(), inputs=["A.output_1"])
    pipeline.add_node(name="C", component=C(), inputs=["A.output_2"])
    pipeline.add_node(name="D", component=E(), inputs=["B"])
    pipeline.add_node(name="E", component=D(), inputs=["B"])
    pipeline.add_node(name="F", component=JoinNode(), inputs=["D", "E", "C"])
    output = pipeline.run(query="test")
    assert output["output"] == "AC"

    pipeline = Pipeline()
    pipeline.add_node(name="A", component=AWithOutputAll(), inputs=["Query"])
    pipeline.add_node(name="B", component=B(), inputs=["A.output_1"])
    pipeline.add_node(name="C", component=C(), inputs=["A.output_2"])
    pipeline.add_node(name="D", component=E(), inputs=["B"])
    pipeline.add_node(name="E", component=D(), inputs=["B"])
    pipeline.add_node(name="F", component=JoinNode(), inputs=["D", "E", "C"])
    output = pipeline.run(query="test")
    assert output["output"] == "ACABEABD"


def test_pipeline_components():
    class Node(BaseComponent):
        outgoing_edges = 1

        def run(self):
            test = "test"
            return {"test": test}, "output_1"

    a = Node()
    b = Node()
    c = Node()
    d = Node()
    e = Node()
    pipeline = Pipeline()
    pipeline.add_node(name="A", component=a, inputs=["Query"])
    pipeline.add_node(name="B", component=b, inputs=["A"])
    pipeline.add_node(name="C", component=c, inputs=["B"])
    pipeline.add_node(name="D", component=d, inputs=["C"])
    pipeline.add_node(name="E", component=e, inputs=["D"])
    assert len(pipeline.components) == 5
    assert pipeline.components["A"] == a
    assert pipeline.components["B"] == b
    assert pipeline.components["C"] == c
    assert pipeline.components["D"] == d
    assert pipeline.components["E"] == e


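# A minimal sketch (illustrative only, not exercised by any test here) of how the components
# mapping asserted above can be used, e.g. to list every node name together with its component class:
#
#     for name, component in pipeline.components.items():
#         print(name, type(component).__name__)

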
def test_pipeline_get_document_store_from_components():
    doc_store = MockDocumentStore()
    pipeline = Pipeline()
    pipeline.add_node(name="A", component=doc_store, inputs=["File"])

    assert doc_store == pipeline.get_document_store()


def test_pipeline_get_document_store_from_components_multiple_doc_stores():
    doc_store_a = MockDocumentStore()
    doc_store_b = MockDocumentStore()
    pipeline = Pipeline()
    pipeline.add_node(name="A", component=doc_store_a, inputs=["File"])
    pipeline.add_node(name="B", component=doc_store_b, inputs=["File"])

    with pytest.raises(Exception, match="Multiple Document Stores found in Pipeline"):
        pipeline.get_document_store()


def test_pipeline_get_document_store_from_retriever():
    doc_store = MockDocumentStore()
    retriever = DummyRetriever(document_store=doc_store)
    pipeline = Pipeline()
    pipeline.add_node(name="A", component=retriever, inputs=["Query"])

    assert doc_store == pipeline.get_document_store()


def test_pipeline_get_document_store_from_dual_retriever():
    doc_store = MockDocumentStore()
    retriever_a = DummyRetriever(document_store=doc_store)
    retriever_b = DummyRetriever(document_store=doc_store)
    join_node = JoinNode()
    pipeline = Pipeline()
    pipeline.add_node(name="A", component=retriever_a, inputs=["Query"])
    pipeline.add_node(name="B", component=retriever_b, inputs=["Query"])
    pipeline.add_node(name="C", component=join_node, inputs=["A", "B"])

    assert doc_store == pipeline.get_document_store()


def test_pipeline_get_document_store_multiple_doc_stores_from_dual_retriever():
    doc_store_a = MockDocumentStore()
    doc_store_b = MockDocumentStore()
    retriever_a = DummyRetriever(document_store=doc_store_a)
    retriever_b = DummyRetriever(document_store=doc_store_b)
    join_node = JoinNode()
    pipeline = Pipeline()
    pipeline.add_node(name="A", component=retriever_a, inputs=["Query"])
    pipeline.add_node(name="B", component=retriever_b, inputs=["Query"])
    pipeline.add_node(name="C", component=join_node, inputs=["A", "B"])

    with pytest.raises(Exception, match="Multiple Document Stores found in Pipeline"):
        pipeline.get_document_store()


#
# RouteDocuments tests
#


def test_routedocuments_by_content_type():
    docs = [
        Document(content="text document", content_type="text"),
        Document(
            content=pd.DataFrame(columns=["col 1", "col 2"], data=[["row 1", "row 1"], ["row 2", "row 2"]]),
            content_type="table",
        ),
    ]
    route_documents = RouteDocuments()
    result, _ = route_documents.run(documents=docs)
    assert len(result["output_1"]) == 1
    assert len(result["output_2"]) == 1
    assert result["output_1"][0].content_type == "text"
    assert result["output_2"][0].content_type == "table"


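# A sketch (illustrative only; the converter and document store names are made up) of how
# RouteDocuments is typically wired into an indexing pipeline. Downstream nodes attach to its
# numbered output edges, which the test above shows carry text documents on output_1 and table
# documents on output_2:
#
#     pipeline = Pipeline()
#     pipeline.add_node(component=text_converter, name="Converter", inputs=["File"])
#     pipeline.add_node(component=RouteDocuments(), name="Router", inputs=["Converter"])
#     pipeline.add_node(component=text_document_store, name="TextStore", inputs=["Router.output_1"])
#     pipeline.add_node(component=table_document_store, name="TableStore", inputs=["Router.output_2"])

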
def test_routedocuments_by_metafield(test_docs_xs):
    docs = [Document.from_dict(doc) if isinstance(doc, dict) else doc for doc in test_docs_xs]
    route_documents = RouteDocuments(split_by="meta_field", metadata_values=["test1", "test3", "test5"])
    result, _ = route_documents.run(docs)
    assert len(result["output_1"]) == 1
    assert len(result["output_2"]) == 1
    assert len(result["output_3"]) == 1
    assert result["output_1"][0].meta["meta_field"] == "test1"
    assert result["output_2"][0].meta["meta_field"] == "test3"
    assert result["output_3"][0].meta["meta_field"] == "test5"


#
# JoinAnswers tests
#


@pytest.mark.parametrize("join_mode", ["concatenate", "merge"])
def test_joinanswers(join_mode):
    inputs = [{"answers": [Answer(answer="answer 1", score=0.7)]}, {"answers": [Answer(answer="answer 2", score=0.8)]}]

    join_answers = JoinAnswers(join_mode=join_mode)
    result, _ = join_answers.run(inputs)
    assert len(result["answers"]) == 2
    assert result["answers"] == sorted(result["answers"], reverse=True)

    result, _ = join_answers.run(inputs, top_k_join=1)
    assert len(result["answers"]) == 1
    assert result["answers"][0].answer == "answer 2"
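

# A short sketch (illustrative only) restating the JoinAnswers behaviour checked above: answers from
# several inputs are merged into one "answers" list sorted by descending score, and top_k_join caps
# how many are kept.
#
#     join_answers = JoinAnswers(join_mode="concatenate")
#     result, _ = join_answers.run(
#         [{"answers": [Answer(answer="a", score=0.1)]}, {"answers": [Answer(answer="b", score=0.9)]}],
#         top_k_join=1,
#     )
#     assert [answer.answer for answer in result["answers"]] == ["b"]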