mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-08-25 17:08:35 +00:00
save_to_deepset_cloud: automatically convert document stores (#2283)
* automatically convert to DeepsetCloudDocumentStore
* shorten info text
* fix typo
* the -> this
* add test
* ensure request body has only DeepsetCloudDocumentStores
* mark test as elasticsearch to fix milvus1 CI
This commit is contained in:
parent
ccef3effd9
commit
4b46f2047b
@ -373,7 +373,18 @@ class BasePipeline:
|
||||
pipelines = query_config["pipelines"] + index_config["pipelines"]
|
||||
all_components = query_config["components"] + index_config["components"]
|
||||
distinct_components = [c for c in {component["name"]: component for component in all_components}.values()]
|
||||
config = {"components": distinct_components, "pipelines": pipelines, "version": "0.9"}
|
||||
document_stores = [c for c in distinct_components if c["type"].endswith("DocumentStore")]
|
||||
for document_store in document_stores:
|
||||
if document_store["type"] != "DeepsetCloudDocumentStore":
|
||||
logger.info(
|
||||
f"In order to be used on Deepset Cloud, component '{document_store['name']}' of type '{document_store['type']}' "
|
||||
f"has been automatically converted to type DeepsetCloudDocumentStore. "
|
||||
f"Usually this replacement will result in equivalent pipeline quality. "
|
||||
f"However depending on chosen settings of '{document_store['name']}' differences might occur."
|
||||
)
|
||||
document_store["type"] = "DeepsetCloudDocumentStore"
|
||||
document_store["params"] = {}
|
||||
config = {"components": distinct_components, "pipelines": pipelines, "version": __version__}
|
||||
|
||||
client = DeepsetCloud.get_pipeline_client(api_key=api_key, api_endpoint=api_endpoint, workspace=workspace)
|
||||
pipeline_config_info = client.get_pipeline_config_info(pipeline_config_name=pipeline_config_name)
|
||||
|
@ -2,11 +2,14 @@ from pathlib import Path
|
||||
|
||||
import os
|
||||
import json
|
||||
from typing import Tuple
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
from requests import PreparedRequest
|
||||
import responses
|
||||
import yaml
|
||||
|
||||
from haystack import __version__, Document, Answer, JoinAnswers
|
||||
from haystack.document_stores.base import BaseDocumentStore
|
||||
@ -19,7 +22,7 @@ from haystack.nodes.retriever.sparse import ElasticsearchRetriever
|
||||
from haystack.pipelines import Pipeline, DocumentSearchPipeline, RootNode, ExtractiveQAPipeline
|
||||
from haystack.pipelines.config import _validate_user_input, validate_config
|
||||
from haystack.pipelines.utils import generate_code
|
||||
from haystack.nodes import DensePassageRetriever, EmbeddingRetriever, RouteDocuments
|
||||
from haystack.nodes import DensePassageRetriever, EmbeddingRetriever, RouteDocuments, PreProcessor, TextConverter
|
||||
|
||||
from conftest import MOCK_DC, DC_API_ENDPOINT, DC_API_KEY, DC_TEST_INDEX, SAMPLES_PATH, deepset_cloud_fixture
|
||||
|
||||
@ -746,6 +749,64 @@ def test_save_to_deepset_cloud():
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.elasticsearch
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_save_nonexisting_pipeline_to_deepset_cloud():
    """Saving a not-yet-existing pipeline to Deepset Cloud must create it via POST and
    automatically convert any non-DeepsetCloud document store in the uploaded config
    to DeepsetCloudDocumentStore."""
    if MOCK_DC:

        def dc_document_store_matcher(request: PreparedRequest) -> Tuple[bool, str]:
            # Responses matcher: accept the POST only if every *DocumentStore component
            # in the uploaded YAML config is a DeepsetCloudDocumentStore.
            matches = False
            reason = "No DeepsetCloudDocumentStore found."
            request_body = request.body or ""
            json_body = yaml.safe_load(request_body)
            components = json_body["components"]
            for component in components:
                if component["type"].endswith("DocumentStore"):
                    if component["type"] == "DeepsetCloudDocumentStore":
                        matches = True
                    else:
                        # One wrong store type fails the match immediately.
                        matches = False
                        reason = f"Component {component['name']} is of type {component['type']} and not DeepsetCloudDocumentStore"
                        break
            return matches, reason

        # GET must 404 so save_to_deepset_cloud takes the "create new pipeline" path.
        # NOTE: fixed the mocked error message to reference the pipeline actually used
        # by this test (was a copy-paste leftover: "test_pipeline_config_copy").
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines/test_new_non_existing_pipeline",
            json={"errors": ["Pipeline with the name test_new_non_existing_pipeline does not exists."]},
            status=404,
        )

        # POST creating the pipeline only succeeds if the request body passes the matcher.
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines",
            json={"name": "test_new_non_existing_pipeline"},
            status=201,
            match=[dc_document_store_matcher],
        )

    # Build pipelines around an Elasticsearch store — it must be converted on upload.
    es_document_store = ElasticsearchDocumentStore()
    es_retriever = ElasticsearchRetriever(document_store=es_document_store)
    file_converter = TextConverter()
    preprocessor = PreProcessor()

    query_pipeline = Pipeline()
    query_pipeline.add_node(component=es_retriever, name="Retriever", inputs=["Query"])
    index_pipeline = Pipeline()
    index_pipeline.add_node(component=file_converter, name="FileConverter", inputs=["File"])
    index_pipeline.add_node(component=preprocessor, name="Preprocessor", inputs=["FileConverter"])
    index_pipeline.add_node(component=es_document_store, name="DocumentStore", inputs=["Preprocessor"])

    # Raises (via the matcher-gated mock) if the uploaded config still contains a
    # non-DeepsetCloud document store.
    Pipeline.save_to_deepset_cloud(
        query_pipeline=query_pipeline,
        index_pipeline=index_pipeline,
        pipeline_config_name="test_new_non_existing_pipeline",
        api_endpoint=DC_API_ENDPOINT,
        api_key=DC_API_KEY,
    )
|
||||
|
||||
|
||||
# @pytest.mark.slow
|
||||
# @pytest.mark.elasticsearch
|
||||
# @pytest.mark.parametrize(
|
||||
|
Loading…
x
Reference in New Issue
Block a user