Warnings for write operations of DeepsetCloudDocumentStore (#2565)

* log inputs to write operations

* Update Documentation & Code Style

* adjust tests

* simplify by using decorator for write operation functions

* Update Documentation & Code Style

* fix comma

* fix comma in test

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
tstadel 2022-05-17 17:53:55 +02:00 committed by GitHub
parent 686a19b35d
commit 110b9c2b0a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 70 additions and 21 deletions

View File

@ -4195,6 +4195,16 @@ query result
# Module deepsetcloud
<a id="deepsetcloud.disable_and_log"></a>
#### disable\_and\_log
```python
def disable_and_log(func)
```
Decorator that disables a write operation: instead of executing it, a warning is shown once per document store instance and the inputs of every call are logged.
<a id="deepsetcloud.DeepsetCloudDocumentStore"></a>
## DeepsetCloudDocumentStore
@ -4522,6 +4532,7 @@ Otherwise raw similarity scores (e.g. cosine or dot_product) will be used.
#### DeepsetCloudDocumentStore.write\_documents
```python
@disable_and_log
def write_documents(documents: Union[List[dict], List[Document]], index: Optional[str] = None, batch_size: int = 10_000, duplicate_documents: Optional[str] = None, headers: Optional[Dict[str, str]] = None)
```

View File

@ -1,3 +1,4 @@
from functools import wraps
from typing import List, Optional, Union, Dict, Generator
import json
@ -7,11 +8,32 @@ import numpy as np
from haystack.document_stores import KeywordDocumentStore
from haystack.errors import HaystackError
from haystack.schema import Document, Label
from haystack.utils import DeepsetCloud, DeepsetCloudError
from haystack.utils import DeepsetCloud, DeepsetCloudError, args_to_kwargs
logger = logging.getLogger(__name__)
def disable_and_log(func):
    """
    Turn a write operation into a no-op that only reports how it was called.

    The decorated method is never executed. The first call on a given instance
    emits a warning that write operations are unsupported; every call logs the
    received inputs at INFO level, keyed by parameter name.

    The instance must expose a boolean attribute ``disabled_write_warning_shown``,
    which is read and then set to ``True`` once the warning has been emitted.

    :param func: The write method to disable.
    :return: A replacement method with the metadata of ``func`` that returns ``None``.
    """

    @wraps(func)
    def log_instead_of_write(self, *args, **kwargs):
        already_warned = self.disabled_write_warning_shown
        if not already_warned:
            logger.warning(
                "Note that DeepsetCloudDocumentStore does not support write operations. "
                "In order to verify your pipeline works correctly, each input to write operations will be logged."
            )
            # Remember on the instance that the warning was shown so it is not repeated.
            self.disabled_write_warning_shown = True

        # Name the positional inputs first; explicit keyword arguments are merged on top.
        parameters = dict(args_to_kwargs(args, func))
        parameters.update(kwargs)
        logger.info(f"Input to {func.__name__}: {parameters}")

    return log_instead_of_write
class DeepsetCloudDocumentStore(KeywordDocumentStore):
def __init__(
self,
@ -118,6 +140,8 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore):
api_key=api_key, api_endpoint=api_endpoint, workspace=workspace, evaluation_set=label_index
)
self.disabled_write_warning_shown = False
super().__init__()
def get_all_documents(
@ -560,6 +584,7 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore):
def _create_document_field_map(self) -> Dict:
return {}
@disable_and_log
def write_documents(
self,
documents: Union[List[dict], List[Document]],
@ -589,7 +614,7 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore):
:return: None
"""
raise NotImplementedError("DeepsetCloudDocumentStore currently does not support writing documents.")
pass
def get_evaluation_sets(self) -> List[dict]:
"""
@ -630,22 +655,25 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore):
"""
return self.evaluation_set_client.get_labels_count(evaluation_set=index)
@disable_and_log
def write_labels(
self,
labels: Union[List[Label], List[dict]],
index: Optional[str] = None,
headers: Optional[Dict[str, str]] = None,
):
raise NotImplementedError("DeepsetCloudDocumentStore currently does not support labels.")
pass
@disable_and_log
def delete_all_documents(
self,
index: Optional[str] = None,
filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None,
headers: Optional[Dict[str, str]] = None,
):
raise NotImplementedError("DeepsetCloudDocumentStore currently does not support deleting documents.")
pass
@disable_and_log
def delete_documents(
self,
index: Optional[str] = None,
@ -653,8 +681,9 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore):
filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None,
headers: Optional[Dict[str, str]] = None,
):
raise NotImplementedError("DeepsetCloudDocumentStore currently does not support deleting documents.")
pass
@disable_and_log
def delete_labels(
self,
index: Optional[str] = None,
@ -662,7 +691,8 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore):
filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None,
headers: Optional[Dict[str, str]] = None,
):
raise NotImplementedError("DeepsetCloudDocumentStore currently does not support labels.")
pass
@disable_and_log
def delete_index(self, index: str):
raise NotImplementedError("DeepsetCloudDocumentStore currently does not support deleting indexes.")
pass

View File

@ -10,6 +10,7 @@ import logging
from haystack.schema import Document, MultiLabel
from haystack.errors import PipelineSchemaError
from haystack.telemetry import send_custom_event
from haystack.utils import args_to_kwargs
logger = logging.getLogger(__name__)
@ -37,18 +38,12 @@ def exportable_to_yaml(init_func):
# Inner classes are heavily used in tests.
if init_func.__qualname__.endswith(f"{self.__class__.__name__}.{init_func.__name__}"):
# Store all the named input parameters in self._component_config
for k, v in kwargs.items():
# Store all the input parameters in self._component_config
args_as_kwargs = args_to_kwargs(args, init_func)
params = {**args_as_kwargs, **kwargs}
for k, v in params.items():
self._component_config["params"][k] = v
# Store unnamed input parameters in self._component_config too by inferring their names
sig = inspect.signature(init_func)
parameter_names = list(sig.parameters.keys())
# we can be sure that the first one is always "self"
arg_names = parameter_names[1 : 1 + len(args)]
for arg, arg_name in zip(args, arg_names):
self._component_config["params"][arg_name] = arg
return wrapper_exportable_to_yaml

View File

@ -1,3 +1,4 @@
from haystack.utils.reflection import args_to_kwargs
from haystack.utils.preprocessing import convert_files_to_docs, tika_convert_files_to_docs
from haystack.utils.import_utils import fetch_archive_from_http
from haystack.utils.cleaning import clean_wiki_text

View File

@ -0,0 +1,12 @@
import inspect
from typing import Any, Dict, Tuple, Callable
def args_to_kwargs(args: Tuple, func: Callable) -> Dict[str, Any]:
    """
    Map positional argument values to the parameter names of ``func``.

    A leading ``self`` or ``cls`` parameter is skipped, so ``args`` must not
    contain the bound instance or class. Values are bound to positional
    parameters only, in declaration order. If ``func`` declares a
    var-positional parameter (``*args``), all remaining values are collected
    as a tuple under that parameter's name. Keyword-only and var-keyword
    parameters are never bound, because they cannot receive positional values.
    Surplus values that ``func`` cannot accept positionally are ignored.

    :param args: The positional argument values, without ``self``/``cls``.
    :param func: The callable whose signature provides the parameter names.
    :return: A dict mapping parameter names to the supplied values.
    """
    parameters = list(inspect.signature(func).parameters.values())
    # skip self and cls args for instance and class methods
    if parameters and parameters[0].name in ("self", "cls"):
        parameters = parameters[1:]

    args_as_kwargs: Dict[str, Any] = {}
    for position, parameter in enumerate(parameters):
        if position >= len(args):
            break
        if parameter.kind == inspect.Parameter.VAR_POSITIONAL:
            # *args swallows every remaining positional value.
            args_as_kwargs[parameter.name] = tuple(args[position:])
            break
        if parameter.kind in (inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.VAR_KEYWORD):
            # Parameters from here on can only be passed by keyword.
            break
        args_as_kwargs[parameter.name] = args[position]
    return args_as_kwargs

View File

@ -727,7 +727,7 @@ def test_load_from_deepset_cloud_query():
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_load_from_deepset_cloud_indexing():
def test_load_from_deepset_cloud_indexing(caplog):
if MOCK_DC:
with open(SAMPLES_PATH / "dc" / "pipeline_config.json", "r") as f:
pipeline_config_yaml_response = json.load(f)
@ -745,10 +745,10 @@ def test_load_from_deepset_cloud_indexing():
document_store = indexing_pipeline.get_node("DocumentStore")
assert isinstance(document_store, DeepsetCloudDocumentStore)
with pytest.raises(
Exception, match=".*NotImplementedError.*DeepsetCloudDocumentStore currently does not support writing documents"
):
with caplog.at_level(logging.INFO):
indexing_pipeline.run(file_paths=[SAMPLES_PATH / "docs" / "doc_1.txt"])
assert "Note that DeepsetCloudDocumentStore does not support write operations." in caplog.text
assert "Input to write_documents: {" in caplog.text
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)