mirror of
https://github.com/deepset-ai/haystack.git
synced 2026-01-06 12:07:04 +00:00
Warnings for write operations of DeepsetCloudDocumentStore (#2565)
* log inputs to write operations * Update Documentation & Code Style * adjust tests * simplify by using decorator for write operation functions * Update Documentation & Code Style * fix comma * fix comma in test Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
parent
686a19b35d
commit
110b9c2b0a
@ -4195,6 +4195,16 @@ query result
|
||||
|
||||
# Module deepsetcloud
|
||||
|
||||
<a id="deepsetcloud.disable_and_log"></a>
|
||||
|
||||
#### disable\_and\_log
|
||||
|
||||
```python
|
||||
def disable_and_log(func)
|
||||
```
|
||||
|
||||
Decorator that disables a write operation: instead of executing it, a warning and the inputs of the call are logged.
|
||||
|
||||
<a id="deepsetcloud.DeepsetCloudDocumentStore"></a>
|
||||
|
||||
## DeepsetCloudDocumentStore
|
||||
@ -4522,6 +4532,7 @@ Otherwise raw similarity scores (e.g. cosine or dot_product) will be used.
|
||||
#### DeepsetCloudDocumentStore.write\_documents
|
||||
|
||||
```python
|
||||
@disable_and_log
|
||||
def write_documents(documents: Union[List[dict], List[Document]], index: Optional[str] = None, batch_size: int = 10_000, duplicate_documents: Optional[str] = None, headers: Optional[Dict[str, str]] = None)
|
||||
```
|
||||
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
from functools import wraps
|
||||
from typing import List, Optional, Union, Dict, Generator
|
||||
|
||||
import json
|
||||
@ -7,11 +8,32 @@ import numpy as np
|
||||
from haystack.document_stores import KeywordDocumentStore
|
||||
from haystack.errors import HaystackError
|
||||
from haystack.schema import Document, Label
|
||||
from haystack.utils import DeepsetCloud, DeepsetCloudError
|
||||
from haystack.utils import DeepsetCloud, DeepsetCloudError, args_to_kwargs
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def disable_and_log(func):
    """
    Replace a write method with a stand-in that only logs the call.

    The decorated function itself is never executed. The first intercepted call on an
    instance emits a warning (tracked through `self.disabled_write_warning_shown`);
    every call logs the received inputs at INFO level.

    :param func: the write method to disable.
    :return: the logging-only replacement, carrying the metadata of `func`.
    """

    @wraps(func)
    def log_instead_of_write(self, *args, **kwargs):
        first_intercepted_call = not self.disabled_write_warning_shown
        if first_intercepted_call:
            notice = (
                "Note that DeepsetCloudDocumentStore does not support write operations. "
                "In order to verify your pipeline works correctly, each input to write operations will be logged."
            )
            logger.warning(notice)
            # Remember that the notice was shown so it is emitted only once per instance.
            self.disabled_write_warning_shown = True

        # Name the positional inputs, then let explicit keyword arguments take precedence.
        parameters = dict(args_to_kwargs(args, func))
        parameters.update(kwargs)
        logger.info(f"Input to {func.__name__}: {parameters}")

    return log_instead_of_write
|
||||
|
||||
|
||||
class DeepsetCloudDocumentStore(KeywordDocumentStore):
|
||||
def __init__(
|
||||
self,
|
||||
@ -118,6 +140,8 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore):
|
||||
api_key=api_key, api_endpoint=api_endpoint, workspace=workspace, evaluation_set=label_index
|
||||
)
|
||||
|
||||
self.disabled_write_warning_shown = False
|
||||
|
||||
super().__init__()
|
||||
|
||||
def get_all_documents(
|
||||
@ -560,6 +584,7 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore):
|
||||
def _create_document_field_map(self) -> Dict:
|
||||
return {}
|
||||
|
||||
@disable_and_log
|
||||
def write_documents(
|
||||
self,
|
||||
documents: Union[List[dict], List[Document]],
|
||||
@ -589,7 +614,7 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore):
|
||||
|
||||
:return: None
|
||||
"""
|
||||
raise NotImplementedError("DeepsetCloudDocumentStore currently does not support writing documents.")
|
||||
pass
|
||||
|
||||
def get_evaluation_sets(self) -> List[dict]:
|
||||
"""
|
||||
@ -630,22 +655,25 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore):
|
||||
"""
|
||||
return self.evaluation_set_client.get_labels_count(evaluation_set=index)
|
||||
|
||||
@disable_and_log
|
||||
def write_labels(
|
||||
self,
|
||||
labels: Union[List[Label], List[dict]],
|
||||
index: Optional[str] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
):
|
||||
raise NotImplementedError("DeepsetCloudDocumentStore currently does not support labels.")
|
||||
pass
|
||||
|
||||
@disable_and_log
|
||||
def delete_all_documents(
|
||||
self,
|
||||
index: Optional[str] = None,
|
||||
filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
):
|
||||
raise NotImplementedError("DeepsetCloudDocumentStore currently does not support deleting documents.")
|
||||
pass
|
||||
|
||||
@disable_and_log
|
||||
def delete_documents(
|
||||
self,
|
||||
index: Optional[str] = None,
|
||||
@ -653,8 +681,9 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore):
|
||||
filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
):
|
||||
raise NotImplementedError("DeepsetCloudDocumentStore currently does not support deleting documents.")
|
||||
pass
|
||||
|
||||
@disable_and_log
|
||||
def delete_labels(
|
||||
self,
|
||||
index: Optional[str] = None,
|
||||
@ -662,7 +691,8 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore):
|
||||
filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
):
|
||||
raise NotImplementedError("DeepsetCloudDocumentStore currently does not support labels.")
|
||||
pass
|
||||
|
||||
@disable_and_log
|
||||
def delete_index(self, index: str):
|
||||
raise NotImplementedError("DeepsetCloudDocumentStore currently does not support deleting indexes.")
|
||||
pass
|
||||
|
||||
@ -10,6 +10,7 @@ import logging
|
||||
from haystack.schema import Document, MultiLabel
|
||||
from haystack.errors import PipelineSchemaError
|
||||
from haystack.telemetry import send_custom_event
|
||||
from haystack.utils import args_to_kwargs
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -37,18 +38,12 @@ def exportable_to_yaml(init_func):
|
||||
# Inner classes are heavily used in tests.
|
||||
if init_func.__qualname__.endswith(f"{self.__class__.__name__}.{init_func.__name__}"):
|
||||
|
||||
# Store all the named input parameters in self._component_config
|
||||
for k, v in kwargs.items():
|
||||
# Store all the input parameters in self._component_config
|
||||
args_as_kwargs = args_to_kwargs(args, init_func)
|
||||
params = {**args_as_kwargs, **kwargs}
|
||||
for k, v in params.items():
|
||||
self._component_config["params"][k] = v
|
||||
|
||||
# Store unnamed input parameters in self._component_config too by inferring their names
|
||||
sig = inspect.signature(init_func)
|
||||
parameter_names = list(sig.parameters.keys())
|
||||
# we can be sure that the first one is always "self"
|
||||
arg_names = parameter_names[1 : 1 + len(args)]
|
||||
for arg, arg_name in zip(args, arg_names):
|
||||
self._component_config["params"][arg_name] = arg
|
||||
|
||||
return wrapper_exportable_to_yaml
|
||||
|
||||
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
from haystack.utils.reflection import args_to_kwargs
|
||||
from haystack.utils.preprocessing import convert_files_to_docs, tika_convert_files_to_docs
|
||||
from haystack.utils.import_utils import fetch_archive_from_http
|
||||
from haystack.utils.cleaning import clean_wiki_text
|
||||
|
||||
12
haystack/utils/reflection.py
Normal file
12
haystack/utils/reflection.py
Normal file
@ -0,0 +1,12 @@
|
||||
import inspect
|
||||
from typing import Any, Dict, Tuple, Callable
|
||||
|
||||
|
||||
def args_to_kwargs(args: Tuple, func: Callable) -> Dict[str, Any]:
    """
    Map positional arguments to the parameter names of `func`.

    A leading `self` or `cls` parameter is skipped, so `args` is expected to hold only
    the arguments that follow it. Surplus positional arguments are collected as a tuple
    under the name of a `*args` parameter if `func` declares one; otherwise they are
    ignored. Keyword-only and `**kwargs` parameters never receive positional values.

    :param args: positional arguments as passed to `func` (without `self`/`cls`).
    :param func: the callable whose signature provides the parameter names.
    :return: dictionary of parameter name to the positional value bound to it.
    """
    parameters = list(inspect.signature(func).parameters.values())
    # skip self and cls args for instance and class methods
    if parameters and parameters[0].name in ("self", "cls"):
        parameters = parameters[1:]

    args_as_kwargs: Dict[str, Any] = {}
    for position, parameter in enumerate(parameters):
        if position >= len(args):
            break
        if parameter.kind == inspect.Parameter.VAR_POSITIONAL:
            # *args swallows every remaining positional argument
            args_as_kwargs[parameter.name] = tuple(args[position:])
            break
        if parameter.kind in (inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.VAR_KEYWORD):
            # these parameters cannot be filled positionally
            break
        args_as_kwargs[parameter.name] = args[position]
    return args_as_kwargs
|
||||
@ -727,7 +727,7 @@ def test_load_from_deepset_cloud_query():
|
||||
|
||||
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
|
||||
@responses.activate
|
||||
def test_load_from_deepset_cloud_indexing():
|
||||
def test_load_from_deepset_cloud_indexing(caplog):
|
||||
if MOCK_DC:
|
||||
with open(SAMPLES_PATH / "dc" / "pipeline_config.json", "r") as f:
|
||||
pipeline_config_yaml_response = json.load(f)
|
||||
@ -745,10 +745,10 @@ def test_load_from_deepset_cloud_indexing():
|
||||
document_store = indexing_pipeline.get_node("DocumentStore")
|
||||
assert isinstance(document_store, DeepsetCloudDocumentStore)
|
||||
|
||||
with pytest.raises(
|
||||
Exception, match=".*NotImplementedError.*DeepsetCloudDocumentStore currently does not support writing documents"
|
||||
):
|
||||
with caplog.at_level(logging.INFO):
|
||||
indexing_pipeline.run(file_paths=[SAMPLES_PATH / "docs" / "doc_1.txt"])
|
||||
assert "Note that DeepsetCloudDocumentStore does not support write operations." in caplog.text
|
||||
assert "Input to write_documents: {" in caplog.text
|
||||
|
||||
|
||||
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user