mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-07-28 03:12:54 +00:00
234 lines
8.0 KiB
Python
234 lines
8.0 KiB
Python
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
from typing import Any, Dict, List, Optional, Tuple, Type, Union
|
|
|
|
from haystack.core.component import Component, component
|
|
from haystack.core.serialization import default_from_dict, default_to_dict
|
|
from haystack.dataclasses import Document
|
|
from haystack.document_stores.types import DocumentStore, DuplicatePolicy
|
|
|
|
|
|
def document_store_class(
    name: str,
    documents: Optional[List[Document]] = None,
    documents_count: Optional[int] = None,
    bases: Optional[Tuple[type, ...]] = None,
    extra_fields: Optional[Dict[str, Any]] = None,
) -> Type[DocumentStore]:
    """
    Utility function to create a DocumentStore class with the given name and list of documents.

    If `documents` is set but `documents_count` is not, `documents_count` will be the length
    of `documents`.
    If both are set explicitly they don't influence each other.

    `write_documents()` and `delete_documents()` are no-op.
    You can override them using `extra_fields`.

    ### Usage

    Create a DocumentStore class that returns no documents:
    ```python
    MyFakeStore = document_store_class("MyFakeStore")
    document_store = MyFakeStore()
    assert document_store.count_documents() == 0
    assert document_store.filter_documents() == []
    ```

    Create a DocumentStore class that returns a single document:
    ```python
    doc = Document(id="fake_id", content="Fake content")
    MyFakeStore = document_store_class("MyFakeStore", documents=[doc])
    document_store = MyFakeStore()
    assert document_store.count_documents() == 1
    assert document_store.filter_documents() == [doc]
    ```

    Create a DocumentStore class that returns no document but returns a custom count:
    ```python
    MyFakeStore = document_store_class("MyFakeStore", documents_count=100)
    document_store = MyFakeStore()
    assert document_store.count_documents() == 100
    assert document_store.filter_documents() == []
    ```

    Create a DocumentStore class that returns a document and a custom count:
    ```python
    doc = Document(id="fake_id", content="Fake content")
    MyFakeStore = document_store_class("MyFakeStore", documents=[doc], documents_count=100)
    document_store = MyFakeStore()
    assert document_store.count_documents() == 100
    assert document_store.filter_documents() == [doc]
    ```

    Create a DocumentStore class with a custom base class:
    ```python
    MyFakeStore = document_store_class(
        "MyFakeStore",
        bases=(MyBaseClass,)
    )
    document_store = MyFakeStore()
    assert isinstance(document_store, MyBaseClass)
    ```

    Create a DocumentStore class with an extra field `my_field`:
    ```python
    MyFakeStore = document_store_class(
        "MyFakeStore",
        extra_fields={"my_field": 10}
    )
    document_store = MyFakeStore()
    assert document_store.my_field == 10
    ```

    Args:
        name: Name of the DocumentStore class.
        documents: Documents returned by `filter_documents()`; the list object itself is returned,
            not a copy. If set to None `filter_documents()` returns an empty list. Defaults to None.
        documents_count: Value returned by `count_documents()`. If set to None it is the length of
            `documents`, or 0 when `documents` is None too. Defaults to None.
        bases: Base classes for the created class, if set to None only base is object. Defaults to None.
        extra_fields: Extra attributes for the created class. An entry whose key matches one of the
            generated methods replaces that method. Defaults to None.

    :return: A class definition that can be used as a document store.
    """
    # Resolve the count once into a dedicated, non-Optional local so that the
    # generated `count_documents()` can honestly advertise an `int` return.
    if documents_count is not None:
        count = documents_count
    elif documents is not None:
        count = len(documents)
    else:
        count = 0

    def count_documents(self) -> int:
        return count

    def filter_documents(self, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
        # `filters` is accepted for signature compatibility only; it is never applied.
        return documents if documents is not None else []

    def write_documents(self, documents: List[Document], policy: DuplicatePolicy = DuplicatePolicy.FAIL) -> None:
        # Deliberate no-op: nothing is stored.
        return

    def delete_documents(self, document_ids: List[str]) -> None:
        # Deliberate no-op: nothing is removed.
        return

    def to_dict(self) -> Dict[str, Any]:
        return default_to_dict(self)

    fields: Dict[str, Any] = {
        "count_documents": count_documents,
        "filter_documents": filter_documents,
        "write_documents": write_documents,
        "delete_documents": delete_documents,
        "to_dict": to_dict,
        "from_dict": classmethod(default_from_dict),
    }

    if extra_fields is not None:
        # User-supplied entries are applied last so they win over the defaults above.
        fields.update(extra_fields)

    if bases is None:
        bases = (object,)

    return type(name, bases, fields)
|
|
|
|
|
|
def component_class(  # pylint: disable=too-many-positional-arguments
    name: str,
    input_types: Optional[Dict[str, Any]] = None,
    output_types: Optional[Dict[str, Any]] = None,
    output: Optional[Dict[str, Any]] = None,
    bases: Optional[Tuple[type, ...]] = None,
    extra_fields: Optional[Dict[str, Any]] = None,
) -> Type[Component]:
    """
    Utility function to create a Component class with the given name and input and output types.

    If `output` is set but `output_types` is not, `output_types` will be set to the types of the values in `output`.
    Though if `output_types` is set but `output` is not the component's `run` method will return a dictionary
    of the same keys as `output_types` all with a value of None.

    ### Usage

    Create a component class with default input and output types:
    ```python
    MyFakeComponent = component_class("MyFakeComponent")
    component = MyFakeComponent()
    output = component.run(value=1)
    assert output == {"value": None}
    ```

    Create a component class with a "value" input of type `int` and with a "value" output of `10`:
    ```python
    MyFakeComponent = component_class(
        "MyFakeComponent",
        input_types={"value": int},
        output={"value": 10}
    )
    component = MyFakeComponent()
    output = component.run(value=1)
    assert output == {"value": 10}
    ```

    Create a component class with a custom base class:
    ```python
    MyFakeComponent = component_class(
        "MyFakeComponent",
        bases=(MyBaseClass,)
    )
    component = MyFakeComponent()
    assert isinstance(component, MyBaseClass)
    ```

    Create a component class with an extra field `my_field`:
    ```python
    MyFakeComponent = component_class(
        "MyFakeComponent",
        extra_fields={"my_field": 10}
    )
    component = MyFakeComponent()
    assert component.my_field == 10
    ```

    Args:
        name: Name of the component class
        input_types: Dictionary of string and type that defines the inputs of the component,
            if set to None created component will expect a single input "value" of Any type.
            Defaults to None.
        output_types: Dictionary of string and type that defines the outputs of the component,
            if set to None created component will return a single output "value" of NoneType and None value.
            Defaults to None.
        output: Actual output dictionary returned by the created component run (the dictionary
            itself is returned, not a copy), if set to None it will return a dictionary of string
            and None values. Keys will be the same as the keys of output_types. Defaults to None.
        bases: Base classes for this component, if set to None only base is object. Defaults to None.
        extra_fields: Extra fields for the Component. An entry whose key matches one of the
            generated methods replaces that method. Defaults to None.

    :return: A class definition that can be used as a component.
    """
    # Resolve the effective socket types once into dedicated locals so the
    # closures below never see the Optional parameters.
    resolved_inputs: Dict[str, Any] = {"value": Any} if input_types is None else input_types

    if output_types is not None:
        resolved_outputs: Dict[str, Any] = output_types
    elif output is not None:
        # Infer each output's type from the concrete value that will be returned.
        resolved_outputs = {key: type(value) for key, value in output.items()}
    else:
        resolved_outputs = {"value": type(None)}

    def init(self):
        component.set_input_types(self, **resolved_inputs)
        component.set_output_types(self, **resolved_outputs)

    # Both arguments are necessary to correctly define
    # run but pylint doesn't like that we don't use them.
    # It's fine ignoring the warning here.
    def run(self, **kwargs):  # pylint: disable=unused-argument
        if output is not None:
            return output
        # No canned output: one None value per declared output name.
        return dict.fromkeys(resolved_outputs)

    def to_dict(self):
        return default_to_dict(self)

    def from_dict(cls, data: Dict[str, Any]):
        return default_from_dict(cls, data)

    fields: Dict[str, Any] = {
        "__init__": init,
        "run": run,
        "to_dict": to_dict,
        "from_dict": classmethod(from_dict),
    }
    if extra_fields is not None:
        # User-supplied entries are applied last so they win over the defaults above.
        fields.update(extra_fields)

    if bases is None:
        bases = (object,)

    cls = type(name, bases, fields)
    return component(cls)
|