mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-08-31 20:03:38 +00:00
refactor: Move filter utilities (2.0) (#5797)
* Move filter utilities
* PR feedback
This commit is contained in:
parent
ad5b615503
commit
1a212420b7
@ -2,10 +2,6 @@ class DocumentStoreError(Exception):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class FilterError(DocumentStoreError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class DuplicateDocumentError(DocumentStoreError):
|
class DuplicateDocumentError(DocumentStoreError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
from haystack.preview.document_stores.memory.document_store import MemoryDocumentStore
|
from haystack.preview.document_stores.memory.document_store import MemoryDocumentStore
|
||||||
from haystack.preview.document_stores.memory.errors import MemoryDocumentStoreFilterError
|
|
||||||
|
|
||||||
__all__ = ["MemoryDocumentStore", "MemoryDocumentStoreFilterError"]
|
__all__ = ["MemoryDocumentStore"]
|
||||||
|
@ -11,7 +11,7 @@ from haystack.preview import default_from_dict, default_to_dict
|
|||||||
from haystack.preview.document_stores.decorator import document_store
|
from haystack.preview.document_stores.decorator import document_store
|
||||||
from haystack.preview.dataclasses import Document
|
from haystack.preview.dataclasses import Document
|
||||||
from haystack.preview.document_stores.protocols import DuplicatePolicy, DocumentStore
|
from haystack.preview.document_stores.protocols import DuplicatePolicy, DocumentStore
|
||||||
from haystack.preview.document_stores.memory._filters import match
|
from haystack.preview.utils.filters import document_matches_filter
|
||||||
from haystack.preview.document_stores.errors import DuplicateDocumentError, MissingDocumentError, DocumentStoreError
|
from haystack.preview.document_stores.errors import DuplicateDocumentError, MissingDocumentError, DocumentStoreError
|
||||||
from haystack.preview.utils import expit
|
from haystack.preview.utils import expit
|
||||||
|
|
||||||
@ -160,7 +160,7 @@ class MemoryDocumentStore:
|
|||||||
:return: A list of Documents that match the given filters.
|
:return: A list of Documents that match the given filters.
|
||||||
"""
|
"""
|
||||||
if filters:
|
if filters:
|
||||||
return [doc for doc in self.storage.values() if match(conditions=filters, document=doc)]
|
return [doc for doc in self.storage.values() if document_matches_filter(conditions=filters, document=doc)]
|
||||||
return list(self.storage.values())
|
return list(self.storage.values())
|
||||||
|
|
||||||
def write_documents(self, documents: List[Document], policy: DuplicatePolicy = DuplicatePolicy.FAIL) -> None:
|
def write_documents(self, documents: List[Document], policy: DuplicatePolicy = DuplicatePolicy.FAIL) -> None:
|
||||||
|
@ -1,5 +0,0 @@
|
|||||||
from haystack.preview.document_stores.errors import FilterError
|
|
||||||
|
|
||||||
|
|
||||||
class MemoryDocumentStoreFilterError(FilterError):
|
|
||||||
pass
|
|
2
haystack/preview/errors.py
Normal file
2
haystack/preview/errors.py
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
class FilterError(Exception):
|
||||||
|
pass
|
@ -7,7 +7,8 @@ import pandas as pd
|
|||||||
|
|
||||||
from haystack.preview.dataclasses import Document
|
from haystack.preview.dataclasses import Document
|
||||||
from haystack.preview.document_stores import DocumentStore, DuplicatePolicy
|
from haystack.preview.document_stores import DocumentStore, DuplicatePolicy
|
||||||
from haystack.preview.document_stores.errors import FilterError, MissingDocumentError, DuplicateDocumentError
|
from haystack.preview.document_stores.errors import MissingDocumentError, DuplicateDocumentError
|
||||||
|
from haystack.preview.errors import FilterError
|
||||||
|
|
||||||
|
|
||||||
class DocumentStoreBaseTests:
|
class DocumentStoreBaseTests:
|
||||||
|
@ -1,2 +1,3 @@
|
|||||||
from haystack.preview.utils.expit import expit
|
from haystack.preview.utils.expit import expit
|
||||||
from haystack.preview.utils.requests_utils import request_with_retry
|
from haystack.preview.utils.requests_utils import request_with_retry
|
||||||
|
from haystack.preview.utils.filters import document_matches_filter
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
from typing import List, Any
|
from typing import List, Any, Union, Dict
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
from haystack.preview.dataclasses import Document
|
from haystack.preview.dataclasses import Document
|
||||||
from haystack.preview.document_stores.memory.errors import MemoryDocumentStoreFilterError
|
from haystack.preview.errors import FilterError
|
||||||
|
|
||||||
|
|
||||||
GT_TYPES = (int, float, np.number)
|
GT_TYPES = (int, float, np.number)
|
||||||
@ -32,10 +32,10 @@ def and_operation(conditions: List[Any], document: Document, _current_key: str):
|
|||||||
:param _current_key: internal, don't use.
|
:param _current_key: internal, don't use.
|
||||||
:return: True if the document matches all the filters, False otherwise
|
:return: True if the document matches all the filters, False otherwise
|
||||||
"""
|
"""
|
||||||
for condition in conditions:
|
return all(
|
||||||
if not match(conditions=condition, document=document, _current_key=_current_key):
|
document_matches_filter(conditions=condition, document=document, _current_key=_current_key)
|
||||||
return False
|
for condition in conditions
|
||||||
return True
|
)
|
||||||
|
|
||||||
|
|
||||||
def or_operation(conditions: List[Any], document: Document, _current_key: str):
|
def or_operation(conditions: List[Any], document: Document, _current_key: str):
|
||||||
@ -45,12 +45,12 @@ def or_operation(conditions: List[Any], document: Document, _current_key: str):
|
|||||||
:param conditions: the filters dictionary.
|
:param conditions: the filters dictionary.
|
||||||
:param document: the document to test.
|
:param document: the document to test.
|
||||||
:param _current_key: internal, don't use.
|
:param _current_key: internal, don't use.
|
||||||
:return: True if the document matches ano of the filters, False otherwise
|
:return: True if the document matches any of the filters, False otherwise
|
||||||
"""
|
"""
|
||||||
for condition in conditions:
|
return any(
|
||||||
if match(conditions=condition, document=document, _current_key=_current_key):
|
document_matches_filter(conditions=condition, document=document, _current_key=_current_key)
|
||||||
return True
|
for condition in conditions
|
||||||
return False
|
)
|
||||||
|
|
||||||
|
|
||||||
def _safe_eq(first: Any, second: Any) -> bool:
|
def _safe_eq(first: Any, second: Any) -> bool:
|
||||||
@ -76,7 +76,7 @@ def _safe_gt(first: Any, second: Any) -> bool:
|
|||||||
Works only for numerical values and dates. Strings, lists, tables and tensors all raise exceptions.
|
Works only for numerical values and dates. Strings, lists, tables and tensors all raise exceptions.
|
||||||
"""
|
"""
|
||||||
if not isinstance(first, GT_TYPES) or not isinstance(second, GT_TYPES):
|
if not isinstance(first, GT_TYPES) or not isinstance(second, GT_TYPES):
|
||||||
raise MemoryDocumentStoreFilterError(
|
raise FilterError(
|
||||||
f"Can't evaluate '{type(first).__name__} > {type(second).__name__}'. "
|
f"Can't evaluate '{type(first).__name__} > {type(second).__name__}'. "
|
||||||
f"Convert these values into one of the following types: {[type_.__name__ for type_ in GT_TYPES]}"
|
f"Convert these values into one of the following types: {[type_.__name__ for type_ in GT_TYPES]}"
|
||||||
)
|
)
|
||||||
@ -111,7 +111,7 @@ def in_operation(fields, field_name, value):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
if not isinstance(value, IN_TYPES):
|
if not isinstance(value, IN_TYPES):
|
||||||
raise MemoryDocumentStoreFilterError("$in accepts only iterable values like lists, sets and tuples.")
|
raise FilterError("$in accepts only iterable values like lists, sets and tuples.")
|
||||||
|
|
||||||
return any(_safe_eq(fields[field_name], v) for v in value)
|
return any(_safe_eq(fields[field_name], v) for v in value)
|
||||||
|
|
||||||
@ -208,19 +208,22 @@ OPERATORS = {
|
|||||||
RESERVED_KEYS = [*LOGICAL_STATEMENTS.keys(), *OPERATORS.keys()]
|
RESERVED_KEYS = [*LOGICAL_STATEMENTS.keys(), *OPERATORS.keys()]
|
||||||
|
|
||||||
|
|
||||||
def match(conditions: Any, document: Document, _current_key=None):
|
def document_matches_filter(conditions: Union[Dict, List], document: Document, _current_key=None):
|
||||||
"""
|
"""
|
||||||
This method applies the filters to any given document and returns True when the documents
|
Check if a document's metadata matches the provided filter conditions.
|
||||||
metadata matches the filters, False otherwise.
|
|
||||||
|
|
||||||
:param conditions: the filters dictionary.
|
This function evaluates the specified conditions against the metadata of the given document
|
||||||
:param document: the document to test.
|
and returns True if the conditions are met, otherwise it returns False.
|
||||||
:return: True if the document matches the filters, False otherwise
|
|
||||||
|
:param conditions: A dictionary or list containing filter conditions to be applied to the document's metadata.
|
||||||
|
:param document: The document whose metadata will be evaluated against the conditions.
|
||||||
|
:param _current_key: internal parameter, don't use.
|
||||||
|
:return: True if the document's metadata matches the filter conditions, False otherwise.
|
||||||
"""
|
"""
|
||||||
if isinstance(conditions, dict):
|
if isinstance(conditions, dict):
|
||||||
# Check for malformed filters, like {"name": {"year": "2020"}}
|
# Check for malformed filters, like {"name": {"year": "2020"}}
|
||||||
if _current_key and any(key not in RESERVED_KEYS for key in conditions.keys()):
|
if _current_key and any(key not in RESERVED_KEYS for key in conditions.keys()):
|
||||||
raise MemoryDocumentStoreFilterError(
|
raise FilterError(
|
||||||
f"This filter ({{{_current_key}: {conditions}}}) seems to be malformed. "
|
f"This filter ({{{_current_key}: {conditions}}}) seems to be malformed. "
|
||||||
"Comparisons between dictionaries are not currently supported. "
|
"Comparisons between dictionaries are not currently supported. "
|
||||||
"Check the documentation to learn more about filters syntax."
|
"Check the documentation to learn more about filters syntax."
|
||||||
@ -241,7 +244,7 @@ def match(conditions: Any, document: Document, _current_key=None):
|
|||||||
# A comparison operator ($eq, $in, $gte, ...)
|
# A comparison operator ($eq, $in, $gte, ...)
|
||||||
if field_key in OPERATORS.keys():
|
if field_key in OPERATORS.keys():
|
||||||
if not _current_key:
|
if not _current_key:
|
||||||
raise MemoryDocumentStoreFilterError(
|
raise FilterError(
|
||||||
"Filters can't start with an operator like $eq and $in. You have to specify the field name first. "
|
"Filters can't start with an operator like $eq and $in. You have to specify the field name first. "
|
||||||
"See the examples in the documentation."
|
"See the examples in the documentation."
|
||||||
)
|
)
|
||||||
@ -264,9 +267,7 @@ def match(conditions: Any, document: Document, _current_key=None):
|
|||||||
# The default operator for a {key: value} filter is $eq
|
# The default operator for a {key: value} filter is $eq
|
||||||
return eq_operation(fields=document.flatten(), field_name=_current_key, value=conditions)
|
return eq_operation(fields=document.flatten(), field_name=_current_key, value=conditions)
|
||||||
|
|
||||||
raise MemoryDocumentStoreFilterError(
|
raise FilterError("Filters must be dictionaries or lists. See the examples in the documentation.")
|
||||||
"Filters must be dictionaries or lists. See the examples in the documentation."
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _list_conditions(conditions: Any) -> List[Any]:
|
def _list_conditions(conditions: Any) -> List[Any]:
|
Loading…
x
Reference in New Issue
Block a user