mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-08-31 11:56:35 +00:00
refactor: Move filter utilities (2.0) (#5797)
* Move filter utilities * PR feedback
This commit is contained in:
parent
ad5b615503
commit
1a212420b7
@ -2,10 +2,6 @@ class DocumentStoreError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class FilterError(DocumentStoreError):
|
||||
pass
|
||||
|
||||
|
||||
class DuplicateDocumentError(DocumentStoreError):
|
||||
pass
|
||||
|
||||
|
@ -1,4 +1,3 @@
|
||||
from haystack.preview.document_stores.memory.document_store import MemoryDocumentStore
|
||||
from haystack.preview.document_stores.memory.errors import MemoryDocumentStoreFilterError
|
||||
|
||||
__all__ = ["MemoryDocumentStore", "MemoryDocumentStoreFilterError"]
|
||||
__all__ = ["MemoryDocumentStore"]
|
||||
|
@ -11,7 +11,7 @@ from haystack.preview import default_from_dict, default_to_dict
|
||||
from haystack.preview.document_stores.decorator import document_store
|
||||
from haystack.preview.dataclasses import Document
|
||||
from haystack.preview.document_stores.protocols import DuplicatePolicy, DocumentStore
|
||||
from haystack.preview.document_stores.memory._filters import match
|
||||
from haystack.preview.utils.filters import document_matches_filter
|
||||
from haystack.preview.document_stores.errors import DuplicateDocumentError, MissingDocumentError, DocumentStoreError
|
||||
from haystack.preview.utils import expit
|
||||
|
||||
@ -160,7 +160,7 @@ class MemoryDocumentStore:
|
||||
:return: A list of Documents that match the given filters.
|
||||
"""
|
||||
if filters:
|
||||
return [doc for doc in self.storage.values() if match(conditions=filters, document=doc)]
|
||||
return [doc for doc in self.storage.values() if document_matches_filter(conditions=filters, document=doc)]
|
||||
return list(self.storage.values())
|
||||
|
||||
def write_documents(self, documents: List[Document], policy: DuplicatePolicy = DuplicatePolicy.FAIL) -> None:
|
||||
|
@ -1,5 +0,0 @@
|
||||
from haystack.preview.document_stores.errors import FilterError
|
||||
|
||||
|
||||
class MemoryDocumentStoreFilterError(FilterError):
|
||||
pass
|
2
haystack/preview/errors.py
Normal file
2
haystack/preview/errors.py
Normal file
@ -0,0 +1,2 @@
|
||||
class FilterError(Exception):
|
||||
pass
|
@ -7,7 +7,8 @@ import pandas as pd
|
||||
|
||||
from haystack.preview.dataclasses import Document
|
||||
from haystack.preview.document_stores import DocumentStore, DuplicatePolicy
|
||||
from haystack.preview.document_stores.errors import FilterError, MissingDocumentError, DuplicateDocumentError
|
||||
from haystack.preview.document_stores.errors import MissingDocumentError, DuplicateDocumentError
|
||||
from haystack.preview.errors import FilterError
|
||||
|
||||
|
||||
class DocumentStoreBaseTests:
|
||||
|
@ -1,2 +1,3 @@
|
||||
from haystack.preview.utils.expit import expit
|
||||
from haystack.preview.utils.requests_utils import request_with_retry
|
||||
from haystack.preview.utils.filters import document_matches_filter
|
||||
|
@ -1,10 +1,10 @@
|
||||
from typing import List, Any
|
||||
from typing import List, Any, Union, Dict
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from haystack.preview.dataclasses import Document
|
||||
from haystack.preview.document_stores.memory.errors import MemoryDocumentStoreFilterError
|
||||
from haystack.preview.errors import FilterError
|
||||
|
||||
|
||||
GT_TYPES = (int, float, np.number)
|
||||
@ -32,10 +32,10 @@ def and_operation(conditions: List[Any], document: Document, _current_key: str):
|
||||
:param _current_key: internal, don't use.
|
||||
:return: True if the document matches all the filters, False otherwise
|
||||
"""
|
||||
for condition in conditions:
|
||||
if not match(conditions=condition, document=document, _current_key=_current_key):
|
||||
return False
|
||||
return True
|
||||
return all(
|
||||
document_matches_filter(conditions=condition, document=document, _current_key=_current_key)
|
||||
for condition in conditions
|
||||
)
|
||||
|
||||
|
||||
def or_operation(conditions: List[Any], document: Document, _current_key: str):
|
||||
@ -45,12 +45,12 @@ def or_operation(conditions: List[Any], document: Document, _current_key: str):
|
||||
:param conditions: the filters dictionary.
|
||||
:param document: the document to test.
|
||||
:param _current_key: internal, don't use.
|
||||
:return: True if the document matches ano of the filters, False otherwise
|
||||
:return: True if the document matches any of the filters, False otherwise
|
||||
"""
|
||||
for condition in conditions:
|
||||
if match(conditions=condition, document=document, _current_key=_current_key):
|
||||
return True
|
||||
return False
|
||||
return any(
|
||||
document_matches_filter(conditions=condition, document=document, _current_key=_current_key)
|
||||
for condition in conditions
|
||||
)
|
||||
|
||||
|
||||
def _safe_eq(first: Any, second: Any) -> bool:
|
||||
@ -76,7 +76,7 @@ def _safe_gt(first: Any, second: Any) -> bool:
|
||||
Works only for numerical values and dates. Strings, lists, tables and tensors all raise exceptions.
|
||||
"""
|
||||
if not isinstance(first, GT_TYPES) or not isinstance(second, GT_TYPES):
|
||||
raise MemoryDocumentStoreFilterError(
|
||||
raise FilterError(
|
||||
f"Can't evaluate '{type(first).__name__} > {type(second).__name__}'. "
|
||||
f"Convert these values into one of the following types: {[type_.__name__ for type_ in GT_TYPES]}"
|
||||
)
|
||||
@ -111,7 +111,7 @@ def in_operation(fields, field_name, value):
|
||||
return False
|
||||
|
||||
if not isinstance(value, IN_TYPES):
|
||||
raise MemoryDocumentStoreFilterError("$in accepts only iterable values like lists, sets and tuples.")
|
||||
raise FilterError("$in accepts only iterable values like lists, sets and tuples.")
|
||||
|
||||
return any(_safe_eq(fields[field_name], v) for v in value)
|
||||
|
||||
@ -208,19 +208,22 @@ OPERATORS = {
|
||||
RESERVED_KEYS = [*LOGICAL_STATEMENTS.keys(), *OPERATORS.keys()]
|
||||
|
||||
|
||||
def match(conditions: Any, document: Document, _current_key=None):
|
||||
def document_matches_filter(conditions: Union[Dict, List], document: Document, _current_key=None):
|
||||
"""
|
||||
This method applies the filters to any given document and returns True when the documents
|
||||
metadata matches the filters, False otherwise.
|
||||
Check if a document's metadata matches the provided filter conditions.
|
||||
|
||||
:param conditions: the filters dictionary.
|
||||
:param document: the document to test.
|
||||
:return: True if the document matches the filters, False otherwise
|
||||
This function evaluates the specified conditions against the metadata of the given document
|
||||
and returns True if the conditions are met, otherwise it returns False.
|
||||
|
||||
:param conditions: A dictionary or list containing filter conditions to be applied to the document's metadata.
|
||||
:param document: The document whose metadata will be evaluated against the conditions.
|
||||
:param _current_key: internal parameter, don't use.
|
||||
:return: True if the document's metadata matches the filter conditions, False otherwise.
|
||||
"""
|
||||
if isinstance(conditions, dict):
|
||||
# Check for malformed filters, like {"name": {"year": "2020"}}
|
||||
if _current_key and any(key not in RESERVED_KEYS for key in conditions.keys()):
|
||||
raise MemoryDocumentStoreFilterError(
|
||||
raise FilterError(
|
||||
f"This filter ({{{_current_key}: {conditions}}}) seems to be malformed. "
|
||||
"Comparisons between dictionaries are not currently supported. "
|
||||
"Check the documentation to learn more about filters syntax."
|
||||
@ -241,7 +244,7 @@ def match(conditions: Any, document: Document, _current_key=None):
|
||||
# A comparison operator ($eq, $in, $gte, ...)
|
||||
if field_key in OPERATORS.keys():
|
||||
if not _current_key:
|
||||
raise MemoryDocumentStoreFilterError(
|
||||
raise FilterError(
|
||||
"Filters can't start with an operator like $eq and $in. You have to specify the field name first. "
|
||||
"See the examples in the documentation."
|
||||
)
|
||||
@ -264,9 +267,7 @@ def match(conditions: Any, document: Document, _current_key=None):
|
||||
# The default operator for a {key: value} filter is $eq
|
||||
return eq_operation(fields=document.flatten(), field_name=_current_key, value=conditions)
|
||||
|
||||
raise MemoryDocumentStoreFilterError(
|
||||
"Filters must be dictionaries or lists. See the examples in the documentation."
|
||||
)
|
||||
raise FilterError("Filters must be dictionaries or lists. See the examples in the documentation.")
|
||||
|
||||
|
||||
def _list_conditions(conditions: Any) -> List[Any]:
|
Loading…
x
Reference in New Issue
Block a user