mirror of
				https://github.com/deepset-ai/haystack.git
				synced 2025-10-31 17:59:27 +00:00 
			
		
		
		
	refactor: Move filter utilities (2.0) (#5797)
* Move filter utilities * PR feedback
This commit is contained in:
		
							parent
							
								
									ad5b615503
								
							
						
					
					
						commit
						1a212420b7
					
				| @ -2,10 +2,6 @@ class DocumentStoreError(Exception): | ||||
|     pass | ||||
| 
 | ||||
| 
 | ||||
| class FilterError(DocumentStoreError): | ||||
|     pass | ||||
| 
 | ||||
| 
 | ||||
| class DuplicateDocumentError(DocumentStoreError): | ||||
|     pass | ||||
| 
 | ||||
|  | ||||
| @ -1,4 +1,3 @@ | ||||
| from haystack.preview.document_stores.memory.document_store import MemoryDocumentStore | ||||
| from haystack.preview.document_stores.memory.errors import MemoryDocumentStoreFilterError | ||||
| 
 | ||||
| __all__ = ["MemoryDocumentStore", "MemoryDocumentStoreFilterError"] | ||||
| __all__ = ["MemoryDocumentStore"] | ||||
|  | ||||
| @ -11,7 +11,7 @@ from haystack.preview import default_from_dict, default_to_dict | ||||
| from haystack.preview.document_stores.decorator import document_store | ||||
| from haystack.preview.dataclasses import Document | ||||
| from haystack.preview.document_stores.protocols import DuplicatePolicy, DocumentStore | ||||
| from haystack.preview.document_stores.memory._filters import match | ||||
| from haystack.preview.utils.filters import document_matches_filter | ||||
| from haystack.preview.document_stores.errors import DuplicateDocumentError, MissingDocumentError, DocumentStoreError | ||||
| from haystack.preview.utils import expit | ||||
| 
 | ||||
| @ -160,7 +160,7 @@ class MemoryDocumentStore: | ||||
|         :return: A list of Documents that match the given filters. | ||||
|         """ | ||||
|         if filters: | ||||
|             return [doc for doc in self.storage.values() if match(conditions=filters, document=doc)] | ||||
|             return [doc for doc in self.storage.values() if document_matches_filter(conditions=filters, document=doc)] | ||||
|         return list(self.storage.values()) | ||||
| 
 | ||||
|     def write_documents(self, documents: List[Document], policy: DuplicatePolicy = DuplicatePolicy.FAIL) -> None: | ||||
|  | ||||
| @ -1,5 +0,0 @@ | ||||
| from haystack.preview.document_stores.errors import FilterError | ||||
| 
 | ||||
| 
 | ||||
| class MemoryDocumentStoreFilterError(FilterError): | ||||
|     pass | ||||
							
								
								
									
										2
									
								
								haystack/preview/errors.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								haystack/preview/errors.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,2 @@ | ||||
| class FilterError(Exception): | ||||
|     pass | ||||
| @ -7,7 +7,8 @@ import pandas as pd | ||||
| 
 | ||||
| from haystack.preview.dataclasses import Document | ||||
| from haystack.preview.document_stores import DocumentStore, DuplicatePolicy | ||||
| from haystack.preview.document_stores.errors import FilterError, MissingDocumentError, DuplicateDocumentError | ||||
| from haystack.preview.document_stores.errors import MissingDocumentError, DuplicateDocumentError | ||||
| from haystack.preview.errors import FilterError | ||||
| 
 | ||||
| 
 | ||||
| class DocumentStoreBaseTests: | ||||
|  | ||||
| @ -1,2 +1,3 @@ | ||||
| from haystack.preview.utils.expit import expit | ||||
| from haystack.preview.utils.requests_utils import request_with_retry | ||||
| from haystack.preview.utils.filters import document_matches_filter | ||||
|  | ||||
| @ -1,10 +1,10 @@ | ||||
| from typing import List, Any | ||||
| from typing import List, Any, Union, Dict | ||||
| 
 | ||||
| import numpy as np | ||||
| import pandas as pd | ||||
| 
 | ||||
| from haystack.preview.dataclasses import Document | ||||
| from haystack.preview.document_stores.memory.errors import MemoryDocumentStoreFilterError | ||||
| from haystack.preview.errors import FilterError | ||||
| 
 | ||||
| 
 | ||||
| GT_TYPES = (int, float, np.number) | ||||
| @ -32,10 +32,10 @@ def and_operation(conditions: List[Any], document: Document, _current_key: str): | ||||
|     :param _current_key: internal, don't use. | ||||
|     :return: True if the document matches all the filters, False otherwise | ||||
|     """ | ||||
|     for condition in conditions: | ||||
|         if not match(conditions=condition, document=document, _current_key=_current_key): | ||||
|             return False | ||||
|     return True | ||||
|     return all( | ||||
|         document_matches_filter(conditions=condition, document=document, _current_key=_current_key) | ||||
|         for condition in conditions | ||||
|     ) | ||||
| 
 | ||||
| 
 | ||||
| def or_operation(conditions: List[Any], document: Document, _current_key: str): | ||||
| @ -45,12 +45,12 @@ def or_operation(conditions: List[Any], document: Document, _current_key: str): | ||||
|     :param conditions: the filters dictionary. | ||||
|     :param document: the document to test. | ||||
|     :param _current_key: internal, don't use. | ||||
|     :return: True if the document matches ano of the filters, False otherwise | ||||
|     :return: True if the document matches any of the filters, False otherwise | ||||
|     """ | ||||
|     for condition in conditions: | ||||
|         if match(conditions=condition, document=document, _current_key=_current_key): | ||||
|             return True | ||||
|     return False | ||||
|     return any( | ||||
|         document_matches_filter(conditions=condition, document=document, _current_key=_current_key) | ||||
|         for condition in conditions | ||||
|     ) | ||||
| 
 | ||||
| 
 | ||||
| def _safe_eq(first: Any, second: Any) -> bool: | ||||
| @ -76,7 +76,7 @@ def _safe_gt(first: Any, second: Any) -> bool: | ||||
|     Works only for numerical values and dates. Strings, lists, tables and tensors all raise exceptions. | ||||
|     """ | ||||
|     if not isinstance(first, GT_TYPES) or not isinstance(second, GT_TYPES): | ||||
|         raise MemoryDocumentStoreFilterError( | ||||
|         raise FilterError( | ||||
|             f"Can't evaluate '{type(first).__name__} > {type(second).__name__}'. " | ||||
|             f"Convert these values into one of the following types: {[type_.__name__ for type_ in GT_TYPES]}" | ||||
|         ) | ||||
| @ -111,7 +111,7 @@ def in_operation(fields, field_name, value): | ||||
|         return False | ||||
| 
 | ||||
|     if not isinstance(value, IN_TYPES): | ||||
|         raise MemoryDocumentStoreFilterError("$in accepts only iterable values like lists, sets and tuples.") | ||||
|         raise FilterError("$in accepts only iterable values like lists, sets and tuples.") | ||||
| 
 | ||||
|     return any(_safe_eq(fields[field_name], v) for v in value) | ||||
| 
 | ||||
| @ -208,19 +208,22 @@ OPERATORS = { | ||||
| RESERVED_KEYS = [*LOGICAL_STATEMENTS.keys(), *OPERATORS.keys()] | ||||
| 
 | ||||
| 
 | ||||
| def match(conditions: Any, document: Document, _current_key=None): | ||||
| def document_matches_filter(conditions: Union[Dict, List], document: Document, _current_key=None): | ||||
|     """ | ||||
|     This method applies the filters to any given document and returns True when the documents | ||||
|     metadata matches the filters, False otherwise. | ||||
|     Check if a document's metadata matches the provided filter conditions. | ||||
| 
 | ||||
|     :param conditions: the filters dictionary. | ||||
|     :param document: the document to test. | ||||
|     :return: True if the document matches the filters, False otherwise | ||||
|     This function evaluates the specified conditions against the metadata of the given document | ||||
|     and returns True if the conditions are met, otherwise it returns False. | ||||
| 
 | ||||
|     :param conditions: A dictionary or list containing filter conditions to be applied to the document's metadata. | ||||
|     :param document: The document whose metadata will be evaluated against the conditions. | ||||
|     :param _current_key: internal parameter, don't use. | ||||
|     :return: True if the document's metadata matches the filter conditions, False otherwise. | ||||
|     """ | ||||
|     if isinstance(conditions, dict): | ||||
|         # Check for malformed filters, like {"name": {"year": "2020"}} | ||||
|         if _current_key and any(key not in RESERVED_KEYS for key in conditions.keys()): | ||||
|             raise MemoryDocumentStoreFilterError( | ||||
|             raise FilterError( | ||||
|                 f"This filter ({{{_current_key}: {conditions}}}) seems to be malformed. " | ||||
|                 "Comparisons between dictionaries are not currently supported. " | ||||
|                 "Check the documentation to learn more about filters syntax." | ||||
| @ -241,7 +244,7 @@ def match(conditions: Any, document: Document, _current_key=None): | ||||
|         # A comparison operator ($eq, $in, $gte, ...) | ||||
|         if field_key in OPERATORS.keys(): | ||||
|             if not _current_key: | ||||
|                 raise MemoryDocumentStoreFilterError( | ||||
|                 raise FilterError( | ||||
|                     "Filters can't start with an operator like $eq and $in. You have to specify the field name first. " | ||||
|                     "See the examples in the documentation." | ||||
|                 ) | ||||
| @ -264,9 +267,7 @@ def match(conditions: Any, document: Document, _current_key=None): | ||||
|         # The default operator for a {key: value} filter is $eq | ||||
|         return eq_operation(fields=document.flatten(), field_name=_current_key, value=conditions) | ||||
| 
 | ||||
|     raise MemoryDocumentStoreFilterError( | ||||
|         "Filters must be dictionaries or lists. See the examples in the documentation." | ||||
|     ) | ||||
|     raise FilterError("Filters must be dictionaries or lists. See the examples in the documentation.") | ||||
| 
 | ||||
| 
 | ||||
| def _list_conditions(conditions: Any) -> List[Any]: | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 bogdankostic
						bogdankostic