mirror of
				https://github.com/deepset-ai/haystack.git
				synced 2025-10-25 14:59:01 +00:00 
			
		
		
		
	refactor: Move filter utilities (2.0) (#5797)
* Move filter utilities
* PR feedback
This commit is contained in:
		
							parent
							
								
									ad5b615503
								
							
						
					
					
						commit
						1a212420b7
					
				| @ -2,10 +2,6 @@ class DocumentStoreError(Exception): | |||||||
|     pass |     pass | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class FilterError(DocumentStoreError): |  | ||||||
|     pass |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| class DuplicateDocumentError(DocumentStoreError): | class DuplicateDocumentError(DocumentStoreError): | ||||||
|     pass |     pass | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -1,4 +1,3 @@ | |||||||
| from haystack.preview.document_stores.memory.document_store import MemoryDocumentStore | from haystack.preview.document_stores.memory.document_store import MemoryDocumentStore | ||||||
| from haystack.preview.document_stores.memory.errors import MemoryDocumentStoreFilterError |  | ||||||
| 
 | 
 | ||||||
| __all__ = ["MemoryDocumentStore", "MemoryDocumentStoreFilterError"] | __all__ = ["MemoryDocumentStore"] | ||||||
|  | |||||||
| @ -11,7 +11,7 @@ from haystack.preview import default_from_dict, default_to_dict | |||||||
| from haystack.preview.document_stores.decorator import document_store | from haystack.preview.document_stores.decorator import document_store | ||||||
| from haystack.preview.dataclasses import Document | from haystack.preview.dataclasses import Document | ||||||
| from haystack.preview.document_stores.protocols import DuplicatePolicy, DocumentStore | from haystack.preview.document_stores.protocols import DuplicatePolicy, DocumentStore | ||||||
| from haystack.preview.document_stores.memory._filters import match | from haystack.preview.utils.filters import document_matches_filter | ||||||
| from haystack.preview.document_stores.errors import DuplicateDocumentError, MissingDocumentError, DocumentStoreError | from haystack.preview.document_stores.errors import DuplicateDocumentError, MissingDocumentError, DocumentStoreError | ||||||
| from haystack.preview.utils import expit | from haystack.preview.utils import expit | ||||||
| 
 | 
 | ||||||
| @ -160,7 +160,7 @@ class MemoryDocumentStore: | |||||||
|         :return: A list of Documents that match the given filters. |         :return: A list of Documents that match the given filters. | ||||||
|         """ |         """ | ||||||
|         if filters: |         if filters: | ||||||
|             return [doc for doc in self.storage.values() if match(conditions=filters, document=doc)] |             return [doc for doc in self.storage.values() if document_matches_filter(conditions=filters, document=doc)] | ||||||
|         return list(self.storage.values()) |         return list(self.storage.values()) | ||||||
| 
 | 
 | ||||||
|     def write_documents(self, documents: List[Document], policy: DuplicatePolicy = DuplicatePolicy.FAIL) -> None: |     def write_documents(self, documents: List[Document], policy: DuplicatePolicy = DuplicatePolicy.FAIL) -> None: | ||||||
|  | |||||||
| @ -1,5 +0,0 @@ | |||||||
| from haystack.preview.document_stores.errors import FilterError |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| class MemoryDocumentStoreFilterError(FilterError): |  | ||||||
|     pass |  | ||||||
							
								
								
									
										2
									
								
								haystack/preview/errors.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								haystack/preview/errors.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,2 @@ | |||||||
|  | class FilterError(Exception): | ||||||
|  |     pass | ||||||
| @ -7,7 +7,8 @@ import pandas as pd | |||||||
| 
 | 
 | ||||||
| from haystack.preview.dataclasses import Document | from haystack.preview.dataclasses import Document | ||||||
| from haystack.preview.document_stores import DocumentStore, DuplicatePolicy | from haystack.preview.document_stores import DocumentStore, DuplicatePolicy | ||||||
| from haystack.preview.document_stores.errors import FilterError, MissingDocumentError, DuplicateDocumentError | from haystack.preview.document_stores.errors import MissingDocumentError, DuplicateDocumentError | ||||||
|  | from haystack.preview.errors import FilterError | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class DocumentStoreBaseTests: | class DocumentStoreBaseTests: | ||||||
|  | |||||||
| @ -1,2 +1,3 @@ | |||||||
| from haystack.preview.utils.expit import expit | from haystack.preview.utils.expit import expit | ||||||
| from haystack.preview.utils.requests_utils import request_with_retry | from haystack.preview.utils.requests_utils import request_with_retry | ||||||
|  | from haystack.preview.utils.filters import document_matches_filter | ||||||
|  | |||||||
| @ -1,10 +1,10 @@ | |||||||
| from typing import List, Any | from typing import List, Any, Union, Dict | ||||||
| 
 | 
 | ||||||
| import numpy as np | import numpy as np | ||||||
| import pandas as pd | import pandas as pd | ||||||
| 
 | 
 | ||||||
| from haystack.preview.dataclasses import Document | from haystack.preview.dataclasses import Document | ||||||
| from haystack.preview.document_stores.memory.errors import MemoryDocumentStoreFilterError | from haystack.preview.errors import FilterError | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| GT_TYPES = (int, float, np.number) | GT_TYPES = (int, float, np.number) | ||||||
| @ -32,10 +32,10 @@ def and_operation(conditions: List[Any], document: Document, _current_key: str): | |||||||
|     :param _current_key: internal, don't use. |     :param _current_key: internal, don't use. | ||||||
|     :return: True if the document matches all the filters, False otherwise |     :return: True if the document matches all the filters, False otherwise | ||||||
|     """ |     """ | ||||||
|     for condition in conditions: |     return all( | ||||||
|         if not match(conditions=condition, document=document, _current_key=_current_key): |         document_matches_filter(conditions=condition, document=document, _current_key=_current_key) | ||||||
|             return False |         for condition in conditions | ||||||
|     return True |     ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def or_operation(conditions: List[Any], document: Document, _current_key: str): | def or_operation(conditions: List[Any], document: Document, _current_key: str): | ||||||
| @ -45,12 +45,12 @@ def or_operation(conditions: List[Any], document: Document, _current_key: str): | |||||||
|     :param conditions: the filters dictionary. |     :param conditions: the filters dictionary. | ||||||
|     :param document: the document to test. |     :param document: the document to test. | ||||||
|     :param _current_key: internal, don't use. |     :param _current_key: internal, don't use. | ||||||
|     :return: True if the document matches ano of the filters, False otherwise |     :return: True if the document matches any of the filters, False otherwise | ||||||
|     """ |     """ | ||||||
|     for condition in conditions: |     return any( | ||||||
|         if match(conditions=condition, document=document, _current_key=_current_key): |         document_matches_filter(conditions=condition, document=document, _current_key=_current_key) | ||||||
|             return True |         for condition in conditions | ||||||
|     return False |     ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def _safe_eq(first: Any, second: Any) -> bool: | def _safe_eq(first: Any, second: Any) -> bool: | ||||||
| @ -76,7 +76,7 @@ def _safe_gt(first: Any, second: Any) -> bool: | |||||||
|     Works only for numerical values and dates. Strings, lists, tables and tensors all raise exceptions. |     Works only for numerical values and dates. Strings, lists, tables and tensors all raise exceptions. | ||||||
|     """ |     """ | ||||||
|     if not isinstance(first, GT_TYPES) or not isinstance(second, GT_TYPES): |     if not isinstance(first, GT_TYPES) or not isinstance(second, GT_TYPES): | ||||||
|         raise MemoryDocumentStoreFilterError( |         raise FilterError( | ||||||
|             f"Can't evaluate '{type(first).__name__} > {type(second).__name__}'. " |             f"Can't evaluate '{type(first).__name__} > {type(second).__name__}'. " | ||||||
|             f"Convert these values into one of the following types: {[type_.__name__ for type_ in GT_TYPES]}" |             f"Convert these values into one of the following types: {[type_.__name__ for type_ in GT_TYPES]}" | ||||||
|         ) |         ) | ||||||
| @ -111,7 +111,7 @@ def in_operation(fields, field_name, value): | |||||||
|         return False |         return False | ||||||
| 
 | 
 | ||||||
|     if not isinstance(value, IN_TYPES): |     if not isinstance(value, IN_TYPES): | ||||||
|         raise MemoryDocumentStoreFilterError("$in accepts only iterable values like lists, sets and tuples.") |         raise FilterError("$in accepts only iterable values like lists, sets and tuples.") | ||||||
| 
 | 
 | ||||||
|     return any(_safe_eq(fields[field_name], v) for v in value) |     return any(_safe_eq(fields[field_name], v) for v in value) | ||||||
| 
 | 
 | ||||||
| @ -208,19 +208,22 @@ OPERATORS = { | |||||||
| RESERVED_KEYS = [*LOGICAL_STATEMENTS.keys(), *OPERATORS.keys()] | RESERVED_KEYS = [*LOGICAL_STATEMENTS.keys(), *OPERATORS.keys()] | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def match(conditions: Any, document: Document, _current_key=None): | def document_matches_filter(conditions: Union[Dict, List], document: Document, _current_key=None): | ||||||
|     """ |     """ | ||||||
|     This method applies the filters to any given document and returns True when the documents |     Check if a document's metadata matches the provided filter conditions. | ||||||
|     metadata matches the filters, False otherwise. |  | ||||||
| 
 | 
 | ||||||
|     :param conditions: the filters dictionary. |     This function evaluates the specified conditions against the metadata of the given document | ||||||
|     :param document: the document to test. |     and returns True if the conditions are met, otherwise it returns False. | ||||||
|     :return: True if the document matches the filters, False otherwise | 
 | ||||||
|  |     :param conditions: A dictionary or list containing filter conditions to be applied to the document's metadata. | ||||||
|  |     :param document: The document whose metadata will be evaluated against the conditions. | ||||||
|  |     :param _current_key: internal parameter, don't use. | ||||||
|  |     :return: True if the document's metadata matches the filter conditions, False otherwise. | ||||||
|     """ |     """ | ||||||
|     if isinstance(conditions, dict): |     if isinstance(conditions, dict): | ||||||
|         # Check for malformed filters, like {"name": {"year": "2020"}} |         # Check for malformed filters, like {"name": {"year": "2020"}} | ||||||
|         if _current_key and any(key not in RESERVED_KEYS for key in conditions.keys()): |         if _current_key and any(key not in RESERVED_KEYS for key in conditions.keys()): | ||||||
|             raise MemoryDocumentStoreFilterError( |             raise FilterError( | ||||||
|                 f"This filter ({{{_current_key}: {conditions}}}) seems to be malformed. " |                 f"This filter ({{{_current_key}: {conditions}}}) seems to be malformed. " | ||||||
|                 "Comparisons between dictionaries are not currently supported. " |                 "Comparisons between dictionaries are not currently supported. " | ||||||
|                 "Check the documentation to learn more about filters syntax." |                 "Check the documentation to learn more about filters syntax." | ||||||
| @ -241,7 +244,7 @@ def match(conditions: Any, document: Document, _current_key=None): | |||||||
|         # A comparison operator ($eq, $in, $gte, ...) |         # A comparison operator ($eq, $in, $gte, ...) | ||||||
|         if field_key in OPERATORS.keys(): |         if field_key in OPERATORS.keys(): | ||||||
|             if not _current_key: |             if not _current_key: | ||||||
|                 raise MemoryDocumentStoreFilterError( |                 raise FilterError( | ||||||
|                     "Filters can't start with an operator like $eq and $in. You have to specify the field name first. " |                     "Filters can't start with an operator like $eq and $in. You have to specify the field name first. " | ||||||
|                     "See the examples in the documentation." |                     "See the examples in the documentation." | ||||||
|                 ) |                 ) | ||||||
| @ -264,9 +267,7 @@ def match(conditions: Any, document: Document, _current_key=None): | |||||||
|         # The default operator for a {key: value} filter is $eq |         # The default operator for a {key: value} filter is $eq | ||||||
|         return eq_operation(fields=document.flatten(), field_name=_current_key, value=conditions) |         return eq_operation(fields=document.flatten(), field_name=_current_key, value=conditions) | ||||||
| 
 | 
 | ||||||
|     raise MemoryDocumentStoreFilterError( |     raise FilterError("Filters must be dictionaries or lists. See the examples in the documentation.") | ||||||
|         "Filters must be dictionaries or lists. See the examples in the documentation." |  | ||||||
|     ) |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def _list_conditions(conditions: Any) -> List[Any]: | def _list_conditions(conditions: Any) -> List[Any]: | ||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 bogdankostic
						bogdankostic