mirror of
				https://github.com/deepset-ai/haystack.git
				synced 2025-10-31 09:49:48 +00:00 
			
		
		
		
	feat: Implement function to convert legacy filters to new style (#6314)
* Implement function to convert legacy filters to new style
* Reduce return statements in conversion to fix linting
* Move convert function in different module
* Fix typos in docstrings

Co-authored-by: Agnieszka Marzec <97166305+agnieszka-m@users.noreply.github.com>

---------

Co-authored-by: Agnieszka Marzec <97166305+agnieszka-m@users.noreply.github.com>
This commit is contained in:
		
							parent
							
								
									497299c27a
								
							
						
					
					
						commit
						83c245db74
					
				| @ -292,3 +292,121 @@ def _list_conditions(conditions: Any) -> List[Any]: | |||||||
|     if isinstance(conditions, dict): |     if isinstance(conditions, dict): | ||||||
|         return [{key: value} for key, value in conditions.items()] |         return [{key: value} for key, value in conditions.items()] | ||||||
|     return [conditions] |     return [conditions] | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
def convert(filters: Dict[str, Any]) -> Dict[str, Any]:
    """
    Convert a filter declared using the legacy style into the new style.
    This is mostly meant to ease migration from Haystack 1.x to 2.x for developers
    of Document Stores and Components that use filters.

    This function doesn't verify if `filters` are declared using the legacy style.

    Example usage:
    ```python
    legacy_filter = {
        "$and": {
            "type": {"$eq": "article"},
            "date": {"$gte": "2015-01-01", "$lt": "2021-01-01"},
            "rating": {"$gte": 3},
            "$or": {"genre": {"$in": ["economy", "politics"]}, "publisher": {"$eq": "nytimes"}},
        }
    }
    assert convert(legacy_filter) == {
        "operator": "AND",
        "conditions": [
            {"field": "type", "operator": "==", "value": "article"},
            {"field": "date", "operator": ">=", "value": "2015-01-01"},
            {"field": "date", "operator": "<", "value": "2021-01-01"},
            {"field": "rating", "operator": ">=", "value": 3},
            {
                "operator": "OR",
                "conditions": [
                    {"field": "genre", "operator": "in", "value": ["economy", "politics"]},
                    {"field": "publisher", "operator": "==", "value": "nytimes"},
                ],
            },
        ],
    }
    ```

    Logical operators nested under a field keep that field, whether their
    operands are given as a list or as a dict:
    ```python
    assert convert({"age": {"$not": {"$gt": 18}}}) == {
        "operator": "NOT",
        "conditions": [{"field": "age", "operator": ">", "value": 18}],
    }
    ```

    :param filters: The filter expressed in the legacy style.
    :return: The equivalent filter in the new style; the root is always a
        logical condition, that is a dict with "operator" and "conditions" keys.
    """
    converted = _internal_convert(filters)
    if "conditions" not in converted:
        # This is done to handle a corner case when filter is really simple like so:
        #   {"text": "A Foo Document 1"}
        # The root '$and' operator is implicit so the conversion doesn't handle
        # it and it must be added explicitly like so.
        # This only happens for simple filters like the one above.
        return {"operator": "AND", "conditions": [converted]}
    return converted


def _internal_convert(filters: Union[List[Any], Dict[str, Any]], previous_key=None) -> Any:
    """
    Recursively convert filters from legacy to new style.

    :param filters: The legacy (sub-)filter to convert. Lists and scalars are
        delegated to `_handle_list` and `_handle_non_dict`.
    :param previous_key: The key under which `filters` was found: a field name,
        a legacy operator, or None at the root.
    :return: A single condition dict when exactly one condition is produced,
        an explicit "AND" condition at the root, otherwise the bare list of
        conditions for the caller to merge or wrap.
    """
    conditions = []

    if isinstance(filters, list) and (result := _handle_list(filters, previous_key)) is not None:
        return result

    if not isinstance(filters, dict):
        return _handle_non_dict(filters, previous_key)

    # True when the enclosing key names a document field rather than an operator
    # (or nothing at all, as is the case at the root).
    field_in_scope = previous_key is not None and previous_key not in ALL_OPERATORS

    for key, value in filters.items():
        if key not in ALL_OPERATORS:
            # `key` is a field name: everything below it applies to that field.
            converted = _internal_convert(value, previous_key=key)
            if isinstance(converted, list):
                conditions.extend(converted)
            else:
                conditions.append(converted)
        elif key in LOGIC_OPERATORS:
            if field_in_scope and isinstance(value, list):
                # {"field": {"$or": [{"$eq": 1}, {"$eq": 2}]}}
                # Each operand is converted on its own, re-attached to the field.
                converted = [_internal_convert({previous_key: operand}) for operand in value]
            elif field_in_scope and isinstance(value, dict):
                # {"field": {"$not": {"$gt": 18}}}
                # Keep the field as the previous key, otherwise the comparisons
                # below would be emitted with the logical operator as their field.
                converted = _internal_convert(value, previous_key=previous_key)
            else:
                converted = _internal_convert(value, previous_key=key)
            if not isinstance(converted, list):
                converted = [converted]
            conditions.append({"operator": LOGIC_OPERATORS[key], "conditions": converted})
        else:
            # Only comparison operators are left at this point.
            conditions.append({"field": previous_key, "operator": COMPARISON_OPERATORS[key], "value": value})

    if len(conditions) == 1:
        return conditions[0]

    if previous_key is None:
        # Several sibling conditions at the root are joined by an implicit '$and'.
        return {"operator": "AND", "conditions": conditions}

    return conditions


def _handle_list(filters: List[Any], previous_key: Any) -> Any:
    """
    Convert a list found under `previous_key`.

    :return: The list of converted operands when `previous_key` is a logical
        operator; an "in" condition when it is a field name (legacy shorthand
        for `$in`); None when it is a comparison operator, in which case the
        list is that operator's value and must be left untouched.
    """
    if previous_key in LOGIC_OPERATORS:
        return [_internal_convert(operand) for operand in filters]
    if previous_key not in COMPARISON_OPERATORS:
        return {"field": previous_key, "operator": "in", "value": filters}
    return None


def _handle_non_dict(filters: Any, previous_key: Any) -> Any:
    """
    Convert a value that is not a dict found under `previous_key`.

    :return: An "==" condition when `previous_key` is not a legacy operator
        (legacy shorthand for `$eq`); otherwise the value itself, unchanged.
    """
    if previous_key not in ALL_OPERATORS:
        return {"field": previous_key, "operator": "==", "value": filters}
    return filters


# Operator mappings from legacy style to new one
LOGIC_OPERATORS = {"$and": "AND", "$or": "OR", "$not": "NOT"}

COMPARISON_OPERATORS = {
    "$eq": "==",
    "$ne": "!=",
    "$gt": ">",
    "$gte": ">=",
    "$lt": "<",
    "$lte": "<=",
    "$in": "in",
    "$nin": "not in",
}

ALL_OPERATORS = {**LOGIC_OPERATORS, **COMPARISON_OPERATORS}
|  | |||||||
							
								
								
									
										42
									
								
								releasenotes/notes/filters-converter-485cd24cf38407d0.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								releasenotes/notes/filters-converter-485cd24cf38407d0.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,42 @@ | |||||||
|  | --- | ||||||
|  | prelude: > | ||||||
|  |   Following the proposal to introduce a new way of declaring filters | ||||||
|  |   in Haystack 2.x for Document Stores and all Components that use them, | ||||||
|  |   we introduce a utility function to convert the legacy style to the new style. | ||||||
|  | 
 | ||||||
|  |   This will make life easier for developers when implementing new Document Stores, | ||||||
|  |   as they will only need to implement filtering logic for the new style filters; | ||||||
|  |   conversion from the legacy style is handled entirely by the utility function. | ||||||
|  | 
 | ||||||
|  |   An example usage would be something similar to this: | ||||||
|  |   ```python | ||||||
|  |   legacy_filter = { | ||||||
|  |       "$and": { | ||||||
|  |           "type": {"$eq": "article"}, | ||||||
|  |           "date": {"$gte": "2015-01-01", "$lt": "2021-01-01"}, | ||||||
|  |           "rating": {"$gte": 3}, | ||||||
|  |           "$or": {"genre": {"$in": ["economy", "politics"]}, "publisher": {"$eq": "nytimes"}}, | ||||||
|  |       } | ||||||
|  |   } | ||||||
|  |   assert convert(legacy_filter) == { | ||||||
|  |       "operator": "AND", | ||||||
|  |       "conditions": [ | ||||||
|  |           {"field": "type", "operator": "==", "value": "article"}, | ||||||
|  |           {"field": "date", "operator": ">=", "value": "2015-01-01"}, | ||||||
|  |           {"field": "date", "operator": "<", "value": "2021-01-01"}, | ||||||
|  |           {"field": "rating", "operator": ">=", "value": 3}, | ||||||
|  |           { | ||||||
|  |               "operator": "OR", | ||||||
|  |               "conditions": [ | ||||||
|  |                   {"field": "genre", "operator": "in", "value": ["economy", "politics"]}, | ||||||
|  |                   {"field": "publisher", "operator": "==", "value": "nytimes"}, | ||||||
|  |               ], | ||||||
|  |           }, | ||||||
|  |       ], | ||||||
|  |   } | ||||||
|  |   ``` | ||||||
|  | 
 | ||||||
|  |   For more information on the technical specification of the new filters, see [proposal #6001](https://github.com/deepset-ai/haystack/blob/main/proposals/text/6001-document-store-filter-rework.md). | ||||||
|  | preview: | ||||||
|  |   - | | ||||||
|  |     Introduce a function to convert legacy filters to the new style | ||||||
| @ -4,7 +4,7 @@ import numpy as np | |||||||
| 
 | 
 | ||||||
| from haystack.preview import Document | from haystack.preview import Document | ||||||
| from haystack.preview.errors import FilterError | from haystack.preview.errors import FilterError | ||||||
| from haystack.preview.utils.filters import document_matches_filter | from haystack.preview.utils.filters import convert, document_matches_filter | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class TestFilterUtils:  # pylint: disable=R0904 | class TestFilterUtils:  # pylint: disable=R0904 | ||||||
| @ -503,3 +503,159 @@ class TestFilterUtils:  # pylint: disable=R0904 | |||||||
|         document = Document(meta={"age": 17}) |         document = Document(meta={"age": 17}) | ||||||
|         filter = {"age": {"$not": {"$gt": 18}}} |         filter = {"age": {"$not": {"$gt": 18}}} | ||||||
|         assert document_matches_filter(filter, document) |         assert document_matches_filter(filter, document) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
def _comparison(field, operator, value):
    """Build the new-style comparison condition expected from `convert`."""
    return {"field": field, "operator": operator, "value": value}


def _logical(operator, *conditions):
    """Build the new-style logical condition expected from `convert`."""
    return {"operator": operator, "conditions": list(conditions)}


# Each entry pairs a legacy-style filter with the new-style filter that
# `convert` must produce for it.
filters_data = [
    pytest.param(
        {
            "$and": {
                "type": {"$eq": "article"},
                "date": {"$gte": "2015-01-01", "$lt": "2021-01-01"},
                "rating": {"$gte": 3},
                "$or": {"genre": {"$in": ["economy", "politics"]}, "publisher": {"$eq": "nytimes"}},
            }
        },
        _logical(
            "AND",
            _comparison("type", "==", "article"),
            _comparison("date", ">=", "2015-01-01"),
            _comparison("date", "<", "2021-01-01"),
            _comparison("rating", ">=", 3),
            _logical(
                "OR",
                _comparison("genre", "in", ["economy", "politics"]),
                _comparison("publisher", "==", "nytimes"),
            ),
        ),
        id="All operators explicit",
    ),
    pytest.param(
        {
            "type": "article",
            "date": {"$gte": "2015-01-01", "$lt": "2021-01-01"},
            "rating": {"$gte": 3},
            "$or": {"genre": ["economy", "politics"], "publisher": "nytimes"},
        },
        _logical(
            "AND",
            _comparison("type", "==", "article"),
            _comparison("date", ">=", "2015-01-01"),
            _comparison("date", "<", "2021-01-01"),
            _comparison("rating", ">=", 3),
            _logical(
                "OR",
                _comparison("genre", "in", ["economy", "politics"]),
                _comparison("publisher", "==", "nytimes"),
            ),
        ),
        id="Root $and implicit",
    ),
    pytest.param(
        {
            "$or": [
                {"Type": "News Paper", "Date": {"$lt": "2019-01-01"}},
                {"Type": "Blog Post", "Date": {"$gte": "2019-01-01"}},
            ]
        },
        _logical(
            "OR",
            _logical(
                "AND",
                _comparison("Type", "==", "News Paper"),
                _comparison("Date", "<", "2019-01-01"),
            ),
            _logical(
                "AND",
                _comparison("Type", "==", "Blog Post"),
                _comparison("Date", ">=", "2019-01-01"),
            ),
        ),
        id="Root $or with list and multiple comparisons",
    ),
    pytest.param(
        {"text": "A Foo Document 1"},
        _logical("AND", _comparison("text", "==", "A Foo Document 1")),
        id="Implicit root $and and field $eq",
    ),
    pytest.param(
        {"$or": {"name": {"$or": [{"$eq": "name_0"}, {"$eq": "name_1"}]}, "number": {"$lt": 1.0}}},
        _logical(
            "OR",
            _logical(
                "OR",
                _comparison("name", "==", "name_0"),
                _comparison("name", "==", "name_1"),
            ),
            _comparison("number", "<", 1.0),
        ),
        id="Root $or with dict and field $or with list",
    ),
    pytest.param(
        {"number": {"$lte": 2, "$gte": 0}, "name": ["name_0", "name_1"]},
        _logical(
            "AND",
            _comparison("number", "<=", 2),
            _comparison("number", ">=", 0),
            _comparison("name", "in", ["name_0", "name_1"]),
        ),
        id="Implicit $and and field $in",
    ),
    pytest.param(
        {"number": {"$and": [{"$lte": 2}, {"$gte": 0}]}},
        _logical(
            "AND",
            _comparison("number", "<=", 2),
            _comparison("number", ">=", 0),
        ),
        id="Implicit root $and and field $and with list",
    ),
    pytest.param(
        {
            "$not": {
                "number": {"$lt": 1.0},
                "$and": {"name": {"$in": ["name_0", "name_1"]}, "$not": {"chapter": {"$eq": "intro"}}},
            }
        },
        _logical(
            "NOT",
            _comparison("number", "<", 1.0),
            _logical(
                "AND",
                _comparison("name", "in", ["name_0", "name_1"]),
                _logical("NOT", _comparison("chapter", "==", "intro")),
            ),
        ),
        id="Root explicit $not",
    ),
]


@pytest.mark.parametrize("old_style, new_style", filters_data)
def test_convert(old_style, new_style):
    """Check that every legacy filter converts to its expected new-style form."""
    result = convert(old_style)
    assert result == new_style
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Silvano Cerza
						Silvano Cerza