mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-10-29 16:59:47 +00:00
feat: Implement function to convert legacy filters to new style (#6314)
* Implement function to convert legacy filters to new style * Reduce return statements in conversion to fix linting * Move convert function in different module * Fix typos in docstrings Co-authored-by: Agnieszka Marzec <97166305+agnieszka-m@users.noreply.github.com> --------- Co-authored-by: Agnieszka Marzec <97166305+agnieszka-m@users.noreply.github.com>
This commit is contained in:
parent
497299c27a
commit
83c245db74
@ -292,3 +292,121 @@ def _list_conditions(conditions: Any) -> List[Any]:
|
||||
if isinstance(conditions, dict):
|
||||
return [{key: value} for key, value in conditions.items()]
|
||||
return [conditions]
|
||||
|
||||
|
||||
def convert(filters: Dict[str, Any]) -> Dict[str, Any]:
    """
    Convert a filter declared using the legacy style into the new style.
    This is mostly meant to ease migration from Haystack 1.x to 2.x for developers
    of Document Stores and Components that use filters.

    This function doesn't verify if `filters` are declared using the legacy style.

    Logical operators nested under a field name apply to that field, whether their
    value is a list, a dictionary, or a plain value. For example
    `{"age": {"$not": {"$gt": 18}}}` converts to
    `{"operator": "NOT", "conditions": [{"field": "age", "operator": ">", "value": 18}]}`.

    Example usage:
    ```python
    legacy_filter = {
        "$and": {
            "type": {"$eq": "article"},
            "date": {"$gte": "2015-01-01", "$lt": "2021-01-01"},
            "rating": {"$gte": 3},
            "$or": {"genre": {"$in": ["economy", "politics"]}, "publisher": {"$eq": "nytimes"}},
        }
    }
    assert convert(legacy_filter) == {
        "operator": "AND",
        "conditions": [
            {"field": "type", "operator": "==", "value": "article"},
            {"field": "date", "operator": ">=", "value": "2015-01-01"},
            {"field": "date", "operator": "<", "value": "2021-01-01"},
            {"field": "rating", "operator": ">=", "value": 3},
            {
                "operator": "OR",
                "conditions": [
                    {"field": "genre", "operator": "in", "value": ["economy", "politics"]},
                    {"field": "publisher", "operator": "==", "value": "nytimes"},
                ],
            },
        ],
    }
    ```

    :param filters: The filter in legacy style.
    :return: The equivalent filter in new style; the root is always a logical
        condition with `"operator"` and `"conditions"` keys.
    """
    converted = _internal_convert(filters)
    if "conditions" not in converted:
        # This is done to handle a corner case when filter is really simple like so:
        #   {"text": "A Foo Document 1"}
        # The root '$and' operator is implicit so the conversion doesn't handle
        # it and it must be added explicitly like so.
        # This only happens for simple filters like the one above.
        return {"operator": "AND", "conditions": [converted]}
    return converted


def _internal_convert(filters: Union[List[Any], Dict[str, Any]], previous_key=None) -> Any:
    """
    Recursively convert filters from legacy to new style.

    :param filters: The legacy (sub-)filter currently being converted.
    :param previous_key: The dictionary key under which `filters` was found: a field
        name, a legacy operator, or `None` at the root.
    :return: A single converted condition, a list of conditions that the caller must
        merge into its own, or the untouched value when it is an operator's operand.
    """
    conditions = []

    if isinstance(filters, list) and (result := _handle_list(filters, previous_key)) is not None:
        return result

    if not isinstance(filters, dict):
        return _handle_non_dict(filters, previous_key)

    for key, value in filters.items():
        if key not in ALL_OPERATORS:
            # `key` is a field name, everything below it refers to that field.
            converted = _internal_convert(value, previous_key=key)
            if isinstance(converted, list):
                conditions.extend(converted)
            else:
                conditions.append(converted)
        elif key in LOGIC_OPERATORS:
            if previous_key not in ALL_OPERATORS and isinstance(value, list):
                # Each list element is converted on its own, scoped to the enclosing
                # field (or to no field at all when we are at the root).
                converted = [_internal_convert({previous_key: v}) for v in value]
            elif previous_key is not None and previous_key not in ALL_OPERATORS:
                # The logical operator is nested directly under a field name, like
                #   {"age": {"$not": {"$gt": 18}}}   or   {"page": {"$not": 102}}
                # Recursing with the operator as `previous_key` would lose the field
                # name, so every nested item is re-scoped to the field instead.
                if isinstance(value, dict):
                    converted = [_internal_convert({previous_key: {k: v}}) for k, v in value.items()]
                else:
                    converted = [_internal_convert({previous_key: value})]
            else:
                converted = _internal_convert(value, previous_key=key)
                if not isinstance(converted, list):
                    converted = [converted]
            conditions.append({"operator": ALL_OPERATORS[key], "conditions": converted})
        elif key in COMPARISON_OPERATORS:
            conditions.append({"field": previous_key, "operator": ALL_OPERATORS[key], "value": value})

    if len(conditions) == 1:
        return conditions[0]

    if previous_key is None:
        # Several conditions at the root are joined by the implicit '$and'.
        return {"operator": "AND", "conditions": conditions}

    return conditions


def _handle_list(filters, previous_key):
    """
    Convert a list found under `previous_key`.

    Returns `None` when the list is the operand of a comparison operator, in which
    case the caller keeps it untouched as the condition value.
    """
    if previous_key in LOGIC_OPERATORS:
        # A list under a logical operator is a list of sub-filters.
        return [_internal_convert(f) for f in filters]
    if previous_key not in COMPARISON_OPERATORS:
        # A list directly under a field name is an implicit '$in'.
        return {"field": previous_key, "operator": "in", "value": filters}
    return None


def _handle_non_dict(filters, previous_key):
    """
    Convert a value that is not a dictionary found under `previous_key`.

    A value directly under a field name is an implicit '$eq'; a value under an
    operator is that operator's operand and is returned untouched.
    """
    if previous_key not in ALL_OPERATORS:
        return {"field": previous_key, "operator": "==", "value": filters}
    return filters


# Operator mappings from legacy style to new one
LOGIC_OPERATORS = {"$and": "AND", "$or": "OR", "$not": "NOT"}

COMPARISON_OPERATORS = {
    "$eq": "==",
    "$ne": "!=",
    "$gt": ">",
    "$gte": ">=",
    "$lt": "<",
    "$lte": "<=",
    "$in": "in",
    "$nin": "not in",
}

ALL_OPERATORS = {**LOGIC_OPERATORS, **COMPARISON_OPERATORS}
|
||||
|
||||
42
releasenotes/notes/filters-converter-485cd24cf38407d0.yaml
Normal file
42
releasenotes/notes/filters-converter-485cd24cf38407d0.yaml
Normal file
@ -0,0 +1,42 @@
|
||||
---
|
||||
prelude: >
|
||||
Following the proposal to introduce a new way of declaring filters
|
||||
in Haystack 2.x for Document Stores and all Components that use them,
|
||||
we introduce a utility function to convert the legacy style to the new style.
|
||||
|
||||
This will make life easier for developers when implementing new Document Stores
|
||||
as it will only be necessary to implement filtering logic for the new style filters, as
|
||||
conversion will be completely handled by the utility function.
|
||||
|
||||
An example usage would be something similar to this:
|
||||
```python
|
||||
legacy_filter = {
|
||||
"$and": {
|
||||
"type": {"$eq": "article"},
|
||||
"date": {"$gte": "2015-01-01", "$lt": "2021-01-01"},
|
||||
"rating": {"$gte": 3},
|
||||
"$or": {"genre": {"$in": ["economy", "politics"]}, "publisher": {"$eq": "nytimes"}},
|
||||
}
|
||||
}
|
||||
assert convert(legacy_filter) == {
|
||||
"operator": "AND",
|
||||
"conditions": [
|
||||
{"field": "type", "operator": "==", "value": "article"},
|
||||
{"field": "date", "operator": ">=", "value": "2015-01-01"},
|
||||
{"field": "date", "operator": "<", "value": "2021-01-01"},
|
||||
{"field": "rating", "operator": ">=", "value": 3},
|
||||
{
|
||||
"operator": "OR",
|
||||
"conditions": [
|
||||
{"field": "genre", "operator": "in", "value": ["economy", "politics"]},
|
||||
{"field": "publisher", "operator": "==", "value": "nytimes"},
|
||||
],
|
||||
},
|
||||
],
|
||||
}
|
||||
```
|
||||
|
||||
For more information on the new filters technical specification see [proposal #6001](https://github.com/deepset-ai/haystack/blob/main/proposals/text/6001-document-store-filter-rework.md)
|
||||
preview:
|
||||
- |
|
||||
Introduce a function to convert legacy filters to the new style
|
||||
@ -4,7 +4,7 @@ import numpy as np
|
||||
|
||||
from haystack.preview import Document
|
||||
from haystack.preview.errors import FilterError
|
||||
from haystack.preview.utils.filters import document_matches_filter
|
||||
from haystack.preview.utils.filters import convert, document_matches_filter
|
||||
|
||||
|
||||
class TestFilterUtils: # pylint: disable=R0904
|
||||
@ -503,3 +503,159 @@ class TestFilterUtils: # pylint: disable=R0904
|
||||
document = Document(meta={"age": 17})
|
||||
filter = {"age": {"$not": {"$gt": 18}}}
|
||||
assert document_matches_filter(filter, document)
|
||||
|
||||
|
||||
# New-style filter that both the fully explicit and the implicit-root legacy
# spellings of the "article" filter must convert to.
_ARTICLE_FILTER_NEW_STYLE = {
    "operator": "AND",
    "conditions": [
        {"field": "type", "operator": "==", "value": "article"},
        {"field": "date", "operator": ">=", "value": "2015-01-01"},
        {"field": "date", "operator": "<", "value": "2021-01-01"},
        {"field": "rating", "operator": ">=", "value": 3},
        {
            "operator": "OR",
            "conditions": [
                {"field": "genre", "operator": "in", "value": ["economy", "politics"]},
                {"field": "publisher", "operator": "==", "value": "nytimes"},
            ],
        },
    ],
}

# Each entry is (test id, legacy-style filter, expected new-style filter).
_CONVERT_CASES = [
    (
        "All operators explicit",
        {
            "$and": {
                "type": {"$eq": "article"},
                "date": {"$gte": "2015-01-01", "$lt": "2021-01-01"},
                "rating": {"$gte": 3},
                "$or": {"genre": {"$in": ["economy", "politics"]}, "publisher": {"$eq": "nytimes"}},
            }
        },
        _ARTICLE_FILTER_NEW_STYLE,
    ),
    (
        "Root $and implicit",
        {
            "type": "article",
            "date": {"$gte": "2015-01-01", "$lt": "2021-01-01"},
            "rating": {"$gte": 3},
            "$or": {"genre": ["economy", "politics"], "publisher": "nytimes"},
        },
        _ARTICLE_FILTER_NEW_STYLE,
    ),
    (
        "Root $or with list and multiple comparisons",
        {
            "$or": [
                {"Type": "News Paper", "Date": {"$lt": "2019-01-01"}},
                {"Type": "Blog Post", "Date": {"$gte": "2019-01-01"}},
            ]
        },
        {
            "operator": "OR",
            "conditions": [
                {
                    "operator": "AND",
                    "conditions": [
                        {"field": "Type", "operator": "==", "value": "News Paper"},
                        {"field": "Date", "operator": "<", "value": "2019-01-01"},
                    ],
                },
                {
                    "operator": "AND",
                    "conditions": [
                        {"field": "Type", "operator": "==", "value": "Blog Post"},
                        {"field": "Date", "operator": ">=", "value": "2019-01-01"},
                    ],
                },
            ],
        },
    ),
    (
        "Implicit root $and and field $eq",
        {"text": "A Foo Document 1"},
        {"operator": "AND", "conditions": [{"field": "text", "operator": "==", "value": "A Foo Document 1"}]},
    ),
    (
        "Root $or with dict and field $or with list",
        {"$or": {"name": {"$or": [{"$eq": "name_0"}, {"$eq": "name_1"}]}, "number": {"$lt": 1.0}}},
        {
            "operator": "OR",
            "conditions": [
                {
                    "operator": "OR",
                    "conditions": [
                        {"field": "name", "operator": "==", "value": "name_0"},
                        {"field": "name", "operator": "==", "value": "name_1"},
                    ],
                },
                {"field": "number", "operator": "<", "value": 1.0},
            ],
        },
    ),
    (
        "Implicit $and and field $in",
        {"number": {"$lte": 2, "$gte": 0}, "name": ["name_0", "name_1"]},
        {
            "operator": "AND",
            "conditions": [
                {"field": "number", "operator": "<=", "value": 2},
                {"field": "number", "operator": ">=", "value": 0},
                {"field": "name", "operator": "in", "value": ["name_0", "name_1"]},
            ],
        },
    ),
    (
        "Implicit root $and and field $and with list",
        {"number": {"$and": [{"$lte": 2}, {"$gte": 0}]}},
        {
            "operator": "AND",
            "conditions": [
                {"field": "number", "operator": "<=", "value": 2},
                {"field": "number", "operator": ">=", "value": 0},
            ],
        },
    ),
    (
        "Root explicit $not",
        {
            "$not": {
                "number": {"$lt": 1.0},
                "$and": {"name": {"$in": ["name_0", "name_1"]}, "$not": {"chapter": {"$eq": "intro"}}},
            }
        },
        {
            "operator": "NOT",
            "conditions": [
                {"field": "number", "operator": "<", "value": 1.0},
                {
                    "operator": "AND",
                    "conditions": [
                        {"field": "name", "operator": "in", "value": ["name_0", "name_1"]},
                        {"operator": "NOT", "conditions": [{"field": "chapter", "operator": "==", "value": "intro"}]},
                    ],
                },
            ],
        },
    ),
]

filters_data = [pytest.param(legacy, expected, id=case_id) for case_id, legacy, expected in _CONVERT_CASES]


@pytest.mark.parametrize("old_style, new_style", filters_data)
def test_convert(old_style, new_style):
    """Converting the legacy filter must yield exactly the expected new-style filter."""
    converted = convert(old_style)
    assert converted == new_style
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user