refactor: Add support for new filters declaration (#6397)

* Rework filter logic for InMemoryDocumentStore to support new filters declaration * Fix legacy filters tests * Simplify logic and handle dates comparison * Rework MetadataRouter to support new filters * Update docstrings * Add release notes * Fix linting * Avoid duplicating filters specifications * Handle corner case * Simplify docstring * Fix filters logic and tests * Fix Document Store testing legacy filters tests
2025-12-24 05:28:42 +00:00 · 2023-11-24 11:22:46 +01:00 · 2023-11-24 11:22:46 +01:00 · fd16ec63cb
commit fd16ec63cb
parent 28c2b09d90
10 changed files with 883 additions and 890 deletions
--- a/e2e/preview/pipelines/test_preprocessing_pipeline.py
+++ b/e2e/preview/pipelines/test_preprocessing_pipeline.py
@ -18,7 +18,7 @@ def test_preprocessing_pipeline(tmp_path):
    preprocessing_pipeline.add_component(instance=TextFileToDocument(), name="text_file_converter")
    preprocessing_pipeline.add_component(instance=DocumentLanguageClassifier(), name="language_classifier")
    preprocessing_pipeline.add_component(
-        instance=MetadataRouter(rules={"en": {"language": {"$eq": "en"}}}), name="router"
+        instance=MetadataRouter(rules={"en": {"field": "language", "operator": "==", "value": "en"}}), name="router"
    )
    preprocessing_pipeline.add_component(instance=DocumentCleaner(), name="cleaner")
    preprocessing_pipeline.add_component(
--- a/haystack/preview/components/routers/metadata_router.py
+++ b/haystack/preview/components/routers/metadata_router.py
@ -1,7 +1,7 @@
 from typing import Dict, List

 from haystack.preview import component, Document
-from haystack.preview.utils.filters import document_matches_filter
+from haystack.preview.utils.filters import document_matches_filter, convert


@component
@ -19,12 +19,36 @@ class MetadataRouter:
                      follow the format of filtering expressions in Haystack. For example:
                      ```python
                      {
-                            "edge_1": {"created_at": {"$gte": "2023-01-01", "$lt": "2023-04-01"}},
-                            "edge_2": {"created_at": {"$gte": "2023-04-01", "$lt": "2023-07-01"}},
-                            "edge_3": {"created_at": {"$gte": "2023-07-01", "$lt": "2023-10-01"}},
-                            "edge_4": {"created_at": {"$gte": "2023-10-01", "$lt": "2024-01-01"}},
-                      }
-                      ```
+                        "edge_1": {
+                            "operator": "AND",
+                            "conditions": [
+                                {"field": "meta.created_at", "operator": ">=", "value": "2023-01-01"},
+                                {"field": "meta.created_at", "operator": "<", "value": "2023-04-01"},
+                            ],
+                        },
+                        "edge_2": {
+                            "operator": "AND",
+                            "conditions": [
+                                {"field": "meta.created_at", "operator": ">=", "value": "2023-04-01"},
+                                {"field": "meta.created_at", "operator": "<", "value": "2023-07-01"},
+                            ],
+                        },
+                        "edge_3": {
+                            "operator": "AND",
+                            "conditions": [
+                                {"field": "meta.created_at", "operator": ">=", "value": "2023-07-01"},
+                                {"field": "meta.created_at", "operator": "<", "value": "2023-10-01"},
+                            ],
+                        },
+                        "edge_4": {
+                            "operator": "AND",
+                            "conditions": [
+                                {"field": "meta.created_at", "operator": ">=", "value": "2023-10-01"},
+                                {"field": "meta.created_at", "operator": "<", "value": "2024-01-01"},
+                            ],
+                        },
+                    }
+                    ```
        """
        self.rules = rules
        component.set_output_types(self, unmatched=List[Document], **{edge: List[Document] for edge in rules})
@ -43,6 +67,9 @@ class MetadataRouter:
        for document in documents:
            cur_document_matched = False
            for edge, rule in self.rules.items():
+                if "operator" not in rule:
+                    # Must be a legacy filter, convert it
+                    rule = convert(rule)
                if document_matches_filter(rule, document):
                    output[edge].append(document)
                    cur_document_matched = True
--- a/haystack/preview/document_stores/in_memory/document_store.py
+++ b/haystack/preview/document_stores/in_memory/document_store.py
@ -11,7 +11,7 @@ from haystack.preview import default_from_dict, default_to_dict
 from haystack.preview.document_stores.decorator import document_store
 from haystack.preview.dataclasses import Document
 from haystack.preview.document_stores.protocols import DuplicatePolicy
-from haystack.preview.utils.filters import document_matches_filter
+from haystack.preview.utils.filters import document_matches_filter, convert
 from haystack.preview.document_stores.errors import DuplicateDocumentError, DocumentStoreError
 from haystack.preview.utils import expit

@ -92,75 +92,15 @@ class InMemoryDocumentStore:
        """
        Returns the documents that match the filters provided.

-        Filters are defined as nested dictionaries. The keys of the dictionaries can be a logical operator (`"$and"`,
-        `"$or"`, `"$not"`), a comparison operator (`"$eq"`, `$ne`, `"$in"`, `$nin`, `"$gt"`, `"$gte"`, `"$lt"`,
-        `"$lte"`) or a metadata field name.
-
-        Logical operator keys take a dictionary of metadata field names and/or logical operators as value. Metadata
-        field names take a dictionary of comparison operators as value. Comparison operator keys take a single value or
-        (in case of `"$in"`) a list of values as value. If no logical operator is provided, `"$and"` is used as default
-        operation. If no comparison operator is provided, `"$eq"` (or `"$in"` if the comparison value is a list) is used
-        as default operation.
-
-        Example:
-
-        ```python
-        filters = {
-            "$and": {
-                "type": {"$eq": "article"},
-                "date": {"$gte": "2015-01-01", "$lt": "2021-01-01"},
-                "rating": {"$gte": 3},
-                "$or": {
-                    "genre": {"$in": ["economy", "politics"]},
-                    "publisher": {"$eq": "nytimes"}
-                }
-            }
-        }
-        # or simpler using default operators
-        filters = {
-            "type": "article",
-            "date": {"$gte": "2015-01-01", "$lt": "2021-01-01"},
-            "rating": {"$gte": 3},
-            "$or": {
-                "genre": ["economy", "politics"],
-                "publisher": "nytimes"
-            }
-        }
-        ```
-
-        To use the same logical operator multiple times on the same level, logical operators can take a list of
-        dictionaries as value.
-
-        Example:
-
-        ```python
-        filters = {
-            "$or": [
-                {
-                    "$and": {
-                        "Type": "News Paper",
-                        "Date": {
-                            "$lt": "2019-01-01"
-                        }
-                    }
-                },
-                {
-                    "$and": {
-                        "Type": "Blog Post",
-                        "Date": {
-                            "$gte": "2019-01-01"
-                        }
-                    }
-                }
-            ]
-        }
-        ```
+        For a detailed specification of the filters, refer to the DocumentStore.filter_documents() protocol documentation.

        :param filters: The filters to apply to the document list.
        :return: A list of Documents that match the given filters.
        """
        if filters:
-            return [doc for doc in self.storage.values() if document_matches_filter(conditions=filters, document=doc)]
+            if "operator" not in filters:
+                filters = convert(filters)
+            return [doc for doc in self.storage.values() if document_matches_filter(filters=filters, document=doc)]
        return list(self.storage.values())

    def write_documents(self, documents: List[Document], policy: DuplicatePolicy = DuplicatePolicy.FAIL) -> int:
@ -220,9 +160,17 @@ class InMemoryDocumentStore:
        if not query:
            raise ValueError("Query should be a non-empty string")

-        content_type_filter = {"$or": {"content": {"$not": None}, "dataframe": {"$not": None}}}
+        content_type_filter = {
+            "operator": "OR",
+            "conditions": [
+                {"field": "content", "operator": "!=", "value": None},
+                {"field": "dataframe", "operator": "!=", "value": None},
+            ],
+        }
        if filters:
-            filters = {"$and": [content_type_filter, filters]}
+            if "operator" not in filters:
+                filters = convert(filters)
+            filters = {"operator": "AND", "conditions": [content_type_filter, filters]}
        else:
            filters = content_type_filter
        all_documents = self.filter_documents(filters=filters)
--- a/haystack/preview/document_stores/protocols.py
+++ b/haystack/preview/document_stores/protocols.py
@ -51,69 +51,64 @@ class DocumentStore(Protocol):
        """
        Returns the documents that match the filters provided.

-        Filters are defined as nested dictionaries. The keys of the dictionaries can be a logical operator (`"$and"`,
-        `"$or"`, `"$not"`), a comparison operator (`"$eq"`, `$ne`, `"$in"`, `$nin`, `"$gt"`, `"$gte"`, `"$lt"`,
-        `"$lte"`) or a metadata field name.
+        Filters are defined as nested dictionaries that can be of two types:
+        - Comparison
+        - Logic

-        Logical operator keys take a dictionary of metadata field names and/or logical operators as value. Metadata
-        field names take a dictionary of comparison operators as value. Comparison operator keys take a single value or
-        (in case of `"$in"`) a list of values as value. If no logical operator is provided, `"$and"` is used as default
-        operation. If no comparison operator is provided, `"$eq"` (or `"$in"` if the comparison value is a list) is used
-        as default operation.
+        Comparison dictionaries must contain the keys:

-        Example:
+        - `field`
+        - `operator`
+        - `value`

+        Logic dictionaries must contain the keys:
+
+        - `operator`
+        - `conditions`
+
+        The `conditions` key must be a list of dictionaries, either of type Comparison or Logic.
+
+        The `operator` value in Comparison dictionaries must be one of:
+
+        - `==`
+        - `!=`
+        - `>`
+        - `>=`
+        - `<`
+        - `<=`
+        - `in`
+        - `not in`
+
+        The `operator` values in Logic dictionaries must be one of:
+
+        - `NOT`
+        - `OR`
+        - `AND`
+
+
+        A simple filter:
        ```python
-        filters = {
-            "$and": {
-                "type": {"$eq": "article"},
-                "date": {"$gte": "2015-01-01", "$lt": "2021-01-01"},
-                "rating": {"$gte": 3},
-                "$or": {
-                    "genre": {"$in": ["economy", "politics"]},
-                    "publisher": {"$eq": "nytimes"}
-                }
-            }
-        }
-        # or simpler using default operators
-        filters = {
-            "type": "article",
-            "date": {"$gte": "2015-01-01", "$lt": "2021-01-01"},
-            "rating": {"$gte": 3},
-            "$or": {
-                "genre": ["economy", "politics"],
-                "publisher": "nytimes"
-            }
-        }
+        filters = {"field": "meta.type", "operator": "==", "value": "article"}
        ```

-        To use the same logical operator multiple times on the same level, logical operators can take a list of
-        dictionaries as value.
-
-        Example:
-
+        A more complex filter:
        ```python
        filters = {
-            "$or": [
+            "operator": "AND",
+            "conditions": [
+                {"field": "meta.type", "operator": "==", "value": "article"},
+                {"field": "meta.date", "operator": ">=", "value": 1420066800},
+                {"field": "meta.date", "operator": "<", "value": 1609455600},
+                {"field": "meta.rating", "operator": ">=", "value": 3},
                {
-                    "$and": {
-                        "Type": "News Paper",
-                        "Date": {
-                            "$lt": "2019-01-01"
-                        }
-                    }
+                    "operator": "OR",
+                    "conditions": [
+                        {"field": "meta.genre", "operator": "in", "value": ["economy", "politics"]},
+                        {"field": "meta.publisher", "operator": "==", "value": "nytimes"},
+                    ],
                },
-                {
-                    "$and": {
-                        "Type": "Blog Post",
-                        "Date": {
-                            "$gte": "2019-01-01"
-                        }
-                    }
-                }
-            ]
+            ],
        }
        ```

        :param filters: the filters to apply to the document list.
        :return: a list of Documents that match the given filters.
--- a/haystack/preview/testing/document_store.py
+++ b/haystack/preview/testing/document_store.py
@ -236,7 +236,7 @@ class LegacyFilterDocumentsInvalidFiltersTest(FilterableDocsFixtureMixin):
    @pytest.mark.unit
    def test_incorrect_filter_type(self, document_store: DocumentStore, filterable_docs: List[Document]):
        document_store.write_documents(filterable_docs)
-        with pytest.raises(FilterError):
+        with pytest.raises(ValueError):
            document_store.filter_documents(filters="something odd")  # type: ignore

    @pytest.mark.unit
@ -574,7 +574,9 @@ class LegacyFilterDocumentsLessThanTest(FilterableDocsFixtureMixin):
    def test_lt_filter(self, document_store: DocumentStore, filterable_docs: List[Document]):
        document_store.write_documents(filterable_docs)
        result = document_store.filter_documents(filters={"number": {"$lt": 0.0}})
-        assert result == [doc for doc in filterable_docs if "number" in doc.meta and doc.meta["number"] < 0]
+        assert result == [
+            doc for doc in filterable_docs if doc.meta.get("number") is not None and doc.meta["number"] < 0
+        ]

    @pytest.mark.unit
    def test_lt_filter_non_numeric(self, document_store: DocumentStore, filterable_docs: List[Document]):
@ -614,7 +616,9 @@ class LegacyFilterDocumentsLessThanEqualTest(FilterableDocsFixtureMixin):
    def test_lte_filter(self, document_store: DocumentStore, filterable_docs: List[Document]):
        document_store.write_documents(filterable_docs)
        result = document_store.filter_documents(filters={"number": {"$lte": 2.0}})
-        assert result == [doc for doc in filterable_docs if "number" in doc.meta and doc.meta["number"] <= 2.0]
+        assert result == [
+            doc for doc in filterable_docs if doc.meta.get("number") is not None and doc.meta["number"] <= 2.0
+        ]

    @pytest.mark.unit
    def test_lte_filter_non_numeric(self, document_store: DocumentStore, filterable_docs: List[Document]):
@ -658,7 +662,8 @@ class LegacyFilterDocumentsSimpleLogicalTest(FilterableDocsFixtureMixin):
        assert result == [
            doc
            for doc in filterable_docs
-            if (("number" in doc.meta and doc.meta["number"] < 1) or doc.meta.get("name") in ["name_0", "name_1"])
+            if (doc.meta.get("number") is not None and doc.meta["number"] < 1)
+            or doc.meta.get("name") in ["name_0", "name_1"]
        ]

    @pytest.mark.unit
@ -733,7 +738,10 @@ class LegacyFilterDocumentsNestedLogicalTest(FilterableDocsFixtureMixin):
        assert result == [
            doc
            for doc in filterable_docs
-            if (doc.meta.get("name") in ["name_0", "name_1"] or ("number" in doc.meta and doc.meta["number"] < 1))
+            if (
+                doc.meta.get("name") in ["name_0", "name_1"]
+                or (doc.meta.get("number") is not None and doc.meta["number"] < 1)
+            )
        ]

    @pytest.mark.unit
@ -783,11 +791,8 @@ class LegacyFilterDocumentsNestedLogicalTest(FilterableDocsFixtureMixin):
            doc
            for doc in filterable_docs
            if (
-                ("number" in doc.meta and doc.meta["number"] < 1)
-                or (
-                    doc.meta.get("name") in ["name_0", "name_1"]
-                    and ("chapter" in doc.meta and doc.meta["chapter"] != "intro")
-                )
+                (doc.meta.get("number") is not None and doc.meta["number"] < 1)
+                or (doc.meta.get("name") in ["name_0", "name_1"] and (doc.meta.get("chapter") != "intro"))
            )
        ]

--- a/haystack/preview/utils/filters.py
+++ b/haystack/preview/utils/filters.py
@ -1,297 +1,174 @@
 from typing import List, Any, Union, Dict
+from dataclasses import fields
 from datetime import datetime

-import numpy as np
 import pandas as pd

 from haystack.preview.dataclasses import Document
 from haystack.preview.errors import FilterError


-GT_TYPES = (int, float, np.number)
-IN_TYPES = (list, set, tuple)
-
-
-def not_operation(conditions: List[Any], document: Document, _current_key: str):
+def document_matches_filter(filters: Dict[str, Any], document: Document) -> bool:
    """
-    Applies a NOT to all the nested conditions.
-
-    :param conditions: the filters dictionary.
-    :param document: the document to test.
-    :param _current_key: internal, don't use.
-    :return: True if the document matches the negated filters, False otherwise
+    Return whether `filters` match the Document.
+    For a detailed specification of the filters, refer to the DocumentStore.filter_documents() protocol documentation.
    """
-    return not and_operation(conditions=conditions, document=document, _current_key=_current_key)
+    if "field" in filters:
+        return _comparison_condition(filters, document)
+    return _logic_condition(filters, document)


-def and_operation(conditions: List[Any], document: Document, _current_key: str):
-    """
-    Applies an AND to all the nested conditions.
-
-    :param conditions: the filters dictionary.
-    :param document: the document to test.
-    :param _current_key: internal, don't use.
-    :return: True if the document matches all the filters, False otherwise
-    """
-    return all(
-        document_matches_filter(conditions=condition, document=document, _current_key=_current_key)
-        for condition in conditions
-    )
+def _and(document: Document, conditions: List[Dict[str, Any]]) -> bool:
+    return all(_comparison_condition(condition, document) for condition in conditions)


-def or_operation(conditions: List[Any], document: Document, _current_key: str):
-    """
-    Applies an OR to all the nested conditions.
-
-    :param conditions: the filters dictionary.
-    :param document: the document to test.
-    :param _current_key: internal, don't use.
-    :return: True if the document matches any of the filters, False otherwise
-    """
-    return any(
-        document_matches_filter(conditions=condition, document=document, _current_key=_current_key)
-        for condition in conditions
-    )
+def _or(document: Document, conditions: List[Dict[str, Any]]) -> bool:
+    return any(_comparison_condition(condition, document) for condition in conditions)


-def _safe_eq(first: Any, second: Any) -> bool:
-    """
-    Compares objects for equality, even np.ndarrays and pandas DataFrames.
-    """
-
-    if isinstance(first, pd.DataFrame):
-        first = first.to_json()
-
-    if isinstance(second, pd.DataFrame):
-        second = second.to_json()
-
-    if isinstance(first, np.ndarray):
-        first = first.tolist()
-
-    if isinstance(second, np.ndarray):
-        second = second.tolist()
-
-    return first == second
+def _not(document: Document, conditions: List[Dict[str, Any]]) -> bool:
+    return not _and(document, conditions)


-def _safe_gt(first: Any, second: Any) -> bool:
-    """
-    Checks if first is bigger than second.
+LOGICAL_OPERATORS = {"NOT": _not, "OR": _or, "AND": _and}

-    Works only for numerical values and dates in ISO format (YYYY-MM-DD). Strings, lists, tables and tensors all raise exceptions.
-    """
-    if not isinstance(first, GT_TYPES) or not isinstance(second, GT_TYPES):
+
+def _equal(document_value: Any, filter_value: Any) -> bool:
+    if isinstance(document_value, pd.DataFrame):
+        document_value = document_value.to_json()
+
+    if isinstance(filter_value, pd.DataFrame):
+        filter_value = filter_value.to_json()
+
+    return document_value == filter_value
+
+
+def _not_equal(document_value: Any, filter_value: Any) -> bool:
+    return not _equal(document_value=document_value, filter_value=filter_value)
+
+
+def _greater_than(document_value: Any, filter_value: Any) -> bool:
+    if document_value is None or filter_value is None:
+        # We can't compare None values reliably using operators '>', '>=', '<', '<='
+        return False
+
+    if isinstance(document_value, str) or isinstance(filter_value, str):
        try:
-            first = datetime.fromisoformat(first)
-            second = datetime.fromisoformat(second)
-        except (ValueError, TypeError):
-            raise FilterError(
-                f"Can't evaluate '{type(first).__name__} > {type(second).__name__}'. "
-                f"Convert these values into one of the following types: {[type_.__name__ for type_ in GT_TYPES]} "
-                f"or a datetime string in ISO 8601 format."
+            document_value = datetime.fromisoformat(document_value)
+            filter_value = datetime.fromisoformat(filter_value)
+        except (ValueError, TypeError) as exc:
+            msg = (
+                "Can't compare strings using operators '>', '>=', '<', '<='. "
+                "Strings are only comparable if they are ISO formatted dates."
            )
-    return bool(first > second)
+            raise FilterError(msg) from exc
+    if type(filter_value) in [list, pd.DataFrame]:
+        msg = f"Filter value can't be of type {type(filter_value)} using operators '>', '>=', '<', '<='"
+        raise FilterError(msg)
+    return document_value > filter_value


-def eq_operation(fields, field_name, value):
-    """
-    Checks for equality between the document's field value value and a fixed value.
-
-    :param fields: all the document's field value
-    :param field_name: the field to test
-    :param value: the fixed value to compare against
-    :return: True if the values are equal, False otherwise
-    """
-    if not field_name in fields:
+def _greater_than_equal(document_value: Any, filter_value: Any) -> bool:
+    if document_value is None or filter_value is None:
+        # We can't compare None values reliably using operators '>', '>=', '<', '<='
        return False

-    return _safe_eq(fields[field_name], value)
+    return _equal(document_value=document_value, filter_value=filter_value) or _greater_than(
+        document_value=document_value, filter_value=filter_value
+    )


-def in_operation(fields, field_name, value):
-    """
-    Checks for whether the document's field value value is present into the given list.
-
-    :param fields: all the document's field value
-    :param field_name: the field to test
-    :param value; the fixed value to compare against
-    :return: True if the document's value is included in the given list, False otherwise
-    """
-    if not field_name in fields:
+def _less_than(document_value: Any, filter_value: Any) -> bool:
+    if document_value is None or filter_value is None:
+        # We can't compare None values reliably using operators '>', '>=', '<', '<='
        return False

-    if not isinstance(value, IN_TYPES):
-        raise FilterError("$in accepts only iterable values like lists, sets and tuples.")
-
-    return any(_safe_eq(fields[field_name], v) for v in value)
+    return not _greater_than_equal(document_value=document_value, filter_value=filter_value)


-def ne_operation(fields, field_name, value):
-    """
-    Checks for inequality between the document's field value value and a fixed value.
-
-    :param fields: all the document's field value
-    :param field_name: the field to test
-    :param value; the fixed value to compare against
-    :return: True if the values are different, False otherwise
-    """
-    return not eq_operation(fields, field_name, value)
-
-
-def nin_operation(fields, field_name, value):
-    """
-    Checks whether the document's field value value is absent from the given list.
-
-    :param fields: all the document's field value
-    :param field_name: the field to test
-    :param value; the fixed value to compare against
-    :return: True if the document's value is not included in the given list, False otherwise
-    """
-    return not in_operation(fields, field_name, value)
-
-
-def gt_operation(fields, field_name, value):
-    """
-    Checks whether the document's field value value is (strictly) larger than the given value.
-
-    :param fields: all the document's field value
-    :param field_name: the field to test
-    :param value; the fixed value to compare against
-    :return: True if the document's value is strictly larger than the fixed value, False otherwise
-    """
-    if not field_name in fields:
+def _less_than_equal(document_value: Any, filter_value: Any) -> bool:
+    if document_value is None or filter_value is None:
+        # We can't compare None values reliably using operators '>', '>=', '<', '<='
        return False
-    return _safe_gt(fields[field_name], value)
+
+    return not _greater_than(document_value=document_value, filter_value=filter_value)


-def gte_operation(fields, field_name, value):
-    """
-    Checks whether the document's field value value is larger than or equal to the given value.
-
-    :param fields: all the document's field value
-    :param field_name: the field to test
-    :param value; the fixed value to compare against
-    :return: True if the document's value is larger than or equal to the fixed value, False otherwise
-    """
-    return gt_operation(fields, field_name, value) or eq_operation(fields, field_name, value)
+def _in(document_value: Any, filter_value: Any) -> bool:
+    if not isinstance(filter_value, list):
+        msg = (
+            f"Filter value must be a `list` when using operator 'in' or 'not in', received type '{type(filter_value)}'"
+        )
+        raise FilterError(msg)
+    return any(_equal(e, document_value) for e in filter_value)


-def lt_operation(fields, field_name, value):
-    """
-    Checks whether the document's field value value is (strictly) smaller than the given value.
-
-    :param fields: all the document's field value
-    :param field_name: the field to test
-    :param value; the fixed value to compare against
-    :return: True if the document's value is strictly smaller than the fixed value, False otherwise
-    """
-    if not field_name in fields:
-        return False
-    return not _safe_gt(fields[field_name], value) and not _safe_eq(fields[field_name], value)
+def _not_in(document_value: Any, filter_value: Any) -> bool:
+    return not _in(document_value=document_value, filter_value=filter_value)


-def lte_operation(fields, field_name, value):
-    """
-    Checks whether the document's field value value is smaller than or equal to the given value.
-
-    :param fields: all the document's field value
-    :param field_name: the field to test
-    :param value; the fixed value to compare against
-    :return: True if the document's value is smaller than or equal to the fixed value, False otherwise
-    """
-    if not field_name in fields:
-        return False
-    return not _safe_gt(fields[field_name], value)
-
-
-LOGICAL_STATEMENTS = {"$not": not_operation, "$and": and_operation, "$or": or_operation}
-OPERATORS = {
-    "$eq": eq_operation,
-    "$in": in_operation,
-    "$ne": ne_operation,
-    "$nin": nin_operation,
-    "$gt": gt_operation,
-    "$gte": gte_operation,
-    "$lt": lt_operation,
-    "$lte": lte_operation,
+COMPARISON_OPERATORS = {
+    "==": _equal,
+    "!=": _not_equal,
+    ">": _greater_than,
+    ">=": _greater_than_equal,
+    "<": _less_than,
+    "<=": _less_than_equal,
+    "in": _in,
+    "not in": _not_in,
 }
-RESERVED_KEYS = [*LOGICAL_STATEMENTS.keys(), *OPERATORS.keys()]


-def document_matches_filter(conditions: Union[Dict, List], document: Document, _current_key=None):
-    """
-    Check if a document's metadata matches the provided filter conditions.
-
-    This function evaluates the specified conditions against the metadata of the given document
-    and returns True if the conditions are met, otherwise it returns False.
-
-    :param conditions: A dictionary or list containing filter conditions to be applied to the document's metadata.
-    :param document: The document whose metadata will be evaluated against the conditions.
-    :param _current_key: internal parameter, don't use.
-    :return: True if the document's metadata matches the filter conditions, False otherwise.
-    """
-    if isinstance(conditions, dict):
-        # Check for malformed filters, like {"name": {"year": "2020"}}
-        if _current_key and any(key not in RESERVED_KEYS for key in conditions.keys()):
-            raise FilterError(
-                f"This filter ({{{_current_key}: {conditions}}}) seems to be malformed. "
-                "Comparisons between dictionaries are not currently supported. "
-                "Check the documentation to learn more about filters syntax."
-            )
-
-        if len(conditions.keys()) > 1:
-            # The default operation for a list of sibling conditions is $and
-            return and_operation(conditions=_list_conditions(conditions), document=document, _current_key=_current_key)
-
-        field_key, field_value = list(conditions.items())[0]
-
-        # Nested logical statement ($and, $or, $not)
-        if field_key in LOGICAL_STATEMENTS.keys():
-            return LOGICAL_STATEMENTS[field_key](
-                conditions=_list_conditions(field_value), document=document, _current_key=_current_key
-            )
-
-        # A comparison operator ($eq, $in, $gte, ...)
-        if field_key in OPERATORS.keys():
-            if not _current_key:
-                raise FilterError(
-                    "Filters can't start with an operator like $eq and $in. You have to specify the field name first. "
-                    "See the examples in the documentation."
-                )
-            return OPERATORS[field_key](fields=document.to_dict(), field_name=_current_key, value=field_value)
-
-        # Otherwise fall back to the defaults
-        conditions = _list_conditions(field_value)
-        _current_key = field_key
-
-    # Defaults for implicit filters
-    if isinstance(conditions, list):
-        if all(isinstance(cond, dict) for cond in conditions):
-            # The default operation for a list of sibling conditions is $and
-            return and_operation(conditions=_list_conditions(conditions), document=document, _current_key=_current_key)
-        else:
-            # The default operator for a {key: [value1, value2]} filter is $in
-            return in_operation(fields=document.to_dict(), field_name=_current_key, value=conditions)
-
-    if _current_key:
-        # The default operator for a {key: value} filter is $eq
-        return eq_operation(fields=document.to_dict(), field_name=_current_key, value=conditions)
-
-    raise FilterError("Filters must be dictionaries or lists. See the examples in the documentation.")
+def _logic_condition(condition: Dict[str, Any], document: Document) -> bool:
+    if "operator" not in condition:
+        msg = f"'operator' key missing in {condition}"
+        raise FilterError(msg)
+    if "conditions" not in condition:
+        msg = f"'conditions' key missing in {condition}"
+        raise FilterError(msg)
+    operator: str = condition["operator"]
+    conditions: List[Dict[str, Any]] = condition["conditions"]
+    return LOGICAL_OPERATORS[operator](document, conditions)


-def _list_conditions(conditions: Any) -> List[Any]:
-    """
-    Make sure all nested conditions are not dictionaries or single values, but always lists.
+def _comparison_condition(condition: Dict[str, Any], document: Document) -> bool:
+    if "field" not in condition:
+        # 'field' key is only found in comparison dictionaries.
+        # We assume this is a logic dictionary since it's not present.
+        return _logic_condition(condition, document)
+    field: str = condition["field"]

-    :param conditions: the conditions to transform into a list
-    :returns: a list of filters
-    """
-    if isinstance(conditions, list):
-        return conditions
-    if isinstance(conditions, dict):
-        return [{key: value} for key, value in conditions.items()]
-    return [conditions]
+    if "operator" not in condition:
+        msg = f"'operator' key missing in {condition}"
+        raise FilterError(msg)
+    if "value" not in condition:
+        msg = f"'value' key missing in {condition}"
+        raise FilterError(msg)
+
+    if "." in field:
+        # Handles fields formatted like so:
+        # 'meta.person.name'
+        parts = field.split(".")
+        document_value = getattr(document, parts[0])
+        for part in parts[1:]:
+            if part not in document_value:
+                # If a field is not found we treat it as None
+                document_value = None
+                break
+            document_value = document_value[part]
+    elif field not in [f.name for f in fields(document)]:
+        # Converted legacy filters don't add the `meta.` prefix, so we assume
+        # that all filter fields that are not actual fields in Document are converted
+        # filters.
+        #
+        # We handle this to avoid breaking compatibility with converted legacy filters.
+        # This will be removed as soon as we stop supporting legacy filters.
+        document_value = document.meta.get(field)
+    else:
+        document_value = getattr(document, field)
+    operator: str = condition["operator"]
+    filter_value: Any = condition["value"]
+    return COMPARISON_OPERATORS[operator](filter_value=filter_value, document_value=document_value)


 def convert(filters: Dict[str, Any]) -> Dict[str, Any]:
--- a/releasenotes/notes/rework-filters-1bb103d196a1912b.yaml
+++ b/releasenotes/notes/rework-filters-1bb103d196a1912b.yaml
@ -0,0 +1,87 @@
+---
+prelude: >
+  With proposal [#6001](https://github.com/deepset-ai/haystack/pull/6001) we introduced a better specification to declare filters in Haystack 2.x.
+  The new syntax is a bit more verbose but less confusing and ambiguous as there are no implicit operators.
+  This will simplify conversion from this common syntax to a Document Store-specific filtering logic, so it will ease
+  development of new Document Stores.
+  Since everything must be declared explicitly, it will also make it easier for users to understand the filters just
+  by reading them.
+
+  The full specification is as follows.
+
+  ---
+
+  Filters top level must be a dictionary.
+
+  There are two types of dictionaries:
+
+  - Comparison
+  - Logic
+
+  The top level can be either a Comparison or a Logic dictionary.
+
+  Comparison dictionaries must contain the keys:
+
+  - `field`
+  - `operator`
+  - `value`
+
+  Logic dictionaries must contain the keys:
+
+  - `operator`
+  - `conditions`
+
+  `conditions` key must be a list of dictionaries, either Comparison or Logic.
+
+  `operator` values in Comparison dictionaries must be:
+
+  - `==`
+  - `!=`
+  - `>`
+  - `>=`
+  - `<`
+  - `<=`
+  - `in`
+  - `not in`
+
+  `operator` values in Logic dictionaries must be:
+
+  - `NOT`
+  - `OR`
+  - `AND`
+
+  ---
+
+  A simple filter:
+
+  ```python
+  filters = {"field": "meta.type", "operator": "==", "value": "article"}
+  ```
+
+  A more complex filter:
+  ```python
+  filters = {
+      "operator": "AND",
+      "conditions": [
+          {"field": "meta.type", "operator": "==", "value": "article"},
+          {"field": "meta.date", "operator": ">=", "value": 1420066800},
+          {"field": "meta.date", "operator": "<", "value": 1609455600},
+          {"field": "meta.rating", "operator": ">=", "value": 3},
+          {
+              "operator": "OR",
+              "conditions": [
+                  {"field": "meta.genre", "operator": "in", "value": ["economy", "politics"]},
+                  {"field": "meta.publisher", "operator": "==", "value": "nytimes"},
+              ],
+          },
+      ],
+  }
+  ```
+
+  ---
+
+  To avoid causing too much disruption for users using legacy filters we'll keep supporting them for the time being.
+  We also provide a utility `convert` function for developers implementing their Document Store to do the same.
+preview:
+  - |
+    Refactored `InMemoryDocumentStore` and `MetadataRouter` filtering logic to support new filters declaration.
--- a/test/preview/components/routers/test_metadata_router.py
+++ b/test/preview/components/routers/test_metadata_router.py
@ -8,8 +8,20 @@ class TestMetadataRouter:
    @pytest.mark.unit
    def test_run(self):
        rules = {
-            "edge_1": {"created_at": {"$gte": "2023-01-01", "$lt": "2023-04-01"}},
-            "edge_2": {"created_at": {"$gte": "2023-04-01", "$lt": "2023-07-01"}},
+            "edge_1": {
+                "operator": "AND",
+                "conditions": [
+                    {"field": "meta.created_at", "operator": ">=", "value": "2023-01-01"},
+                    {"field": "meta.created_at", "operator": "<", "value": "2023-04-01"},
+                ],
+            },
+            "edge_2": {
+                "operator": "AND",
+                "conditions": [
+                    {"field": "meta.created_at", "operator": ">=", "value": "2023-04-01"},
+                    {"field": "meta.created_at", "operator": "<", "value": "2023-07-01"},
+                ],
+            },
        }
        router = MetadataRouter(rules=rules)
        documents = [
--- a/test/preview/document_stores/test_in_memory.py
+++ b/test/preview/document_stores/test_in_memory.py
@ -146,10 +146,6 @@ class TestMemoryDocumentStore(DocumentStoreBaseTests):  # pylint: disable=R0904
        results = document_store.bm25_retrieval(query="Python", top_k=1)
        assert results[0].content == "Python is a popular programming language"

-    @pytest.mark.skip(reason="Filter is not working properly, see https://github.com/deepset-ai/haystack/issues/6153")
-    def test_eq_filter_embedding(self, document_store: InMemoryDocumentStore, filterable_docs):
-        pass
-
    # Test a query, add a new document and make sure results are appropriately updated
    @pytest.mark.unit
    def test_bm25_retrieval_with_updated_docs(self, document_store: InMemoryDocumentStore):
--- a/test/preview/utils/test_filters.py
+++ b/test/preview/utils/test_filters.py