fix(sdk): make Filter type permissive of implicit and dict (#14569)

2025-11-03 12:16:10 +00:00 · 2025-08-28 15:22:00 +05:30 · 2025-08-28 15:22:00 +05:30 · fe8f108746
commit fe8f108746
parent 340b1bf930
2 changed files with 47 additions and 0 deletions
--- a/metadata-ingestion/src/datahub/sdk/search_filters.py
+++ b/metadata-ingestion/src/datahub/sdk/search_filters.py
@ -384,6 +384,21 @@ def _filter_discriminator(v: Any) -> Optional[str]:
    return None


+def _parse_and_like_filter(value: Any) -> Any:
+    """Wrap a multi-key dict without filter keywords in an implicit "and"."""
+    # Pass through non-dicts and single-key dicts unchanged.
+    if not isinstance(value, dict) or len(value) <= 1:
+        return value
+
+    # Explicit and/or/not filters, custom conditions, and container filters
+    # carrying direct_descendants_only are already in their final shape.
+    reserved = {"and", "or", "not", "field", "condition", "direct_descendants_only"}
+    if not reserved.isdisjoint(value):
+        return value
+
+    return {"and": [{key: item} for key, item in value.items()]}
+
+
 if TYPE_CHECKING or not PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR:
    # The `not TYPE_CHECKING` bit is required to make the linter happy,
    # since we currently only run mypy with pydantic v1.
@ -445,6 +460,7 @@ else:
            ],
            Discriminator(_filter_discriminator),
        ],
+        pydantic.BeforeValidator(_parse_and_like_filter),
        pydantic.BeforeValidator(_parse_json_from_string),
    ]

--- a/metadata-ingestion/tests/unit/sdk_v2/test_search_client.py
+++ b/metadata-ingestion/tests/unit/sdk_v2/test_search_client.py
@ -334,6 +334,12 @@ def test_tagged_union_error_messages() -> None:
    ):
        load_filters({"and": [{"unknown_field": 6}]})

+
+@pytest.mark.skipif(
+    not PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR,
+    reason="Tagged union w/ callable discriminator is not supported by the current pydantic version",
+)
+def test_filter_before_validators() -> None:
    # Test that we can load a filter from a string.
    # Sometimes we get filters encoded as JSON, and we want to handle those gracefully.
    filter_str = '{\n  "and": [\n    {"entity_type": ["dataset"]},\n    {"entity_subtype": ["Table"]},\n    {"platform": ["snowflake"]}\n  ]\n}'
@ -350,6 +356,31 @@ def test_tagged_union_error_messages() -> None:
    ):
        load_filters("this is invalid json but should not raise a json error")

+    # Test that we can load a filter from an and-like dictionary.
+    # Sometimes we get filters that are not wrapped in an "and" clause.
+    filter_str = '{"entity_type": ["dataset"], "entity_subtype": ["Table"], "platform": ["snowflake"]}'
+    assert load_filters(filter_str) == F.and_(
+        F.entity_type("dataset"),
+        F.entity_subtype("Table"),
+        F.platform("snowflake"),
+    )
+
+    filter_str = '{"entity_type": ["dataset"], "container": ["urn:li:container:f784c48c306ba1c775ef917e2f8c1560"]}'
+    assert load_filters(filter_str) == F.and_(
+        F.entity_type("dataset"),
+        F.container("urn:li:container:f784c48c306ba1c775ef917e2f8c1560"),
+    )
+
+    filter_str = '{"entity_type": ["dataset"], "container": ["urn:li:container:f784c48c306ba1c775ef917e2f8c1560"], "direct_descendants_only": true}'
+    with pytest.raises(
+        ValidationError,
+        match=re.compile(
+            r"1 validation error.*container\.entity_type.*Extra inputs are not permitted.*",
+            re.DOTALL,
+        ),
+    ):
+        load_filters(filter_str)
+

 def test_invalid_filter() -> None:
    with pytest.raises(InvalidUrnError):