From 8d13b03e85f9ff09cc7ec55facc61adac9bf141e Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Mon, 29 Sep 2025 10:05:29 +0530 Subject: [PATCH] feat(sdk/search): add owner filter (#14649) Co-authored-by: Mayuri N --- .../library/search_filter_by_owner.py | 6 +++ .../src/datahub/sdk/search_filters.py | 45 ++++++++++++++++++- .../tests/unit/sdk_v2/test_search_client.py | 35 +++++++++++++++ 3 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 metadata-ingestion/examples/library/search_filter_by_owner.py diff --git a/metadata-ingestion/examples/library/search_filter_by_owner.py b/metadata-ingestion/examples/library/search_filter_by_owner.py new file mode 100644 index 0000000000..0673d02d13 --- /dev/null +++ b/metadata-ingestion/examples/library/search_filter_by_owner.py @@ -0,0 +1,6 @@ +from datahub.sdk import DataHubClient +from datahub.sdk.search_filters import FilterDsl as F + +# search for all assets having user urn:li:corpuser:jdoe as owner +client = DataHubClient(server="", token="") +results = client.search.get_urns(filter=F.owner("urn:li:corpuser:jdoe")) diff --git a/metadata-ingestion/src/datahub/sdk/search_filters.py b/metadata-ingestion/src/datahub/sdk/search_filters.py index 00874f858a..7b63a20a1c 100644 --- a/metadata-ingestion/src/datahub/sdk/search_filters.py +++ b/metadata-ingestion/src/datahub/sdk/search_filters.py @@ -30,7 +30,14 @@ from datahub.ingestion.graph.filters import ( _get_status_filter, ) from datahub.metadata.schema_classes import EntityTypeName -from datahub.metadata.urns import ContainerUrn, DataPlatformUrn, DomainUrn +from datahub.metadata.urns import ( + ContainerUrn, + CorpGroupUrn, + CorpUserUrn, + DataPlatformUrn, + DomainUrn, +) +from datahub.utilities.urns.urn import guess_entity_type _AndSearchFilterRule = TypedDict( "_AndSearchFilterRule", {"and": List[SearchFilterRule]} @@ -235,6 +242,36 @@ class _EnvFilter(_BaseFilter): ] +class _OwnerFilter(_BaseFilter): + """Filter for entities owned by specific users or groups.""" + + owner: List[str] = pydantic.Field( + description="The owner to filter on. Should be user or group URNs.", + ) + + @pydantic.validator("owner", each_item=True) + def validate_owner(cls, v: str) -> str: + if not v.startswith("urn:li:"): + raise ValueError(f"Owner must be a valid User or Group URN, got: {v}") + _type = guess_entity_type(v) + if _type == CorpUserUrn.ENTITY_TYPE: + return str(CorpUserUrn.from_string(v)) + elif _type == CorpGroupUrn.ENTITY_TYPE: + return str(CorpGroupUrn.from_string(v)) + else: + raise ValueError(f"Owner must be a valid User or Group URN, got: {v}") + + def _build_rule(self) -> SearchFilterRule: + return SearchFilterRule( + field="owners", + condition="EQUAL", + values=self.owner, + ) + + def compile(self) -> _OrFilters: + return [{"and": [self._build_rule()]}] + + class _CustomCondition(_BaseFilter): """Represents a single field condition.""" @@ -407,6 +444,7 @@ if TYPE_CHECKING or not PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR: _DomainFilter, _ContainerFilter, _EnvFilter, + _OwnerFilter, _CustomCondition, ] @@ -448,6 +486,7 @@ else: _ContainerFilter, Tag(_ContainerFilter._field_discriminator()) ], Annotated[_EnvFilter, Tag(_EnvFilter._field_discriminator())], + Annotated[_OwnerFilter, Tag(_OwnerFilter._field_discriminator())], Annotated[ _CustomCondition, Tag(_CustomCondition._field_discriminator()) ], @@ -551,6 +590,10 @@ class FilterDsl: def env(env: Union[str, Sequence[str]], /) -> _EnvFilter: return _EnvFilter(env=[env] if isinstance(env, str) else env) + @staticmethod + def owner(owner: Union[str, Sequence[str]], /) -> _OwnerFilter: + return _OwnerFilter(owner=[owner] if isinstance(owner, str) else owner) + @staticmethod def has_custom_property(key: str, value: str) -> _CustomCondition: return _CustomCondition( diff --git a/metadata-ingestion/tests/unit/sdk_v2/test_search_client.py b/metadata-ingestion/tests/unit/sdk_v2/test_search_client.py index 7bba3c0246..929a112602 100644 --- a/metadata-ingestion/tests/unit/sdk_v2/test_search_client.py +++ b/metadata-ingestion/tests/unit/sdk_v2/test_search_client.py @@ -382,6 +382,41 @@ def test_filter_before_validators() -> None: load_filters(filter_str) +def test_owner_filter() -> None: + """Test basic owner filter functionality.""" + filter_obj: Filter = load_filters({"owner": ["urn:li:corpuser:john"]}) + assert filter_obj == F.owner("urn:li:corpuser:john") + assert filter_obj.compile() == [ + { + "and": [ + SearchFilterRule( + field="owners", + condition="EQUAL", + values=["urn:li:corpuser:john"], + ) + ] + } + ] + + +def test_owner_filter_mixed_types() -> None: + """Test owner filter with both user and group URNs.""" + filter_obj: Filter = load_filters( + {"owner": ["urn:li:corpuser:john", "urn:li:corpGroup:engineering"]} + ) + assert filter_obj == F.owner( + ["urn:li:corpuser:john", "urn:li:corpGroup:engineering"] + ) + + +def test_invalid_owner_filter() -> None: + """Test validation error for invalid owner URN.""" + with pytest.raises( + ValidationError, match="Owner must be a valid User or Group URN" + ): + F.owner("invalid-owner") + + def test_invalid_filter() -> None: with pytest.raises(InvalidUrnError): F.domain("marketing")