diff --git a/metadata-ingestion/src/datahub/sdk/_shared.py b/metadata-ingestion/src/datahub/sdk/_shared.py index 559b30ddd5..80651115ca 100644 --- a/metadata-ingestion/src/datahub/sdk/_shared.py +++ b/metadata-ingestion/src/datahub/sdk/_shared.py @@ -212,6 +212,13 @@ class HasContainer(Entity): while parent_key is not None: browse_path_reversed.append(parent_key.as_urn()) parent_key = parent_key.parent_key() + if container.instance is not None: + browse_path_reversed.append( + DataPlatformInstanceUrn( + container.platform, container.instance + ).urn() + ) + browse_path = list(reversed(browse_path_reversed)) else: container_urn = None diff --git a/metadata-ingestion/src/datahub/sdk/container.py b/metadata-ingestion/src/datahub/sdk/container.py index b4edae693b..0555773cd2 100644 --- a/metadata-ingestion/src/datahub/sdk/container.py +++ b/metadata-ingestion/src/datahub/sdk/container.py @@ -147,7 +147,7 @@ class Container( return self._ensure_container_props().name def set_display_name(self, value: str) -> None: - self._ensure_container_props().name = value + self._ensure_container_props(name=value).name = value @property def description(self) -> Optional[str]: diff --git a/metadata-ingestion/tests/test_helpers/sdk_v2_helpers.py b/metadata-ingestion/tests/test_helpers/sdk_v2_helpers.py new file mode 100644 index 0000000000..76715b93f1 --- /dev/null +++ b/metadata-ingestion/tests/test_helpers/sdk_v2_helpers.py @@ -0,0 +1,17 @@ +import pathlib + +import pytest + +from datahub.sdk._entity import Entity +from tests.test_helpers import mce_helpers + + +def assert_entity_golden( + pytestconfig: pytest.Config, entity: Entity, golden_path: pathlib.Path +) -> None: + mce_helpers.check_goldens_stream( + pytestconfig=pytestconfig, + outputs=entity._as_mcps(), + golden_path=golden_path, + ignore_order=False, + ) diff --git a/metadata-ingestion/tests/unit/sdk_v2/container_golden/test_container_basic_golden.json b/metadata-ingestion/tests/unit/sdk_v2/container_golden/test_container_basic_golden.json new file mode 100644 index 0000000000..d812e83fa3 --- /dev/null +++ b/metadata-ingestion/tests/unit/sdk_v2/container_golden/test_container_basic_golden.json @@ -0,0 +1,52 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:1e476e4c36434ae8a7ea78e467e5b59d", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "bigquery", + "database": "my_bq_project" + }, + "name": "my_bq_project" + } + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1e476e4c36434ae8a7ea78e467e5b59d", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1e476e4c36434ae8a7ea78e467e5b59d", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1e476e4c36434ae8a7ea78e467e5b59d", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Project" + ] + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sdk_v2/container_golden/test_container_complex_golden.json b/metadata-ingestion/tests/unit/sdk_v2/container_golden/test_container_complex_golden.json new file mode 100644 index 0000000000..6735cccdf5 --- /dev/null +++ b/metadata-ingestion/tests/unit/sdk_v2/container_golden/test_container_complex_golden.json @@ -0,0 +1,157 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:66c5ac35f0bfc521dee6f7d9533a8056", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "snowflake", + "instance": "my_instance", + "database": "MY_DB", + "schema": "MY_SCHEMA", + "key1": "value1", + "key2": "value2" + }, + "externalUrl": "https://example.com", + "name": "MY_SCHEMA", + "qualifiedName": "MY_DB.MY_SCHEMA", + "description": "test", + "created": { + "time": 1735787045000 + }, + "lastModified": { + "time": 1736391846000 + } + } + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:66c5ac35f0bfc521dee6f7d9533a8056", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,my_instance)" + } + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:66c5ac35f0bfc521dee6f7d9533a8056", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:37d6500021cda2a0aa7ae1900eab5a9c" + } + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:66c5ac35f0bfc521dee6f7d9533a8056", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,my_instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,my_instance)" + }, + { + "id": "urn:li:container:37d6500021cda2a0aa7ae1900eab5a9c", + "urn": "urn:li:container:37d6500021cda2a0aa7ae1900eab5a9c" + } + ] + } + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:66c5ac35f0bfc521dee6f7d9533a8056", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:66c5ac35f0bfc521dee6f7d9533a8056", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:admin@datahubproject.io", + "type": "TECHNICAL_OWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:66c5ac35f0bfc521dee6f7d9533a8056", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:tag1" + }, + { + "tag": "urn:li:tag:tag2" + } + ] + } + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:66c5ac35f0bfc521dee6f7d9533a8056", + "changeType": "UPSERT", + "aspectName": "glossaryTerms", + "aspect": { + "json": { + "terms": [ + { + "urn": "urn:li:glossaryTerm:AccountBalance" + } + ], + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:__ingestion" + } + } + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:66c5ac35f0bfc521dee6f7d9533a8056", + "changeType": "UPSERT", + "aspectName": "domains", + "aspect": { + "json": { + "domains": [ + "urn:li:domain:Marketing" + ] + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sdk_v2/dataset_golden/test_dataset_complex_golden.json b/metadata-ingestion/tests/unit/sdk_v2/dataset_golden/test_dataset_complex_golden.json index 3b96b4d26e..b224c8c709 100644 --- a/metadata-ingestion/tests/unit/sdk_v2/dataset_golden/test_dataset_complex_golden.json +++ b/metadata-ingestion/tests/unit/sdk_v2/dataset_golden/test_dataset_complex_golden.json @@ -138,6 +138,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,my_instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,my_instance)" + }, { "id": "urn:li:container:37d6500021cda2a0aa7ae1900eab5a9c", "urn": "urn:li:container:37d6500021cda2a0aa7ae1900eab5a9c" diff --git a/metadata-ingestion/tests/unit/sdk_v2/dataset_golden/test_dataset_ingestion_golden.json b/metadata-ingestion/tests/unit/sdk_v2/dataset_golden/test_dataset_ingestion_golden.json index 3325e5e080..ca3a9a6d64 100644 --- a/metadata-ingestion/tests/unit/sdk_v2/dataset_golden/test_dataset_ingestion_golden.json +++ b/metadata-ingestion/tests/unit/sdk_v2/dataset_golden/test_dataset_ingestion_golden.json @@ -145,6 +145,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,my_instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,my_instance)" + }, { "id": "urn:li:container:37d6500021cda2a0aa7ae1900eab5a9c", "urn": "urn:li:container:37d6500021cda2a0aa7ae1900eab5a9c" diff --git a/metadata-ingestion/tests/unit/sdk_v2/test_container.py b/metadata-ingestion/tests/unit/sdk_v2/test_container.py new file mode 100644 index 0000000000..3cd2d928fa --- /dev/null +++ b/metadata-ingestion/tests/unit/sdk_v2/test_container.py @@ -0,0 +1,135 @@ +import pathlib +from datetime import datetime, timezone + +import pytest + +from datahub.emitter.mcp_builder import DatabaseKey, SchemaKey +from datahub.ingestion.source.common.subtypes import ( + DatasetContainerSubTypes, +) +from datahub.metadata.urns import ( + ContainerUrn, + CorpUserUrn, + DomainUrn, + GlossaryTermUrn, + TagUrn, +) +from datahub.sdk.container import Container +from tests.test_helpers.sdk_v2_helpers import assert_entity_golden + +_GOLDEN_DIR = pathlib.Path(__file__).parent / "container_golden" + + +def test_container_basic(pytestconfig: pytest.Config) -> None: + db_key = DatabaseKey( + platform="bigquery", + database="my_bq_project", + ) + + c = Container( + db_key, + display_name="my_bq_project", + subtype=DatasetContainerSubTypes.BIGQUERY_PROJECT, + ) + + # Check urn setup. + assert Container.get_urn_type() == ContainerUrn + assert isinstance(c.urn, ContainerUrn) + assert str(c.urn) == "urn:li:container:1e476e4c36434ae8a7ea78e467e5b59d" + assert str(c.urn) in repr(c) + + # Check most attributes. + assert c.platform_instance is None + assert c.tags is None + assert c.terms is None + assert c.created is None + assert c.last_modified is None + assert c.description is None + assert c.custom_properties == { + "platform": "bigquery", + "database": "my_bq_project", + } + assert c.domain is None + + # Check slots. + with pytest.raises(AttributeError): + assert c.extra_attribute # type: ignore + with pytest.raises(AttributeError): + c.extra_attribute = "slots should reject extra fields" # type: ignore + with pytest.raises(AttributeError): + # This should fail. Eventually we should make it suggest calling set_owners instead. + c.owners = [] # type: ignore + + assert_entity_golden( + pytestconfig, c, _GOLDEN_DIR / "test_container_basic_golden.json" + ) + + +def test_container_complex(pytestconfig: pytest.Config) -> None: + schema_key = SchemaKey( + platform="snowflake", + instance="my_instance", + database="MY_DB", + schema="MY_SCHEMA", + ) + created = datetime(2025, 1, 2, 3, 4, 5, tzinfo=timezone.utc) + updated = datetime(2025, 1, 9, 3, 4, 6, tzinfo=timezone.utc) + + d = Container( + schema_key, + display_name="MY_SCHEMA", + qualified_name="MY_DB.MY_SCHEMA", + subtype=DatasetContainerSubTypes.SCHEMA, + created=created, + last_modified=updated, + extra_properties={ + "key1": "value1", + "key2": "value2", + }, + description="test", + external_url="https://example.com", + owners=[ + CorpUserUrn("admin@datahubproject.io"), + ], + tags=[ + TagUrn("tag1"), + TagUrn("tag2"), + ], + terms=[ + GlossaryTermUrn("AccountBalance"), + ], + domain=DomainUrn("Marketing"), + ) + assert d.platform_instance is not None + assert ( + str(d.platform_instance) + == "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,my_instance)" + ) + assert d.subtype == "Schema" + assert d.description == "test" + assert d.display_name == "MY_SCHEMA" + assert d.qualified_name == "MY_DB.MY_SCHEMA" + assert d.external_url == "https://example.com" + assert d.created == created + assert d.last_modified == updated + assert d.custom_properties == { + "platform": "snowflake", + "instance": "my_instance", + "database": "MY_DB", + "schema": "MY_SCHEMA", + "key1": "value1", + "key2": "value2", + } + + # Check standard aspects. + assert d.domain == DomainUrn("Marketing") + assert d.tags is not None + assert len(d.tags) == 2 + assert d.terms is not None + assert len(d.terms) == 1 + assert d.owners is not None + assert len(d.owners) == 1 + + assert_entity_golden( + pytestconfig, d, _GOLDEN_DIR / "test_container_complex_golden.json" + ) diff --git a/metadata-ingestion/tests/unit/sdk_v2/test_dataset.py b/metadata-ingestion/tests/unit/sdk_v2/test_dataset.py index da041ba3d4..d64302d8fd 100644 --- a/metadata-ingestion/tests/unit/sdk_v2/test_dataset.py +++ b/metadata-ingestion/tests/unit/sdk_v2/test_dataset.py @@ -14,24 +14,12 @@ from datahub.metadata.urns import ( TagUrn, ) from datahub.sdk._attribution import KnownAttribution, change_default_attribution -from datahub.sdk._entity import Entity from datahub.sdk.dataset import Dataset -from tests.test_helpers import mce_helpers +from tests.test_helpers.sdk_v2_helpers import assert_entity_golden _GOLDEN_DIR = pathlib.Path(__file__).parent / "dataset_golden" -def assert_entity_golden( - pytestconfig: pytest.Config, entity: Entity, golden_path: pathlib.Path -) -> None: - mce_helpers.check_goldens_stream( - pytestconfig=pytestconfig, - outputs=entity._as_mcps(), - golden_path=golden_path, - ignore_order=False, - ) - - def test_dataset_basic(pytestconfig: pytest.Config) -> None: d = Dataset( platform="bigquery",