147 lines
4.3 KiB
Python

import pathlib
from datetime import datetime, timezone
import pytest
from datahub.emitter.mcp_builder import DatabaseKey, SchemaKey
from datahub.ingestion.source.common.subtypes import (
DatasetContainerSubTypes,
)
from datahub.metadata.urns import (
ContainerUrn,
CorpUserUrn,
DomainUrn,
GlossaryTermUrn,
TagUrn,
)
from datahub.sdk.container import Container
from tests.test_helpers.sdk_v2_helpers import assert_entity_golden
_GOLDEN_DIR = pathlib.Path(__file__).parent / "container_golden"
def test_container_basic() -> None:
db_key = DatabaseKey(
platform="bigquery",
database="my_bq_project",
)
c = Container(
db_key,
display_name="my_bq_project",
subtype=DatasetContainerSubTypes.BIGQUERY_PROJECT,
)
# Check urn setup.
assert Container.get_urn_type() == ContainerUrn
assert isinstance(c.urn, ContainerUrn)
assert str(c.urn) == "urn:li:container:1e476e4c36434ae8a7ea78e467e5b59d"
assert str(c.urn) in repr(c)
# Check most attributes.
assert c.platform is not None
assert c.platform.platform_name == "bigquery"
assert c.platform_instance is None
assert c.browse_path == []
assert c.owners is None
assert c.links is None
assert c.tags is None
assert c.terms is None
assert c.created is None
assert c.last_modified is None
assert c.description is None
assert c.custom_properties == {
"platform": "bigquery",
"database": "my_bq_project",
}
assert c.domain is None
# Check slots.
with pytest.raises(AttributeError):
assert c.extra_attribute # type: ignore
with pytest.raises(AttributeError):
c.extra_attribute = "slots should reject extra fields" # type: ignore
with pytest.raises(AttributeError):
# This should fail. Eventually we should make it suggest calling set_owners instead.
c.owners = [] # type: ignore
assert_entity_golden(c, _GOLDEN_DIR / "test_container_basic_golden.json")
def test_container_complex() -> None:
schema_key = SchemaKey(
platform="snowflake",
instance="my_instance",
database="MY_DB",
schema="MY_SCHEMA",
)
db_key = schema_key.parent_key()
assert db_key is not None
created = datetime(2025, 1, 2, 3, 4, 5, tzinfo=timezone.utc)
updated = datetime(2025, 1, 9, 3, 4, 6, tzinfo=timezone.utc)
c = Container(
schema_key,
display_name="MY_SCHEMA",
qualified_name="MY_DB.MY_SCHEMA",
subtype=DatasetContainerSubTypes.SCHEMA,
created=created,
last_modified=updated,
extra_properties={
"key1": "value1",
"key2": "value2",
},
description="test",
external_url="https://example.com",
owners=[
CorpUserUrn("admin@datahubproject.io"),
],
links=["https://example.com/doc1"],
tags=[
TagUrn("tag1"),
TagUrn("tag2"),
],
terms=[
GlossaryTermUrn("AccountBalance"),
],
domain=DomainUrn("Marketing"),
)
assert c.platform is not None
assert c.platform.platform_name == "snowflake"
assert c.platform_instance is not None
assert (
str(c.platform_instance)
== "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,my_instance)"
)
assert c.browse_path == [
c.platform_instance,
db_key.as_urn_typed(),
]
# Properties.
assert c.description == "test"
assert c.display_name == "MY_SCHEMA"
assert c.qualified_name == "MY_DB.MY_SCHEMA"
assert c.external_url == "https://example.com"
assert c.created == created
assert c.last_modified == updated
assert c.custom_properties == {
"platform": "snowflake",
"instance": "my_instance",
"database": "MY_DB",
"schema": "MY_SCHEMA",
"key1": "value1",
"key2": "value2",
}
# Check standard aspects.
assert c.subtype == "Schema"
assert c.owners is not None and len(c.owners) == 1
assert c.links is not None and len(c.links) == 1
assert c.tags is not None and len(c.tags) == 2
assert c.terms is not None and len(c.terms) == 1
assert c.domain == DomainUrn("Marketing")
assert_entity_golden(c, _GOLDEN_DIR / "test_container_complex_golden.json")