mihai103 8e1fbaffad
Adding support for domains when upserting datasets with the cli (#14152)
Co-authored-by: Mihai Ciocirdel <mihai.ciocirdel@swisscom.com>
Co-authored-by: Hyejin Yoon <0327jane@gmail.com>
2025-07-24 17:35:17 -04:00

70 lines
2.3 KiB
Python

import pathlib
from pathlib import Path
from typing import Iterable, List, Union
from freezegun import freeze_time
from datahub.api.entities.dataset.dataset import Dataset, Ownership
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.metadata.schema_classes import MetadataChangeProposalClass
from datahub.testing.mce_helpers import check_goldens_stream
from tests.test_helpers.graph_helpers import MockDataHubGraph
# Fixed timestamp handed to ``freeze_time`` by the tests in this module.
FROZEN_TIME = "2023-04-14 07:00:00"

# Directory that contains this test module; the tests build their fixture
# and golden-file paths relative to it.
RESOURCE_DIR = Path(__file__).parent
@freeze_time(FROZEN_TIME)
def test_dataset_from_yaml() -> None:
    """Check the MCPs generated from ``dataset.yml`` against the upsert golden file.

    Every dataset parsed from the YAML file contributes the proposals returned
    by its ``generate_mcp()`` call; the combined stream is then compared with
    ``golden_dataset_out_upsert.json``. Time is frozen at ``FROZEN_TIME`` for
    the duration of the test.
    """
    yaml_path = RESOURCE_DIR / "dataset.yml"
    parsed: Iterable[Dataset] = Dataset.from_yaml(str(yaml_path))

    # Flatten the per-dataset proposals into one list, in parse order.
    emitted: List[Union[MetadataChangeProposalWrapper, MetadataChangeProposalClass]] = [
        proposal for entry in parsed for proposal in entry.generate_mcp()
    ]

    check_goldens_stream(
        emitted,
        golden_path=RESOURCE_DIR / "golden_dataset_out_upsert.json",
    )
@freeze_time(FROZEN_TIME)
def test_dataset_from_datahub() -> None:
    """Check that ``Dataset.from_datahub`` rebuilds the expected dataset.

    A ``MockDataHubGraph`` is loaded from ``golden_dataset_out.json`` and the
    CustomerAnalytics dataset is read back from it by URN. The assertions then
    verify its urn, domains, description, name, properties, schema metadata,
    tags, glossary terms and single owner. Time is frozen at ``FROZEN_TIME``
    for the duration of the test.
    """
    expected_urn = (
        "urn:li:dataset:(urn:li:dataPlatform:snowflake,CustomerAnalytics,PROD)"
    )

    graph = MockDataHubGraph()
    graph.import_file(Path(RESOURCE_DIR / "golden_dataset_out.json"))

    loaded: Dataset = Dataset.from_datahub(graph, urn=expected_urn)

    # Identity and descriptive fields.
    assert loaded.urn == expected_urn
    assert loaded.domains == ["urn:li:domain:retail"]
    assert (
        loaded.description
        == "Analytics data concerning customer interactions and behaviors."
    )
    assert loaded.name == "CustomerAnalytics"

    # Properties must be present but empty; schema metadata must be present.
    assert loaded.properties is not None
    assert loaded.properties == {}
    assert loaded.schema_metadata is not None

    # Classification metadata.
    assert loaded.tags == ["sales"]
    assert loaded.glossary_terms == [
        "data-quality.high",
        "data-privacy.sensitive",
        "data-security.confidential",
    ]

    # Exactly one owner, expressed as an Ownership object.
    assert loaded.owners is not None
    assert len(loaded.owners) == 1
    owner = loaded.owners[0]
    assert isinstance(owner, Ownership), "Expected an Ownership object"
    assert owner.id == "urn:li:corpuser:jsmith"
    assert owner.type == "urn:li:ownershipType:dataSteward"