datahub/metadata-ingestion/tests/unit/test_mce_builder.py


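"""Unit tests for the URN and MCE helper functions in datahub.emitter.mce_builder."""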
from typing import Dict, Optional, Tuple

import pytest

import datahub.emitter.mce_builder as builder
from datahub.metadata.schema_classes import (
    DataFlowInfoClass,
    DatasetPropertiesClass,
    DatasetSnapshotClass,
    MetadataChangeEventClass,
    OwnershipClass,
)


def test_can_add_aspect():
    """A lineage MCE wraps a DatasetSnapshot, so dataset aspects can be added
    to it while non-dataset aspects (e.g. DataFlowInfo) cannot."""
    dataset_mce: MetadataChangeEventClass = builder.make_lineage_mce(
        [
            builder.make_dataset_urn("bigquery", "upstream1"),
            builder.make_dataset_urn("bigquery", "upstream2"),
        ],
        builder.make_dataset_urn("bigquery", "downstream"),
    )

    assert isinstance(dataset_mce.proposedSnapshot, DatasetSnapshotClass)

    assert builder.can_add_aspect(dataset_mce, DatasetPropertiesClass)
    assert builder.can_add_aspect(dataset_mce, OwnershipClass)
    assert not builder.can_add_aspect(dataset_mce, DataFlowInfoClass)
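

# Each entry maps a test id to ((platform, name, platform_instance, env), expected URN).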
test_make_dataset_urns_params: Dict[
    str, Tuple[Tuple[str, str, Optional[str], str], str]
] = {
    "athena": (
        ("athena", "ATABLE", "MY_INSTANCE", "PROD"),
        "urn:li:dataset:(urn:li:dataPlatform:athena,MY_INSTANCE.atable,PROD)",
    ),
    "bigquery": (
        ("bigquery", "ATable", "MY_INSTANCE", "PROD"),
        "urn:li:dataset:(urn:li:dataPlatform:bigquery,MY_INSTANCE.atable,PROD)",
    ),
    "bigquery_no_instance": (
        ("bigquery", "ATable", None, "PROD"),
        "urn:li:dataset:(urn:li:dataPlatform:bigquery,atable,PROD)",
    ),
    "druid": (
        ("druid", "AtaBLE", "MY_INSTANCE", "PROD"),
        "urn:li:dataset:(urn:li:dataPlatform:druid,MY_INSTANCE.atable,PROD)",
    ),
    "hive": (
        ("hive", "ataBLE", "MY_INSTANCE", "PROD"),
        "urn:li:dataset:(urn:li:dataPlatform:hive,MY_INSTANCE.atable,PROD)",
    ),
    "mariadb": (
        ("mariadb", "aTAble", "MY_INSTANCE", "PROD"),
        "urn:li:dataset:(urn:li:dataPlatform:mariadb,MY_INSTANCE.atable,PROD)",
    ),
    "mssql": (
        ("mssql", "aTAblE", "MY_INSTANCE", "PROD"),
        "urn:li:dataset:(urn:li:dataPlatform:mssql,MY_INSTANCE.atable,PROD)",
    ),
    "mysql": (
        ("mysql", "aTABlE", "MY_INSTANCE", "PROD"),
        "urn:li:dataset:(urn:li:dataPlatform:mysql,MY_INSTANCE.atable,PROD)",
    ),
    "oracle": (
        ("oracle", "AtAbLe", "MY_INSTANCE", "PROD"),
        "urn:li:dataset:(urn:li:dataPlatform:oracle,MY_INSTANCE.atable,PROD)",
    ),
    "postgres": (
        ("postgres", "AtAbLE", "MY_INSTANCE", "PROD"),
        "urn:li:dataset:(urn:li:dataPlatform:postgres,MY_INSTANCE.atable,PROD)",
    ),
    "redshift": (
        ("redshift", "atAbLE", "MY_INSTANCE", "PROD"),
        "urn:li:dataset:(urn:li:dataPlatform:redshift,MY_INSTANCE.atable,PROD)",
    ),
    "snowflake": (
        ("snowflake", "atABle", "MY_INSTANCE", "PROD"),
        "urn:li:dataset:(urn:li:dataPlatform:snowflake,MY_INSTANCE.atable,PROD)",
    ),
    "trino": (
        ("trino", "AtaBle", "MY_INSTANCE", "PROD"),
        "urn:li:dataset:(urn:li:dataPlatform:trino,MY_INSTANCE.atable,PROD)",
    ),
    "kafka_no_lower_casing": (
        ("kafka", "MyKafkaTopic", "MY_INSTANCE", "PROD"),
        "urn:li:dataset:(urn:li:dataPlatform:kafka,MY_INSTANCE.MyKafkaTopic,PROD)",
    ),
    "kafka_no_instance_no_lower_casing": (
        ("kafka", "MyKafkaTopic", None, "PROD"),
        "urn:li:dataset:(urn:li:dataPlatform:kafka,MyKafkaTopic,PROD)",
    ),
}
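

# The expected URNs above show that dataset names are lowercased for most
# platforms, while Kafka topic names keep their original casing.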
@pytest.mark.parametrize(
    "urnParts, expected",
    test_make_dataset_urns_params.values(),
    ids=test_make_dataset_urns_params.keys(),
)
def test_make_dataset_urns(
    urnParts: Tuple[str, str, Optional[str], str], expected: str
) -> None:
    dataset_urn = builder.make_dataset_urn_with_platform_instance(
        urnParts[0], urnParts[1], urnParts[2], urnParts[3]
    )

    assert dataset_urn == expected