2022-01-05 16:32:05 -08:00
|
|
|
from typing import Dict, List, Union
|
|
|
|
from unittest import mock
|
|
|
|
|
2022-08-07 06:42:53 +02:00
|
|
|
from pydantic import ValidationError
|
|
|
|
|
2022-01-05 16:32:05 -08:00
|
|
|
from datahub.emitter import mce_builder
|
|
|
|
from datahub.ingestion.api.common import PipelineContext
|
2022-11-21 14:14:33 -05:00
|
|
|
from datahub.ingestion.source.dbt.dbt_core import DBTCoreConfig, DBTCoreSource
|
2022-01-05 16:32:05 -08:00
|
|
|
from datahub.metadata.schema_classes import (
|
|
|
|
OwnerClass,
|
|
|
|
OwnershipSourceClass,
|
|
|
|
OwnershipSourceTypeClass,
|
|
|
|
OwnershipTypeClass,
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
def create_owners_list_from_urn_list(
    owner_urns: List[str], source_type: str
) -> List[OwnerClass]:
    """Build one DATAOWNER ``OwnerClass`` entry per URN in ``owner_urns``.

    When ``source_type`` is truthy, a single ``OwnershipSourceClass`` is
    created from it and attached to every owner; otherwise each owner's
    ``source`` is ``None``.
    """
    # One source object is created up front and shared by all owners.
    shared_source: Union[None, OwnershipSourceClass] = (
        OwnershipSourceClass(type=source_type) if source_type else None
    )
    return [
        OwnerClass(
            owner=urn,
            type=OwnershipTypeClass.DATAOWNER,
            source=shared_source,
        )
        for urn in owner_urns
    ]
|
|
|
|
|
|
|
|
|
2022-11-21 14:14:33 -05:00
|
|
|
def create_mocked_dbt_source() -> DBTCoreSource:
    """Return a ``DBTCoreSource`` whose pipeline context uses a mocked graph.

    The mocked graph is pre-loaded with:
    * ownership: one owner, ``urn:li:corpuser:test_user``, with source "AUDIT"
    * glossary terms: ``old`` and ``old2``
    * tags: ``non_dbt_existing`` and ``dbt:existing``
    """
    mocked_graph = mock.MagicMock()

    # Canned aspects returned by the graph lookups.
    mocked_graph.get_ownership.return_value = (
        mce_builder.make_ownership_aspect_from_urn_list(
            ["urn:li:corpuser:test_user"], "AUDIT"
        )
    )
    mocked_graph.get_glossary_terms.return_value = (
        mce_builder.make_glossary_terms_aspect_from_urn_list(
            ["urn:li:glossaryTerm:old", "urn:li:glossaryTerm:old2"]
        )
    )
    mocked_graph.get_tags.return_value = (
        mce_builder.make_global_tag_aspect_with_tag_list(
            ["non_dbt_existing", "dbt:existing"]
        )
    )

    pipeline_ctx = PipelineContext("test-run-id")
    pipeline_ctx.graph = mocked_graph

    source_config = DBTCoreConfig(**create_base_dbt_config())
    return DBTCoreSource(source_config, pipeline_ctx, "dbt")
|
2022-01-05 16:32:05 -08:00
|
|
|
|
|
|
|
|
|
|
|
def create_base_dbt_config() -> Dict:
    """Return a fresh minimal dbt source config dict used by the tests.

    A new dict is built on every call, so callers may mutate the result
    without affecting other tests.
    """
    # A dict literal already creates a new dict; wrapping it in dict() was redundant.
    return {
        "manifest_path": "temp/",
        "catalog_path": "temp/",
        "sources_path": "temp/",
        "target_platform": "postgres",
        "enable_meta_mapping": False,
    }
|
|
|
|
|
|
|
|
|
|
|
|
def test_dbt_source_patching_no_new():
    """With no new owners supplied, the transformed list has exactly one owner."""
    dbt_source = create_mocked_dbt_source()

    # The mocked graph must be attached to the context before patching.
    assert dbt_source.ctx.graph

    no_new_owners: list = []
    result = dbt_source.get_transformed_owners_by_source_type(
        no_new_owners, "urn:li:dataset:dummy", "SERVICE"
    )
    assert len(result) == 1
|
|
|
|
|
|
|
|
|
|
|
|
def test_dbt_source_patching_no_conflict():
    """New owners whose source type does not clash with existing ones are added.

    The mocked graph holds one AUDIT owner and the new owner is a SERVICE
    owner; the test expects both in the result.
    """
    dbt_source = create_mocked_dbt_source()
    incoming_owners = create_owners_list_from_urn_list(
        ["urn:li:corpuser:new_test"], "SERVICE"
    )

    result = dbt_source.get_transformed_owners_by_source_type(
        incoming_owners, "urn:li:dataset:dummy", "DATABASE"
    )

    assert len(result) == 2

    allowed_owner_urns = {"urn:li:corpuser:test_user", "urn:li:corpuser:new_test"}
    allowed_source_types = {
        OwnershipSourceTypeClass.AUDIT,
        OwnershipSourceTypeClass.SERVICE,
    }
    for owner in result:
        assert owner.owner in allowed_owner_urns
        assert owner.source and owner.source.type in allowed_source_types
|
|
|
|
|
|
|
|
|
|
|
|
def test_dbt_source_patching_with_conflict():
    """New AUDIT owners override the existing AUDIT owner from the graph."""
    dbt_source = create_mocked_dbt_source()
    incoming_owners = create_owners_list_from_urn_list(
        ["urn:li:corpuser:new_test", "urn:li:corpuser:new_test2"], "AUDIT"
    )

    result = dbt_source.get_transformed_owners_by_source_type(
        incoming_owners, "urn:li:dataset:dummy", "AUDIT"
    )

    # Only the two new owners are expected in the result.
    assert len(result) == 2

    allowed_owner_urns = {"urn:li:corpuser:new_test", "urn:li:corpuser:new_test2"}
    for owner in result:
        assert owner.owner in allowed_owner_urns
        assert owner.source and owner.source.type == OwnershipSourceTypeClass.AUDIT
|
|
|
|
|
|
|
|
|
|
|
|
def test_dbt_source_patching_with_conflict_null_source_type_in_existing_owner():
    """Existing owners with a null source type are dropped when new owners exist."""
    dbt_source = create_mocked_dbt_source()

    # Swap in a graph whose single existing owner has no source type.
    graph_with_null_source = mock.MagicMock()
    graph_with_null_source.get_ownership.return_value = (
        mce_builder.make_ownership_aspect_from_urn_list(
            ["urn:li:corpuser:existing_test_user"], None
        )
    )
    dbt_source.ctx.graph = graph_with_null_source

    incoming_owners = create_owners_list_from_urn_list(
        ["urn:li:corpuser:new_test", "urn:li:corpuser:new_test2"], "AUDIT"
    )
    result = dbt_source.get_transformed_owners_by_source_type(
        incoming_owners, "urn:li:dataset:dummy", "AUDIT"
    )

    # So the existing owner with a null type will be removed.
    assert len(result) == 2

    allowed_owner_urns = {"urn:li:corpuser:new_test", "urn:li:corpuser:new_test2"}
    for owner in result:
        assert owner.owner in allowed_owner_urns
        assert owner.source and owner.source.type == OwnershipSourceTypeClass.AUDIT
|
|
|
|
|
|
|
|
|
|
|
|
def test_dbt_source_patching_tags():
    """Check tag patching when filtering existing tags on the dbt prefix.

    There are two existing tags, one of which has the prefix we filter on.
    There are two new tags, one of which has that same prefix, so it
    overrides the existing tag with that prefix.
    """
    dbt_source = create_mocked_dbt_source()
    incoming_tags = mce_builder.make_global_tag_aspect_with_tag_list(
        ["new_non_dbt", "dbt:new_dbt"]
    )

    result = dbt_source.get_transformed_tags_by_prefix(
        incoming_tags.tags, "urn:li:dataset:dummy", "urn:li:tag:dbt:"
    )

    allowed_tag_urns = {
        "urn:li:tag:new_non_dbt",
        "urn:li:tag:non_dbt_existing",
        "urn:li:tag:dbt:new_dbt",
    }
    assert len(result) == 3
    for tag_association in result:
        assert tag_association.tag in allowed_tag_urns
|
|
|
|
|
|
|
|
|
|
|
|
def test_dbt_source_patching_terms():
    """Check that existing and new glossary terms are merged without duplicates.

    Existing and new terms have two entries each with one in common, so
    three unique terms are expected after deduplication.
    """
    dbt_source = create_mocked_dbt_source()
    incoming_terms = mce_builder.make_glossary_terms_aspect_from_urn_list(
        ["urn:li:glossaryTerm:old", "urn:li:glossaryTerm:new"]
    )

    result = dbt_source.get_transformed_terms(
        incoming_terms.terms, "urn:li:dataset:dummy"
    )

    allowed_term_urns = {
        "urn:li:glossaryTerm:old",
        "urn:li:glossaryTerm:old2",
        "urn:li:glossaryTerm:new",
    }
    assert len(result) == 3
    for term_association in result:
        assert term_association.urn in allowed_term_urns
|
2022-08-07 06:42:53 +02:00
|
|
|
|
|
|
|
|
|
|
|
def test_dbt_entity_emission_configuration():
    """Validate the ``entities_enabled`` rule about "Only" emission settings.

    A config with two entity types set to "Only" must be rejected with a
    single validation error carrying the expected message; a config with
    one "Only" and one "Yes" must parse without raising.
    """
    config_dict = {
        "manifest_path": "dummy_path",
        "catalog_path": "dummy_path",
        "target_platform": "dummy_platform",
        "entities_enabled": {"models": "Only", "seeds": "Only"},
    }
    try:
        DBTCoreConfig.parse_obj(config_dict)
    except ValidationError as ve:
        assert len(ve.errors()) == 1
        assert (
            "Cannot have more than 1 type of entity emission set to ONLY"
            in ve.errors()[0]["msg"]
        )
    else:
        # Without this branch the test would pass silently if validation
        # stopped rejecting the invalid config.
        raise AssertionError(
            "Expected ValidationError when more than one entity type is set to 'Only'"
        )

    # valid config
    config_dict = {
        "manifest_path": "dummy_path",
        "catalog_path": "dummy_path",
        "target_platform": "dummy_platform",
        "entities_enabled": {"models": "Yes", "seeds": "Only"},
    }
    DBTCoreConfig.parse_obj(config_dict)
|
2022-08-07 06:42:53 +02:00
|
|
|
|
|
|
|
|
|
|
|
def test_dbt_entity_emission_configuration_helpers():
    """Exercise ``can_emit_node_type`` / ``can_emit_test_results`` per config.

    Four ``entities_enabled`` scenarios are checked: models only, defaults
    (no ``entities_enabled`` key), test results only, and an explicit
    Yes/No mix.
    """
    # Settings shared by every scenario; each scenario parses its own copy.
    common_settings = {
        "manifest_path": "dummy_path",
        "catalog_path": "dummy_path",
        "target_platform": "dummy_platform",
    }

    # Scenario 1: only models are emitted.
    models_only = DBTCoreConfig.parse_obj(
        {**common_settings, "entities_enabled": {"models": "Only"}}
    )
    assert models_only.entities_enabled.can_emit_node_type("model")
    assert not models_only.entities_enabled.can_emit_node_type("source")
    assert not models_only.entities_enabled.can_emit_node_type("test")
    assert not models_only.entities_enabled.can_emit_test_results

    # Scenario 2: no entities_enabled section, so everything is emitted.
    defaults = DBTCoreConfig.parse_obj(dict(common_settings))
    assert defaults.entities_enabled.can_emit_node_type("model")
    assert defaults.entities_enabled.can_emit_node_type("source")
    assert defaults.entities_enabled.can_emit_node_type("test")
    assert defaults.entities_enabled.can_emit_test_results

    # Scenario 3: only test results are emitted.
    test_results_only = DBTCoreConfig.parse_obj(
        {**common_settings, "entities_enabled": {"test_results": "Only"}}
    )
    assert not test_results_only.entities_enabled.can_emit_node_type("model")
    assert not test_results_only.entities_enabled.can_emit_node_type("source")
    assert not test_results_only.entities_enabled.can_emit_node_type("test")
    assert test_results_only.entities_enabled.can_emit_test_results

    # Scenario 4: tests enabled explicitly, models and sources disabled.
    explicit_mix = DBTCoreConfig.parse_obj(
        {
            **common_settings,
            "entities_enabled": {
                "test_results": "Yes",
                "test_definitions": "Yes",
                "models": "No",
                "sources": "No",
            },
        }
    )
    assert not explicit_mix.entities_enabled.can_emit_node_type("model")
    assert not explicit_mix.entities_enabled.can_emit_node_type("source")
    assert explicit_mix.entities_enabled.can_emit_node_type("test")
    assert explicit_mix.entities_enabled.can_emit_test_results
|