mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-11-09 23:40:05 +00:00
* Added support for sources key metadata fetch in dbt * address comments Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Fixes * fixed tests --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
parent
cb42409999
commit
30bceee580
@ -105,7 +105,6 @@ class SkipResourceTypeEnum(Enum):
|
|||||||
|
|
||||||
ANALYSIS = "analysis"
|
ANALYSIS = "analysis"
|
||||||
TEST = "test"
|
TEST = "test"
|
||||||
SOURCE = "source"
|
|
||||||
|
|
||||||
|
|
||||||
class CompiledQueriesEnum(Enum):
|
class CompiledQueriesEnum(Enum):
|
||||||
|
|||||||
@ -543,7 +543,7 @@ class DbtSource(DbtServiceSource):
|
|||||||
|
|
||||||
if (
|
if (
|
||||||
dbt_objects.dbt_sources
|
dbt_objects.dbt_sources
|
||||||
and resource_type == SkipResourceTypeEnum.SOURCE.value
|
and resource_type == DbtCommonEnum.SOURCE.value
|
||||||
):
|
):
|
||||||
self.add_dbt_sources(
|
self.add_dbt_sources(
|
||||||
key,
|
key,
|
||||||
@ -708,20 +708,6 @@ class DbtSource(DbtServiceSource):
|
|||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if dbt_node.resource_type == SkipResourceTypeEnum.SOURCE.value:
|
|
||||||
parent_fqn = fqn.build(
|
|
||||||
self.metadata,
|
|
||||||
entity_type=Table,
|
|
||||||
service_name=self.config.serviceName,
|
|
||||||
database_name=get_corrected_name(dbt_node.database),
|
|
||||||
schema_name=get_corrected_name(dbt_node.schema_),
|
|
||||||
table_name=dbt_node.name,
|
|
||||||
)
|
|
||||||
|
|
||||||
# check if the parent table exists in OM before adding it to the upstream list
|
|
||||||
if self._get_table_entity(table_fqn=parent_fqn):
|
|
||||||
upstream_nodes.append(parent_fqn)
|
|
||||||
|
|
||||||
return upstream_nodes
|
return upstream_nodes
|
||||||
|
|
||||||
def parse_data_model_columns(
|
def parse_data_model_columns(
|
||||||
|
|||||||
@ -1104,3 +1104,246 @@ class DbtUnitTest(TestCase):
|
|||||||
]
|
]
|
||||||
|
|
||||||
assert len(list(filter(lambda x: x is not None, parsed_exposures))) == 0
|
assert len(list(filter(lambda x: x is not None, parsed_exposures))) == 0
|
||||||
|
|
||||||
|
def test_constants_required_constraint_keys(self):
|
||||||
|
"""Test REQUIRED_CONSTRAINT_KEYS constant"""
|
||||||
|
from metadata.ingestion.source.database.dbt.constants import (
|
||||||
|
REQUIRED_CONSTRAINT_KEYS,
|
||||||
|
)
|
||||||
|
|
||||||
|
expected_keys = [
|
||||||
|
"type",
|
||||||
|
"name",
|
||||||
|
"expression",
|
||||||
|
"warn_unenforced",
|
||||||
|
"warn_unsupported",
|
||||||
|
]
|
||||||
|
self.assertEqual(REQUIRED_CONSTRAINT_KEYS, expected_keys)
|
||||||
|
|
||||||
|
def test_constants_required_results_keys(self):
|
||||||
|
"""Test REQUIRED_RESULTS_KEYS constant"""
|
||||||
|
from metadata.ingestion.source.database.dbt.constants import (
|
||||||
|
REQUIRED_RESULTS_KEYS,
|
||||||
|
)
|
||||||
|
|
||||||
|
expected_keys = {
|
||||||
|
"status",
|
||||||
|
"timing",
|
||||||
|
"thread_id",
|
||||||
|
"execution_time",
|
||||||
|
"message",
|
||||||
|
"adapter_response",
|
||||||
|
"unique_id",
|
||||||
|
}
|
||||||
|
self.assertEqual(REQUIRED_RESULTS_KEYS, expected_keys)
|
||||||
|
|
||||||
|
def test_constants_required_node_keys(self):
|
||||||
|
"""Test REQUIRED_NODE_KEYS constant"""
|
||||||
|
from metadata.ingestion.source.database.dbt.constants import REQUIRED_NODE_KEYS
|
||||||
|
|
||||||
|
expected_keys = {
|
||||||
|
"schema_",
|
||||||
|
"schema",
|
||||||
|
"freshness",
|
||||||
|
"name",
|
||||||
|
"resource_type",
|
||||||
|
"path",
|
||||||
|
"unique_id",
|
||||||
|
"source_name",
|
||||||
|
"source_description",
|
||||||
|
"source_meta",
|
||||||
|
"loader",
|
||||||
|
"identifier",
|
||||||
|
"relation_name",
|
||||||
|
"fqn",
|
||||||
|
"alias",
|
||||||
|
"checksum",
|
||||||
|
"config",
|
||||||
|
"column_name",
|
||||||
|
"test_metadata",
|
||||||
|
"original_file_path",
|
||||||
|
"root_path",
|
||||||
|
"database",
|
||||||
|
"tags",
|
||||||
|
"description",
|
||||||
|
"columns",
|
||||||
|
"meta",
|
||||||
|
"owner",
|
||||||
|
"created_at",
|
||||||
|
"group",
|
||||||
|
"sources",
|
||||||
|
"compiled",
|
||||||
|
"docs",
|
||||||
|
"version",
|
||||||
|
"latest_version",
|
||||||
|
"package_name",
|
||||||
|
"depends_on",
|
||||||
|
"compiled_code",
|
||||||
|
"compiled_sql",
|
||||||
|
"raw_code",
|
||||||
|
"raw_sql",
|
||||||
|
"language",
|
||||||
|
}
|
||||||
|
self.assertEqual(REQUIRED_NODE_KEYS, expected_keys)
|
||||||
|
|
||||||
|
def test_constants_none_keywords_list(self):
|
||||||
|
"""Test NONE_KEYWORDS_LIST constant"""
|
||||||
|
from metadata.ingestion.source.database.dbt.constants import NONE_KEYWORDS_LIST
|
||||||
|
|
||||||
|
expected_keywords = ["none", "null"]
|
||||||
|
self.assertEqual(NONE_KEYWORDS_LIST, expected_keywords)
|
||||||
|
|
||||||
|
def test_constants_exposure_type_map(self):
|
||||||
|
"""Test ExposureTypeMap constant"""
|
||||||
|
from metadata.ingestion.source.database.dbt.constants import ExposureTypeMap
|
||||||
|
|
||||||
|
self.assertIn("dashboard", ExposureTypeMap)
|
||||||
|
self.assertIn("ml", ExposureTypeMap)
|
||||||
|
self.assertIn("application", ExposureTypeMap)
|
||||||
|
|
||||||
|
dashboard_mapping = ExposureTypeMap["dashboard"]
|
||||||
|
self.assertEqual(dashboard_mapping["entity_type"], Dashboard)
|
||||||
|
self.assertEqual(dashboard_mapping["entity_type_name"], "dashboard")
|
||||||
|
|
||||||
|
ml_mapping = ExposureTypeMap["ml"]
|
||||||
|
self.assertEqual(ml_mapping["entity_type"], MlModel)
|
||||||
|
self.assertEqual(ml_mapping["entity_type_name"], "mlmodel")
|
||||||
|
|
||||||
|
app_mapping = ExposureTypeMap["application"]
|
||||||
|
self.assertEqual(app_mapping["entity_type"], APIEndpoint)
|
||||||
|
self.assertEqual(app_mapping["entity_type_name"], "apiEndpoint")
|
||||||
|
|
||||||
|
def test_parse_upstream_nodes_source_schema_handling(self):
|
||||||
|
"""Test that source nodes get schema name '*' in upstream parsing"""
|
||||||
|
from metadata.ingestion.source.database.dbt.constants import DbtCommonEnum
|
||||||
|
|
||||||
|
mock_source_node = MagicMock()
|
||||||
|
mock_source_node.resource_type = DbtCommonEnum.SOURCE.value
|
||||||
|
mock_source_node.database = "test_db"
|
||||||
|
mock_source_node.schema_ = "test_schema"
|
||||||
|
mock_source_node.name = "test_source"
|
||||||
|
|
||||||
|
mock_model_node = MagicMock()
|
||||||
|
mock_model_node.depends_on.nodes = ["source.test.test_source"]
|
||||||
|
|
||||||
|
manifest_entities = {"source.test.test_source": mock_source_node}
|
||||||
|
|
||||||
|
with patch.object(
|
||||||
|
self.dbt_source_obj,
|
||||||
|
"_get_table_entity",
|
||||||
|
return_value=MOCK_TABLE_ENTITIES[0],
|
||||||
|
):
|
||||||
|
with patch.object(
|
||||||
|
self.dbt_source_obj,
|
||||||
|
"is_filtered",
|
||||||
|
return_value=MagicMock(is_filtered=False),
|
||||||
|
):
|
||||||
|
with patch(
|
||||||
|
"metadata.ingestion.source.database.dbt.dbt_utils.get_dbt_model_name",
|
||||||
|
return_value="test_source",
|
||||||
|
):
|
||||||
|
with patch(
|
||||||
|
"metadata.ingestion.source.database.dbt.dbt_utils.get_corrected_name",
|
||||||
|
side_effect=lambda x: x,
|
||||||
|
):
|
||||||
|
with patch("metadata.utils.fqn.build") as mock_fqn_build:
|
||||||
|
mock_fqn_build.return_value = (
|
||||||
|
"test.*.test_schema.test_source"
|
||||||
|
)
|
||||||
|
|
||||||
|
self.dbt_source_obj.parse_upstream_nodes(
|
||||||
|
manifest_entities, mock_model_node
|
||||||
|
)
|
||||||
|
|
||||||
|
# Verify that schema_name="*" was used for source node
|
||||||
|
calls = mock_fqn_build.call_args_list
|
||||||
|
schema_name_used = None
|
||||||
|
for call in calls:
|
||||||
|
if "schema_name" in call[1]:
|
||||||
|
schema_name_used = call[1]["schema_name"]
|
||||||
|
break
|
||||||
|
self.assertEqual(schema_name_used, "test_schema")
|
||||||
|
|
||||||
|
def test_yield_data_models_processes_sources_key(self):
|
||||||
|
"""Test that yield_data_models processes both nodes and sources keys from manifest"""
|
||||||
|
mock_manifest = MagicMock()
|
||||||
|
mock_manifest.sources = {"source.test.table1": MagicMock()}
|
||||||
|
mock_manifest.nodes = {"model.test.table2": MagicMock()}
|
||||||
|
mock_manifest.exposures = {"exposure.test.dashboard1": MagicMock()}
|
||||||
|
|
||||||
|
mock_dbt_objects = MagicMock()
|
||||||
|
mock_dbt_objects.dbt_manifest = mock_manifest
|
||||||
|
mock_dbt_objects.dbt_catalog = None
|
||||||
|
mock_dbt_objects.dbt_run_results = None
|
||||||
|
mock_dbt_objects.dbt_sources = None
|
||||||
|
|
||||||
|
# Verify that manifest_entities includes both sources and nodes
|
||||||
|
for data_model in self.dbt_source_obj.yield_data_models(mock_dbt_objects):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# The method should process entities from sources, nodes, and exposures
|
||||||
|
# We expect the context to be populated with the combined entities
|
||||||
|
self.assertTrue(hasattr(self.dbt_source_obj.context.get(), "data_model_links"))
|
||||||
|
self.assertTrue(hasattr(self.dbt_source_obj.context.get(), "exposures"))
|
||||||
|
self.assertTrue(hasattr(self.dbt_source_obj.context.get(), "dbt_tests"))
|
||||||
|
|
||||||
|
def test_yield_data_models_source_node_schema_handling(self):
|
||||||
|
"""Test that source nodes has correct schema name in yield_data_models"""
|
||||||
|
from metadata.ingestion.source.database.dbt.constants import DbtCommonEnum
|
||||||
|
|
||||||
|
# Create a mock source node
|
||||||
|
mock_source_node = MagicMock()
|
||||||
|
mock_source_node.resource_type = DbtCommonEnum.SOURCE.value
|
||||||
|
mock_source_node.database = "test_db"
|
||||||
|
mock_source_node.schema_ = "actual_schema"
|
||||||
|
mock_source_node.name = "test_source"
|
||||||
|
mock_source_node.description = "Test source description"
|
||||||
|
mock_source_node.tags = []
|
||||||
|
mock_source_node.meta = {}
|
||||||
|
|
||||||
|
manifest_entities = {"source.test.test_source": mock_source_node}
|
||||||
|
|
||||||
|
mock_dbt_objects = MagicMock()
|
||||||
|
mock_dbt_objects.dbt_manifest.sources = manifest_entities
|
||||||
|
mock_dbt_objects.dbt_manifest.nodes = {}
|
||||||
|
mock_dbt_objects.dbt_manifest.exposures = {}
|
||||||
|
mock_dbt_objects.dbt_catalog = None
|
||||||
|
mock_dbt_objects.dbt_run_results = None
|
||||||
|
mock_dbt_objects.dbt_sources = None
|
||||||
|
|
||||||
|
with patch.object(
|
||||||
|
self.dbt_source_obj,
|
||||||
|
"is_filtered",
|
||||||
|
return_value=MagicMock(is_filtered=False),
|
||||||
|
):
|
||||||
|
with patch.object(
|
||||||
|
self.dbt_source_obj,
|
||||||
|
"_get_table_entity",
|
||||||
|
return_value=MOCK_TABLE_ENTITIES[0],
|
||||||
|
):
|
||||||
|
with patch(
|
||||||
|
"metadata.ingestion.source.database.dbt.dbt_utils.get_dbt_model_name",
|
||||||
|
return_value="test_source",
|
||||||
|
):
|
||||||
|
with patch(
|
||||||
|
"metadata.ingestion.source.database.dbt.dbt_utils.get_corrected_name",
|
||||||
|
side_effect=lambda x: x,
|
||||||
|
):
|
||||||
|
with patch("metadata.utils.fqn.build") as mock_fqn_build:
|
||||||
|
mock_fqn_build.return_value = (
|
||||||
|
"test_service.test_db.*.test_source"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Process the source node
|
||||||
|
list(
|
||||||
|
self.dbt_source_obj.yield_data_models(mock_dbt_objects)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Verify that schema_name="*" was used for source node
|
||||||
|
calls = mock_fqn_build.call_args_list
|
||||||
|
schema_name_used = None
|
||||||
|
for call in calls:
|
||||||
|
if "schema_name" in call[1]:
|
||||||
|
schema_name_used = call[1]["schema_name"]
|
||||||
|
break
|
||||||
|
self.assertEqual(schema_name_used, "actual_schema")
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user