From b3bfa6433e2b450f1cf2a8fa5712cc65c8a1adb9 Mon Sep 17 00:00:00 2001 From: Suman Maharana Date: Tue, 24 Sep 2024 23:39:17 +0530 Subject: [PATCH] Fix Manifest is not parsed correctly on dbt versionless (#17975) --- .../source/database/dbt/dbt_service.py | 34 ++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/ingestion/src/metadata/ingestion/source/database/dbt/dbt_service.py b/ingestion/src/metadata/ingestion/source/database/dbt/dbt_service.py index 409c3e4194a..911fbc75b96 100644 --- a/ingestion/src/metadata/ingestion/source/database/dbt/dbt_service.py +++ b/ingestion/src/metadata/ingestion/source/database/dbt/dbt_service.py @@ -160,7 +160,7 @@ class DbtServiceSource(TopologyRunnerMixin, Source, ABC): # This step is necessary as the manifest file may not always adhere to the schema definition # and the presence of other nodes can hinder the ingestion process from progressing any further. # Therefore, we are only retaining the essential data for further processing. - required_manifest_keys = ["nodes", "sources", "metadata"] + required_manifest_keys = {"nodes", "sources", "metadata"} manifest_dict.update( { key: {} @@ -169,6 +169,38 @@ class DbtServiceSource(TopologyRunnerMixin, Source, ABC): } ) + required_nodes_keys = { + "schema_", + "schema", + "name", + "resource_type", + "path", + "unique_id", + "fqn", + "alias", + "checksum", + "config", + "column_name", + "test_metadata", + "original_file_path", + "root_path", + "database", + "tags", + "description", + "columns", + "meta", + "package_name", + } + + for node, value in manifest_dict.get( + "nodes" + ).items(): # pylint: disable=unused_variable + keys_to_delete = [ + key for key in value if key.lower() not in required_nodes_keys + ] + for key in keys_to_delete: + del value[key] + def get_dbt_files(self) -> Iterable[DbtFiles]: dbt_files = get_dbt_details(self.source_config.dbtConfigSource) for dbt_file in dbt_files: