From 6d94a03fc7a638c724a8a47714fcdc0b89171d97 Mon Sep 17 00:00:00 2001 From: Onkar Ravgan Date: Wed, 8 Feb 2023 17:12:19 +0530 Subject: [PATCH] Added dbt fixes (#10132) --- .../ingestion/source/database/dbt/metadata.py | 25 +++++++++++------ ingestion/src/metadata/utils/dbt_config.py | 28 +++++++++++++------ 2 files changed, 37 insertions(+), 16 deletions(-) diff --git a/ingestion/src/metadata/ingestion/source/database/dbt/metadata.py b/ingestion/src/metadata/ingestion/source/database/dbt/metadata.py index dd8696341c8..276b2a4c581 100644 --- a/ingestion/src/metadata/ingestion/source/database/dbt/metadata.py +++ b/ingestion/src/metadata/ingestion/source/database/dbt/metadata.py @@ -369,9 +369,12 @@ class DbtSource(DbtServiceSource): # pylint: disable=too-many-public-methods DbtCommonEnum.UPSTREAM.value ] = self.parse_upstream_nodes(manifest_entities, manifest_node) self.context.dbt_tests[key][DbtCommonEnum.RESULTS.value] = next( - item - for item in dbt_objects.dbt_run_results.results - if item.unique_id == key + ( + item + for item in dbt_objects.dbt_run_results.results + if item.unique_id == key + ), + None, ) def yield_data_models(self, dbt_objects: DbtObjects) -> Iterable[DataModelLink]: @@ -418,7 +421,7 @@ class DbtSource(DbtServiceSource): # pylint: disable=too-many-public-methods model_name = ( manifest_node.alias - if manifest_node.alias + if hasattr(manifest_node, "alias") and manifest_node.alias else manifest_node.name ) logger.info(f"Processing DBT node: {model_name}") @@ -498,7 +501,9 @@ class DbtSource(DbtServiceSource): # pylint: disable=too-many-public-methods try: parent_node = manifest_entities[node] table_name = ( - parent_node.alias if parent_node.alias else parent_node.name + parent_node.alias + if hasattr(parent_node, "alias") and parent_node.alias + else parent_node.name ) parent_fqn = fqn.build( self.metadata, @@ -507,7 +512,7 @@ class DbtSource(DbtServiceSource): # pylint: disable=too-many-public-methods database_name=parent_node.database, schema_name=parent_node.schema_, table_name=table_name, - ).lower() + ) if parent_fqn: upstream_nodes.append(parent_fqn) except Exception as exc: # pylint: disable=broad-except @@ -536,12 +541,16 @@ class DbtSource(DbtServiceSource): # pylint: disable=too-many-public-methods column_name = ( catalog_column.name if catalog_column else manifest_column.name ) + column_description = None + if catalog_column and catalog_column.comment: + column_description = catalog_column.comment + columns.append( Column( name=column_name, description=manifest_column.description if manifest_column.description - else catalog_column.comment, + else column_description, dataType=ColumnTypeParser.get_column_type( catalog_column.type if catalog_column @@ -863,7 +872,7 @@ class DbtSource(DbtServiceSource): # pylint: disable=too-many-public-methods except Exception as err: # pylint: disable=broad-except logger.debug(traceback.format_exc()) logger.error( - f"Failed capture tests results for node: {manifest_node.name} {err}" + f"Failed to capture tests results for node: {manifest_node.name} {err}" ) def create_test_case_parameter_definitions(self, dbt_test): diff --git a/ingestion/src/metadata/utils/dbt_config.py b/ingestion/src/metadata/utils/dbt_config.py index d6fd713c6a1..918a066c7e6 100644 --- a/ingestion/src/metadata/utils/dbt_config.py +++ b/ingestion/src/metadata/utils/dbt_config.py @@ -182,18 +182,30 @@ def _(config: DbtCloudConfig): # pylint: disable=too-many-locals runs_data = response.get("data") if runs_data: run_id = runs_data[0]["id"] - logger.debug("Requesting [dbt_catalog]") - dbt_catalog = client.get( - f"/accounts/{account_id}/runs/{run_id}/artifacts/{DBT_CATALOG_FILE_NAME}" - ) + try: + logger.debug("Requesting [dbt_catalog]") + dbt_catalog = client.get( + f"/accounts/{account_id}/runs/{run_id}/artifacts/{DBT_CATALOG_FILE_NAME}" + ) + except Exception as exc: + logger.info( + f"dbt catalog file not found, skipping the catalog file: {exc}" + ) + logger.debug(traceback.format_exc()) logger.debug("Requesting [dbt_manifest]") dbt_manifest = client.get( f"/accounts/{account_id}/runs/{run_id}/artifacts/{DBT_MANIFEST_FILE_NAME}" ) - logger.debug("Requesting [dbt_run_results]") - dbt_run_results = client.get( - f"/accounts/{account_id}/runs/{run_id}/artifacts/{DBT_RUN_RESULTS_FILE_NAME}" - ) + try: + logger.debug("Requesting [dbt_run_results]") + dbt_run_results = client.get( + f"/accounts/{account_id}/runs/{run_id}/artifacts/{DBT_RUN_RESULTS_FILE_NAME}" + ) + except Exception as exc: + logger.info( + f"dbt run_results file not found, skipping dbt tests: {exc}" + ) + logger.debug(traceback.format_exc()) if not dbt_manifest: raise DBTConfigException("Manifest file not found in DBT Cloud")