From 3cede10ab30e22dcad286bd42bcd154732e40942 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 5 Oct 2023 13:29:47 -0400 Subject: [PATCH] feat(ingest/dbt): support `use_compiled_code` and `test_warnings_are_errors` (#8956) --- .../datahub/configuration/source_common.py | 2 +- ...ation.py => validate_field_deprecation.py} | 14 +++++-- .../ingestion/source/dbt/dbt_common.py | 41 ++++++++++++++----- .../src/datahub/ingestion/source/file.py | 2 +- .../ingestion/source/powerbi/config.py | 2 +- .../ingestion/source/redshift/config.py | 2 +- .../src/datahub/ingestion/source/s3/config.py | 2 +- .../ingestion/source/sql/clickhouse.py | 2 +- .../ingestion/source/sql/sql_config.py | 2 +- .../src/datahub/ingestion/source/tableau.py | 2 +- .../tests/unit/test_pydantic_validators.py | 2 +- 11 files changed, 51 insertions(+), 22 deletions(-) rename metadata-ingestion/src/datahub/configuration/{pydantic_field_deprecation.py => validate_field_deprecation.py} (74%) diff --git a/metadata-ingestion/src/datahub/configuration/source_common.py b/metadata-ingestion/src/datahub/configuration/source_common.py index 37b93f3e59..a9f891ddb7 100644 --- a/metadata-ingestion/src/datahub/configuration/source_common.py +++ b/metadata-ingestion/src/datahub/configuration/source_common.py @@ -4,7 +4,7 @@ from pydantic import validator from pydantic.fields import Field from datahub.configuration.common import ConfigModel, ConfigurationError -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.metadata.schema_classes import FabricTypeClass DEFAULT_ENV = FabricTypeClass.PROD diff --git a/metadata-ingestion/src/datahub/configuration/pydantic_field_deprecation.py b/metadata-ingestion/src/datahub/configuration/validate_field_deprecation.py similarity index 74% rename from metadata-ingestion/src/datahub/configuration/pydantic_field_deprecation.py rename to metadata-ingestion/src/datahub/configuration/validate_field_deprecation.py index ed82acb594..6134c4dab4 100644 --- a/metadata-ingestion/src/datahub/configuration/pydantic_field_deprecation.py +++ b/metadata-ingestion/src/datahub/configuration/validate_field_deprecation.py @@ -1,20 +1,28 @@ import warnings -from typing import Optional, Type +from typing import Any, Optional, Type import pydantic from datahub.configuration.common import ConfigurationWarning from datahub.utilities.global_warning_util import add_global_warning +_unset = object() -def pydantic_field_deprecated(field: str, message: Optional[str] = None) -> classmethod: + +def pydantic_field_deprecated( + field: str, + warn_if_value_is_not: Any = _unset, + message: Optional[str] = None, +) -> classmethod: if message: output = message else: output = f"{field} is deprecated and will be removed in a future release. Please remove it from your config." def _validate_deprecated(cls: Type, values: dict) -> dict: - if field in values: + if field in values and ( + warn_if_value_is_not is _unset or values[field] != warn_if_value_is_not + ): add_global_warning(output) warnings.warn(output, ConfigurationWarning, stacklevel=2) return values diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index f9b7189297..0f5c08eb6a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -18,8 +18,8 @@ from datahub.configuration.common import ( ConfigurationError, LineageConfig, ) -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated from datahub.configuration.source_common import DatasetSourceConfigMixin +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.emitter import mce_builder from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.common import PipelineContext @@ -214,7 +214,9 @@ class DBTCommonConfig( default=False, description="Use model identifier instead of model name if defined (if not, default to model name).", ) - _deprecate_use_identifiers = pydantic_field_deprecated("use_identifiers") + _deprecate_use_identifiers = pydantic_field_deprecated( + "use_identifiers", warn_if_value_is_not=False + ) entities_enabled: DBTEntitiesEnabled = Field( DBTEntitiesEnabled(), @@ -278,6 +280,14 @@ class DBTCommonConfig( description="When enabled, converts column URNs to lowercase to ensure cross-platform compatibility. " "If `target_platform` is Snowflake, the default is True.", ) + use_compiled_code: bool = Field( + default=False, + description="When enabled, uses the compiled dbt code instead of the raw dbt node definition.", + ) + test_warnings_are_errors: bool = Field( + default=False, + description="When enabled, dbt test warnings will be treated as failures.", + ) @validator("target_platform") def validate_target_platform_value(cls, target_platform: str) -> str: @@ -811,7 +821,7 @@ class DBTSourceBase(StatefulIngestionSourceBase): mce_builder.make_schema_field_urn(upstream_urn, column_name) ], nativeType=node.name, - logic=node.compiled_code if node.compiled_code else node.raw_code, + logic=node.compiled_code or node.raw_code, aggregation=AssertionStdAggregationClass._NATIVE_, nativeParameters=string_map(kw_args), ), @@ -825,7 +835,7 @@ class DBTSourceBase(StatefulIngestionSourceBase): dataset=upstream_urn, scope=DatasetAssertionScopeClass.DATASET_ROWS, operator=AssertionStdOperatorClass._NATIVE_, - logic=node.compiled_code if node.compiled_code else node.raw_code, + logic=node.compiled_code or node.raw_code, nativeType=node.name, aggregation=AssertionStdAggregationClass._NATIVE_, nativeParameters=string_map(kw_args), @@ -856,6 +866,10 @@ class DBTSourceBase(StatefulIngestionSourceBase): result=AssertionResultClass( type=AssertionResultTypeClass.SUCCESS if test_result.status == "pass" + or ( + not self.config.test_warnings_are_errors + and test_result.status == "warn" + ) else AssertionResultTypeClass.FAILURE, nativeResults=test_result.native_results, ), @@ -1007,8 +1021,8 @@ class DBTSourceBase(StatefulIngestionSourceBase): aspects.append(upstream_lineage_class) # add view properties aspect - if node.raw_code and node.language == "sql": - view_prop_aspect = self._create_view_properties_aspect(node) + view_prop_aspect = self._create_view_properties_aspect(node) + if view_prop_aspect: aspects.append(view_prop_aspect) # emit subtype mcp @@ -1133,14 +1147,21 @@ class DBTSourceBase(StatefulIngestionSourceBase): def get_external_url(self, node: DBTNode) -> Optional[str]: pass - def _create_view_properties_aspect(self, node: DBTNode) -> ViewPropertiesClass: + def _create_view_properties_aspect( + self, node: DBTNode + ) -> Optional[ViewPropertiesClass]: + view_logic = ( + node.compiled_code if self.config.use_compiled_code else node.raw_code + ) + + if node.language != "sql" or not view_logic: + return None + materialized = node.materialization in {"table", "incremental", "snapshot"} - # this function is only called when raw sql is present. assert is added to satisfy lint checks - assert node.raw_code is not None view_properties = ViewPropertiesClass( materialized=materialized, viewLanguage="SQL", - viewLogic=node.raw_code, + viewLogic=view_logic, ) return view_properties diff --git a/metadata-ingestion/src/datahub/ingestion/source/file.py b/metadata-ingestion/src/datahub/ingestion/source/file.py index de61fa8481..590aa59f7b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/file.py +++ b/metadata-ingestion/src/datahub/ingestion/source/file.py @@ -16,7 +16,7 @@ from pydantic import validator from pydantic.fields import Field from datahub.configuration.common import ConfigEnum, ConfigModel, ConfigurationError -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.configuration.validate_field_rename import pydantic_renamed_field from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.common import PipelineContext diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py index a8c7e48f37..96729f4c60 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py @@ -9,8 +9,8 @@ from pydantic.class_validators import root_validator import datahub.emitter.mce_builder as builder from datahub.configuration.common import AllowDenyPattern, ConfigModel -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated from datahub.configuration.source_common import DEFAULT_ENV, DatasetSourceConfigMixin +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.ingestion.source.common.subtypes import BIAssetSubTypes from datahub.ingestion.source.state.stale_entity_removal_handler import ( StaleEntityRemovalSourceReport, diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py index 93850607e5..804a14b0fe 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py @@ -7,8 +7,8 @@ from pydantic.fields import Field from datahub.configuration import ConfigModel from datahub.configuration.common import AllowDenyPattern -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated from datahub.configuration.source_common import DatasetLineageProviderConfigBase +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.ingestion.source.data_lake_common.path_spec import PathSpec from datahub.ingestion.source.sql.postgres import BasePostgresConfig from datahub.ingestion.source.state.stateful_ingestion_base import ( diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/config.py b/metadata-ingestion/src/datahub/ingestion/source/s3/config.py index f1dd622efb..9b5296f0b9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/config.py @@ -5,8 +5,8 @@ import pydantic from pydantic.fields import Field from datahub.configuration.common import AllowDenyPattern -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated from datahub.configuration.source_common import DatasetSourceConfigMixin +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.configuration.validate_field_rename import pydantic_renamed_field from datahub.ingestion.source.aws.aws_common import AwsConnectionConfig from datahub.ingestion.source.data_lake_common.config import PathSpecsConfigMixin diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py index 1626f86b92..8873038079 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py @@ -19,9 +19,9 @@ from sqlalchemy.sql import sqltypes from sqlalchemy.types import BOOLEAN, DATE, DATETIME, INTEGER import datahub.emitter.mce_builder as builder -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated from datahub.configuration.source_common import DatasetLineageProviderConfigBase from datahub.configuration.time_window_config import BaseTimeWindowConfig +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.emitter import mce_builder from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.decorators import ( diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py index 8f1e04b915..677d32c8ba 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py @@ -7,8 +7,8 @@ import pydantic from pydantic import Field from datahub.configuration.common import AllowDenyPattern, ConfigModel -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated from datahub.configuration.source_common import DatasetSourceConfigMixin +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig from datahub.ingestion.source.state.stale_entity_removal_handler import ( StatefulStaleMetadataRemovalConfig, diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py index 6214cba342..e347cd26d2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py @@ -37,11 +37,11 @@ from datahub.configuration.common import ( ConfigModel, ConfigurationError, ) -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated from datahub.configuration.source_common import ( DatasetLineageProviderConfigBase, DatasetSourceConfigMixin, ) +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_builder import ( ContainerKey, diff --git a/metadata-ingestion/tests/unit/test_pydantic_validators.py b/metadata-ingestion/tests/unit/test_pydantic_validators.py index 07d86043a3..3e9ec6cbaf 100644 --- a/metadata-ingestion/tests/unit/test_pydantic_validators.py +++ b/metadata-ingestion/tests/unit/test_pydantic_validators.py @@ -4,7 +4,7 @@ import pytest from pydantic import ValidationError from datahub.configuration.common import ConfigModel -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.configuration.validate_field_removal import pydantic_removed_field from datahub.configuration.validate_field_rename import pydantic_renamed_field from datahub.utilities.global_warning_util import get_global_warnings