feat(ingest/dbt): support use_compiled_code and test_warnings_are_errors (#8956)

This commit is contained in:
Harshal Sheth 2023-10-05 13:29:47 -04:00 committed by GitHub
parent c9309ff157
commit 3cede10ab3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 51 additions and 22 deletions

View File

@@ -4,7 +4,7 @@ from pydantic import validator
from pydantic.fields import Field from pydantic.fields import Field
from datahub.configuration.common import ConfigModel, ConfigurationError from datahub.configuration.common import ConfigModel, ConfigurationError
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.metadata.schema_classes import FabricTypeClass from datahub.metadata.schema_classes import FabricTypeClass
DEFAULT_ENV = FabricTypeClass.PROD DEFAULT_ENV = FabricTypeClass.PROD

View File

@@ -1,20 +1,28 @@
import warnings import warnings
from typing import Optional, Type from typing import Any, Optional, Type
import pydantic import pydantic
from datahub.configuration.common import ConfigurationWarning from datahub.configuration.common import ConfigurationWarning
from datahub.utilities.global_warning_util import add_global_warning from datahub.utilities.global_warning_util import add_global_warning
_unset = object()
def pydantic_field_deprecated(field: str, message: Optional[str] = None) -> classmethod:
def pydantic_field_deprecated(
field: str,
warn_if_value_is_not: Any = _unset,
message: Optional[str] = None,
) -> classmethod:
if message: if message:
output = message output = message
else: else:
output = f"{field} is deprecated and will be removed in a future release. Please remove it from your config." output = f"{field} is deprecated and will be removed in a future release. Please remove it from your config."
def _validate_deprecated(cls: Type, values: dict) -> dict: def _validate_deprecated(cls: Type, values: dict) -> dict:
if field in values: if field in values and (
warn_if_value_is_not is _unset or values[field] != warn_if_value_is_not
):
add_global_warning(output) add_global_warning(output)
warnings.warn(output, ConfigurationWarning, stacklevel=2) warnings.warn(output, ConfigurationWarning, stacklevel=2)
return values return values

View File

@@ -18,8 +18,8 @@ from datahub.configuration.common import (
ConfigurationError, ConfigurationError,
LineageConfig, LineageConfig,
) )
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated
from datahub.configuration.source_common import DatasetSourceConfigMixin from datahub.configuration.source_common import DatasetSourceConfigMixin
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.emitter import mce_builder from datahub.emitter import mce_builder
from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.common import PipelineContext
@@ -214,7 +214,9 @@ class DBTCommonConfig(
default=False, default=False,
description="Use model identifier instead of model name if defined (if not, default to model name).", description="Use model identifier instead of model name if defined (if not, default to model name).",
) )
_deprecate_use_identifiers = pydantic_field_deprecated("use_identifiers") _deprecate_use_identifiers = pydantic_field_deprecated(
"use_identifiers", warn_if_value_is_not=False
)
entities_enabled: DBTEntitiesEnabled = Field( entities_enabled: DBTEntitiesEnabled = Field(
DBTEntitiesEnabled(), DBTEntitiesEnabled(),
@@ -278,6 +280,14 @@ class DBTCommonConfig(
description="When enabled, converts column URNs to lowercase to ensure cross-platform compatibility. " description="When enabled, converts column URNs to lowercase to ensure cross-platform compatibility. "
"If `target_platform` is Snowflake, the default is True.", "If `target_platform` is Snowflake, the default is True.",
) )
use_compiled_code: bool = Field(
default=False,
description="When enabled, uses the compiled dbt code instead of the raw dbt node definition.",
)
test_warnings_are_errors: bool = Field(
default=False,
description="When enabled, dbt test warnings will be treated as failures.",
)
@validator("target_platform") @validator("target_platform")
def validate_target_platform_value(cls, target_platform: str) -> str: def validate_target_platform_value(cls, target_platform: str) -> str:
@@ -811,7 +821,7 @@ class DBTSourceBase(StatefulIngestionSourceBase):
mce_builder.make_schema_field_urn(upstream_urn, column_name) mce_builder.make_schema_field_urn(upstream_urn, column_name)
], ],
nativeType=node.name, nativeType=node.name,
logic=node.compiled_code if node.compiled_code else node.raw_code, logic=node.compiled_code or node.raw_code,
aggregation=AssertionStdAggregationClass._NATIVE_, aggregation=AssertionStdAggregationClass._NATIVE_,
nativeParameters=string_map(kw_args), nativeParameters=string_map(kw_args),
), ),
@@ -825,7 +835,7 @@ class DBTSourceBase(StatefulIngestionSourceBase):
dataset=upstream_urn, dataset=upstream_urn,
scope=DatasetAssertionScopeClass.DATASET_ROWS, scope=DatasetAssertionScopeClass.DATASET_ROWS,
operator=AssertionStdOperatorClass._NATIVE_, operator=AssertionStdOperatorClass._NATIVE_,
logic=node.compiled_code if node.compiled_code else node.raw_code, logic=node.compiled_code or node.raw_code,
nativeType=node.name, nativeType=node.name,
aggregation=AssertionStdAggregationClass._NATIVE_, aggregation=AssertionStdAggregationClass._NATIVE_,
nativeParameters=string_map(kw_args), nativeParameters=string_map(kw_args),
@@ -856,6 +866,10 @@ class DBTSourceBase(StatefulIngestionSourceBase):
result=AssertionResultClass( result=AssertionResultClass(
type=AssertionResultTypeClass.SUCCESS type=AssertionResultTypeClass.SUCCESS
if test_result.status == "pass" if test_result.status == "pass"
or (
not self.config.test_warnings_are_errors
and test_result.status == "warn"
)
else AssertionResultTypeClass.FAILURE, else AssertionResultTypeClass.FAILURE,
nativeResults=test_result.native_results, nativeResults=test_result.native_results,
), ),
@@ -1007,8 +1021,8 @@ class DBTSourceBase(StatefulIngestionSourceBase):
aspects.append(upstream_lineage_class) aspects.append(upstream_lineage_class)
# add view properties aspect # add view properties aspect
if node.raw_code and node.language == "sql": view_prop_aspect = self._create_view_properties_aspect(node)
view_prop_aspect = self._create_view_properties_aspect(node) if view_prop_aspect:
aspects.append(view_prop_aspect) aspects.append(view_prop_aspect)
# emit subtype mcp # emit subtype mcp
@@ -1133,14 +1147,21 @@ class DBTSourceBase(StatefulIngestionSourceBase):
def get_external_url(self, node: DBTNode) -> Optional[str]: def get_external_url(self, node: DBTNode) -> Optional[str]:
pass pass
def _create_view_properties_aspect(self, node: DBTNode) -> ViewPropertiesClass: def _create_view_properties_aspect(
self, node: DBTNode
) -> Optional[ViewPropertiesClass]:
view_logic = (
node.compiled_code if self.config.use_compiled_code else node.raw_code
)
if node.language != "sql" or not view_logic:
return None
materialized = node.materialization in {"table", "incremental", "snapshot"} materialized = node.materialization in {"table", "incremental", "snapshot"}
# this function is only called when raw sql is present. assert is added to satisfy lint checks
assert node.raw_code is not None
view_properties = ViewPropertiesClass( view_properties = ViewPropertiesClass(
materialized=materialized, materialized=materialized,
viewLanguage="SQL", viewLanguage="SQL",
viewLogic=node.raw_code, viewLogic=view_logic,
) )
return view_properties return view_properties

View File

@@ -16,7 +16,7 @@ from pydantic import validator
from pydantic.fields import Field from pydantic.fields import Field
from datahub.configuration.common import ConfigEnum, ConfigModel, ConfigurationError from datahub.configuration.common import ConfigEnum, ConfigModel, ConfigurationError
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.configuration.validate_field_rename import pydantic_renamed_field from datahub.configuration.validate_field_rename import pydantic_renamed_field
from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.common import PipelineContext

View File

@@ -9,8 +9,8 @@ from pydantic.class_validators import root_validator
import datahub.emitter.mce_builder as builder import datahub.emitter.mce_builder as builder
from datahub.configuration.common import AllowDenyPattern, ConfigModel from datahub.configuration.common import AllowDenyPattern, ConfigModel
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated
from datahub.configuration.source_common import DEFAULT_ENV, DatasetSourceConfigMixin from datahub.configuration.source_common import DEFAULT_ENV, DatasetSourceConfigMixin
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.ingestion.source.common.subtypes import BIAssetSubTypes from datahub.ingestion.source.common.subtypes import BIAssetSubTypes
from datahub.ingestion.source.state.stale_entity_removal_handler import ( from datahub.ingestion.source.state.stale_entity_removal_handler import (
StaleEntityRemovalSourceReport, StaleEntityRemovalSourceReport,

View File

@@ -7,8 +7,8 @@ from pydantic.fields import Field
from datahub.configuration import ConfigModel from datahub.configuration import ConfigModel
from datahub.configuration.common import AllowDenyPattern from datahub.configuration.common import AllowDenyPattern
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated
from datahub.configuration.source_common import DatasetLineageProviderConfigBase from datahub.configuration.source_common import DatasetLineageProviderConfigBase
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.ingestion.source.data_lake_common.path_spec import PathSpec from datahub.ingestion.source.data_lake_common.path_spec import PathSpec
from datahub.ingestion.source.sql.postgres import BasePostgresConfig from datahub.ingestion.source.sql.postgres import BasePostgresConfig
from datahub.ingestion.source.state.stateful_ingestion_base import ( from datahub.ingestion.source.state.stateful_ingestion_base import (

View File

@@ -5,8 +5,8 @@ import pydantic
from pydantic.fields import Field from pydantic.fields import Field
from datahub.configuration.common import AllowDenyPattern from datahub.configuration.common import AllowDenyPattern
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated
from datahub.configuration.source_common import DatasetSourceConfigMixin from datahub.configuration.source_common import DatasetSourceConfigMixin
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.configuration.validate_field_rename import pydantic_renamed_field from datahub.configuration.validate_field_rename import pydantic_renamed_field
from datahub.ingestion.source.aws.aws_common import AwsConnectionConfig from datahub.ingestion.source.aws.aws_common import AwsConnectionConfig
from datahub.ingestion.source.data_lake_common.config import PathSpecsConfigMixin from datahub.ingestion.source.data_lake_common.config import PathSpecsConfigMixin

View File

@@ -19,9 +19,9 @@ from sqlalchemy.sql import sqltypes
from sqlalchemy.types import BOOLEAN, DATE, DATETIME, INTEGER from sqlalchemy.types import BOOLEAN, DATE, DATETIME, INTEGER
import datahub.emitter.mce_builder as builder import datahub.emitter.mce_builder as builder
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated
from datahub.configuration.source_common import DatasetLineageProviderConfigBase from datahub.configuration.source_common import DatasetLineageProviderConfigBase
from datahub.configuration.time_window_config import BaseTimeWindowConfig from datahub.configuration.time_window_config import BaseTimeWindowConfig
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.emitter import mce_builder from datahub.emitter import mce_builder
from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.decorators import ( from datahub.ingestion.api.decorators import (

View File

@@ -7,8 +7,8 @@ import pydantic
from pydantic import Field from pydantic import Field
from datahub.configuration.common import AllowDenyPattern, ConfigModel from datahub.configuration.common import AllowDenyPattern, ConfigModel
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated
from datahub.configuration.source_common import DatasetSourceConfigMixin from datahub.configuration.source_common import DatasetSourceConfigMixin
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig
from datahub.ingestion.source.state.stale_entity_removal_handler import ( from datahub.ingestion.source.state.stale_entity_removal_handler import (
StatefulStaleMetadataRemovalConfig, StatefulStaleMetadataRemovalConfig,

View File

@@ -37,11 +37,11 @@ from datahub.configuration.common import (
ConfigModel, ConfigModel,
ConfigurationError, ConfigurationError,
) )
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated
from datahub.configuration.source_common import ( from datahub.configuration.source_common import (
DatasetLineageProviderConfigBase, DatasetLineageProviderConfigBase,
DatasetSourceConfigMixin, DatasetSourceConfigMixin,
) )
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.mcp_builder import ( from datahub.emitter.mcp_builder import (
ContainerKey, ContainerKey,

View File

@@ -4,7 +4,7 @@ import pytest
from pydantic import ValidationError from pydantic import ValidationError
from datahub.configuration.common import ConfigModel from datahub.configuration.common import ConfigModel
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.configuration.validate_field_removal import pydantic_removed_field from datahub.configuration.validate_field_removal import pydantic_removed_field
from datahub.configuration.validate_field_rename import pydantic_renamed_field from datahub.configuration.validate_field_rename import pydantic_renamed_field
from datahub.utilities.global_warning_util import get_global_warnings from datahub.utilities.global_warning_util import get_global_warnings