feat(ingest/dbt): support use_compiled_code and test_warnings_are_errors (#8956)

This commit is contained in:
Harshal Sheth 2023-10-05 13:29:47 -04:00 committed by GitHub
parent c9309ff157
commit 3cede10ab3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 51 additions and 22 deletions

View File

@ -4,7 +4,7 @@ from pydantic import validator
from pydantic.fields import Field
from datahub.configuration.common import ConfigModel, ConfigurationError
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.metadata.schema_classes import FabricTypeClass
DEFAULT_ENV = FabricTypeClass.PROD

View File

@ -1,20 +1,28 @@
import warnings
from typing import Optional, Type
from typing import Any, Optional, Type
import pydantic
from datahub.configuration.common import ConfigurationWarning
from datahub.utilities.global_warning_util import add_global_warning
_unset = object()
def pydantic_field_deprecated(field: str, message: Optional[str] = None) -> classmethod:
def pydantic_field_deprecated(
field: str,
warn_if_value_is_not: Any = _unset,
message: Optional[str] = None,
) -> classmethod:
if message:
output = message
else:
output = f"{field} is deprecated and will be removed in a future release. Please remove it from your config."
def _validate_deprecated(cls: Type, values: dict) -> dict:
if field in values:
if field in values and (
warn_if_value_is_not is _unset or values[field] != warn_if_value_is_not
):
add_global_warning(output)
warnings.warn(output, ConfigurationWarning, stacklevel=2)
return values

View File

@ -18,8 +18,8 @@ from datahub.configuration.common import (
ConfigurationError,
LineageConfig,
)
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated
from datahub.configuration.source_common import DatasetSourceConfigMixin
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.emitter import mce_builder
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.common import PipelineContext
@ -214,7 +214,9 @@ class DBTCommonConfig(
default=False,
description="Use model identifier instead of model name if defined (if not, default to model name).",
)
_deprecate_use_identifiers = pydantic_field_deprecated("use_identifiers")
_deprecate_use_identifiers = pydantic_field_deprecated(
"use_identifiers", warn_if_value_is_not=False
)
entities_enabled: DBTEntitiesEnabled = Field(
DBTEntitiesEnabled(),
@ -278,6 +280,14 @@ class DBTCommonConfig(
description="When enabled, converts column URNs to lowercase to ensure cross-platform compatibility. "
"If `target_platform` is Snowflake, the default is True.",
)
use_compiled_code: bool = Field(
default=False,
description="When enabled, uses the compiled dbt code instead of the raw dbt node definition.",
)
test_warnings_are_errors: bool = Field(
default=False,
description="When enabled, dbt test warnings will be treated as failures.",
)
@validator("target_platform")
def validate_target_platform_value(cls, target_platform: str) -> str:
@ -811,7 +821,7 @@ class DBTSourceBase(StatefulIngestionSourceBase):
mce_builder.make_schema_field_urn(upstream_urn, column_name)
],
nativeType=node.name,
logic=node.compiled_code if node.compiled_code else node.raw_code,
logic=node.compiled_code or node.raw_code,
aggregation=AssertionStdAggregationClass._NATIVE_,
nativeParameters=string_map(kw_args),
),
@ -825,7 +835,7 @@ class DBTSourceBase(StatefulIngestionSourceBase):
dataset=upstream_urn,
scope=DatasetAssertionScopeClass.DATASET_ROWS,
operator=AssertionStdOperatorClass._NATIVE_,
logic=node.compiled_code if node.compiled_code else node.raw_code,
logic=node.compiled_code or node.raw_code,
nativeType=node.name,
aggregation=AssertionStdAggregationClass._NATIVE_,
nativeParameters=string_map(kw_args),
@ -856,6 +866,10 @@ class DBTSourceBase(StatefulIngestionSourceBase):
result=AssertionResultClass(
type=AssertionResultTypeClass.SUCCESS
if test_result.status == "pass"
or (
not self.config.test_warnings_are_errors
and test_result.status == "warn"
)
else AssertionResultTypeClass.FAILURE,
nativeResults=test_result.native_results,
),
@ -1007,8 +1021,8 @@ class DBTSourceBase(StatefulIngestionSourceBase):
aspects.append(upstream_lineage_class)
# add view properties aspect
if node.raw_code and node.language == "sql":
view_prop_aspect = self._create_view_properties_aspect(node)
view_prop_aspect = self._create_view_properties_aspect(node)
if view_prop_aspect:
aspects.append(view_prop_aspect)
# emit subtype mcp
@ -1133,14 +1147,21 @@ class DBTSourceBase(StatefulIngestionSourceBase):
def get_external_url(self, node: DBTNode) -> Optional[str]:
pass
def _create_view_properties_aspect(self, node: DBTNode) -> ViewPropertiesClass:
def _create_view_properties_aspect(
self, node: DBTNode
) -> Optional[ViewPropertiesClass]:
view_logic = (
node.compiled_code if self.config.use_compiled_code else node.raw_code
)
if node.language != "sql" or not view_logic:
return None
materialized = node.materialization in {"table", "incremental", "snapshot"}
# this function is only called when raw sql is present. assert is added to satisfy lint checks
assert node.raw_code is not None
view_properties = ViewPropertiesClass(
materialized=materialized,
viewLanguage="SQL",
viewLogic=node.raw_code,
viewLogic=view_logic,
)
return view_properties

View File

@ -16,7 +16,7 @@ from pydantic import validator
from pydantic.fields import Field
from datahub.configuration.common import ConfigEnum, ConfigModel, ConfigurationError
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.configuration.validate_field_rename import pydantic_renamed_field
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.common import PipelineContext

View File

@ -9,8 +9,8 @@ from pydantic.class_validators import root_validator
import datahub.emitter.mce_builder as builder
from datahub.configuration.common import AllowDenyPattern, ConfigModel
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated
from datahub.configuration.source_common import DEFAULT_ENV, DatasetSourceConfigMixin
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.ingestion.source.common.subtypes import BIAssetSubTypes
from datahub.ingestion.source.state.stale_entity_removal_handler import (
StaleEntityRemovalSourceReport,

View File

@ -7,8 +7,8 @@ from pydantic.fields import Field
from datahub.configuration import ConfigModel
from datahub.configuration.common import AllowDenyPattern
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated
from datahub.configuration.source_common import DatasetLineageProviderConfigBase
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.ingestion.source.data_lake_common.path_spec import PathSpec
from datahub.ingestion.source.sql.postgres import BasePostgresConfig
from datahub.ingestion.source.state.stateful_ingestion_base import (

View File

@ -5,8 +5,8 @@ import pydantic
from pydantic.fields import Field
from datahub.configuration.common import AllowDenyPattern
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated
from datahub.configuration.source_common import DatasetSourceConfigMixin
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.configuration.validate_field_rename import pydantic_renamed_field
from datahub.ingestion.source.aws.aws_common import AwsConnectionConfig
from datahub.ingestion.source.data_lake_common.config import PathSpecsConfigMixin

View File

@ -19,9 +19,9 @@ from sqlalchemy.sql import sqltypes
from sqlalchemy.types import BOOLEAN, DATE, DATETIME, INTEGER
import datahub.emitter.mce_builder as builder
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated
from datahub.configuration.source_common import DatasetLineageProviderConfigBase
from datahub.configuration.time_window_config import BaseTimeWindowConfig
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.emitter import mce_builder
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.decorators import (

View File

@ -7,8 +7,8 @@ import pydantic
from pydantic import Field
from datahub.configuration.common import AllowDenyPattern, ConfigModel
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated
from datahub.configuration.source_common import DatasetSourceConfigMixin
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig
from datahub.ingestion.source.state.stale_entity_removal_handler import (
StatefulStaleMetadataRemovalConfig,

View File

@ -37,11 +37,11 @@ from datahub.configuration.common import (
ConfigModel,
ConfigurationError,
)
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated
from datahub.configuration.source_common import (
DatasetLineageProviderConfigBase,
DatasetSourceConfigMixin,
)
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.mcp_builder import (
ContainerKey,

View File

@ -4,7 +4,7 @@ import pytest
from pydantic import ValidationError
from datahub.configuration.common import ConfigModel
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.configuration.validate_field_removal import pydantic_removed_field
from datahub.configuration.validate_field_rename import pydantic_renamed_field
from datahub.utilities.global_warning_util import get_global_warnings