feat(dbt): support prefer_sql_parser_lineage with sources enabled (#11168)

This commit is contained in:
Harshal Sheth 2024-08-13 13:54:50 -07:00 committed by GitHub
parent d36edcace9
commit 897173f270
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 3022 additions and 55 deletions

View File

@ -276,6 +276,12 @@ class DBTCommonConfig(
DBTEntitiesEnabled(),
description="Controls for enabling / disabling metadata emission for different dbt entities (models, test definitions, test results, etc.)",
)
prefer_sql_parser_lineage: bool = Field(
default=False,
description="Normally we use dbt's metadata to generate table lineage. When enabled, we prefer results from the SQL parser when generating lineage instead. "
"This can be useful when dbt models reference tables directly, instead of using the ref() macro. "
"This requires that `skip_sources_in_lineage` is enabled.",
)
skip_sources_in_lineage: bool = Field(
default=False,
description="[Experimental] When enabled, dbt sources will not be included in the lineage graph. "
@ -366,13 +372,6 @@ class DBTCommonConfig(
description="When enabled, includes the compiled code in the emitted metadata.",
)
prefer_sql_parser_lineage: bool = Field(
default=False,
description="Normally we use dbt's metadata to generate table lineage. When enabled, we prefer results from the SQL parser when generating lineage instead. "
"This can be useful when dbt models reference tables directly, instead of using the ref() macro. "
"This requires that `skip_sources_in_lineage` is enabled.",
)
@validator("target_platform")
def validate_target_platform_value(cls, target_platform: str) -> str:
if target_platform.lower() == DBT_PLATFORM:
@ -438,15 +437,27 @@ class DBTCommonConfig(
return include_column_lineage
@validator("skip_sources_in_lineage")
@validator("skip_sources_in_lineage", always=True)
def validate_skip_sources_in_lineage(
cls, skip_sources_in_lineage: bool, values: Dict
) -> bool:
entites_enabled: Optional[DBTEntitiesEnabled] = values.get("entities_enabled")
entities_enabled: Optional[DBTEntitiesEnabled] = values.get("entities_enabled")
prefer_sql_parser_lineage: Optional[bool] = values.get(
"prefer_sql_parser_lineage"
)
if prefer_sql_parser_lineage and not skip_sources_in_lineage:
raise ValueError(
"`prefer_sql_parser_lineage` requires that `skip_sources_in_lineage` is enabled."
)
if (
skip_sources_in_lineage
and entites_enabled
and entites_enabled.sources == EmitDirective.YES
and entities_enabled
and entities_enabled.sources == EmitDirective.YES
# When `prefer_sql_parser_lineage` is enabled, `skip_sources_in_lineage` may be enabled
# without also setting `entities_enabled.sources` to NO.
and not prefer_sql_parser_lineage
):
raise ValueError(
"When `skip_sources_in_lineage` is enabled, `entities_enabled.sources` must be set to NO."
@ -454,16 +465,6 @@ class DBTCommonConfig(
return skip_sources_in_lineage
@validator("prefer_sql_parser_lineage")
def validate_prefer_sql_parser_lineage(
cls, prefer_sql_parser_lineage: bool, values: Dict
) -> bool:
if prefer_sql_parser_lineage and not values.get("skip_sources_in_lineage"):
raise ValueError(
"`prefer_sql_parser_lineage` requires that `skip_sources_in_lineage` is enabled."
)
return prefer_sql_parser_lineage
@dataclass
class DBTColumn:

View File

@ -227,7 +227,7 @@ class DbtTestConfig:
source_config_modifiers={
"prefer_sql_parser_lineage": True,
"skip_sources_in_lineage": True,
"entities_enabled": {"sources": "NO"},
# "entities_enabled": {"sources": "NO"},  # not required when `prefer_sql_parser_lineage` is enabled
},
),
],

View File

@ -247,7 +247,6 @@ def test_dbt_config_prefer_sql_parser_lineage():
"catalog_path": "dummy_path",
"target_platform": "dummy_platform",
"skip_sources_in_lineage": True,
"entities_enabled": {"sources": "NO"},
"prefer_sql_parser_lineage": True,
}
config = DBTCoreConfig.parse_obj(config_dict)