feat(ingest/dbt): enable model performance and compiled code by default (#10164)

This commit is contained in:
Harshal Sheth 2024-04-02 07:29:27 -07:00 committed by GitHub
parent db33c8646a
commit c9b9afc530
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 9 additions and 19 deletions

View File

@ -26,7 +26,8 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
- #10055 - Assertion entities generated by dbt are now associated with the dbt dataset entity, and not the entity in the data warehouse.
- #10090 - For Redshift ingestion, `use_lineage_v2` is now enabled by default.
- #10147 - For looker ingestion, the browse paths for looker Dashboard, Chart, View, Explore have been updated to align with Looker UI. This does not affect URNs or lineage but primarily affects (improves) browsing experience.
-
- #10164 - For dbt ingestion, `entities_enabled.model_performance` and `include_compiled_code` are now both enabled by default. Upgrading dbt ingestion will also require upgrading the backend to 0.13.1.
### Potential Downtime
### Deprecations

View File

@ -179,9 +179,7 @@ class DBTEntitiesEnabled(ConfigModel):
description="Emit metadata for test results when set to Yes or Only",
)
model_performance: EmitDirective = Field(
# TODO: This is currently disabled by default, but will be enabled by default once
# the models have stabilized.
EmitDirective.NO,
EmitDirective.YES,
description="Emit model performance metadata when set to Yes or Only. "
"Only supported with dbt core.",
)
@ -349,9 +347,7 @@ class DBTCommonConfig(
_remove_use_compiled_code = pydantic_removed_field("use_compiled_code")
include_compiled_code: bool = Field(
# TODO: Once the formattedViewLogic field model change is included in a server
# release, probably 0.13.1, we can flip the default to True.
default=False,
default=True,
description="When enabled, includes the compiled code in the emitted metadata.",
)

View File

@ -53,9 +53,9 @@ class DBTCoreConfig(DBTCommonConfig):
run_results_paths: List[str] = Field(
default=[],
description="Path to output of dbt test run as run_results files in JSON format. "
"If invoking dbt multiple times, you can provide paths to multiple run result files."
"See https://docs.getdbt.com/reference/artifacts/run-results-json. "
"If not specified, test execution results will not be populated in DataHub.",
"If not specified, test execution results and model performance metadata will not be populated in DataHub. "
"If invoking dbt multiple times, you can provide paths to multiple run result files. "
"See https://docs.getdbt.com/reference/artifacts/run-results-json.",
)
# Because we now also collect model performance metadata, the "test_results" field was renamed to "run_results".

View File

@ -26,8 +26,6 @@ GMS_SERVER = f"http://localhost:{GMS_PORT}"
_default_dbt_source_args = {
# Needed to avoid needing to access datahub server.
"write_semantics": "OVERRIDE",
# Needed until this is made the default.
"include_compiled_code": True,
}
@ -216,12 +214,7 @@ class DbtTestConfig:
manifest_file="sample_dbt_manifest_2.json",
sources_file="sample_dbt_sources_2.json",
run_results_files=["sample_dbt_run_results_2.json"],
source_config_modifiers={
"entities_enabled": {
# TODO: Remove this once it becomes the default.
"model_performance": "YES",
},
},
source_config_modifiers={},
),
],
ids=lambda dbt_test_config: dbt_test_config.run_id,

View File

@ -293,7 +293,7 @@ def test_dbt_entity_emission_configuration_helpers():
assert config.entities_enabled.can_emit_node_type("source")
assert config.entities_enabled.can_emit_node_type("test")
assert config.entities_enabled.can_emit_test_results
assert not config.entities_enabled.can_emit_model_performance
assert config.entities_enabled.can_emit_model_performance
assert not config.entities_enabled.is_only_test_results()
config_dict = {