feat(ingest/dbt): enable model performance and compiled code by default (#10164)

2025-12-12 10:35:51 +00:00 · 2024-04-02 07:29:27 -07:00 · 2024-04-02 07:29:27 -07:00 · c9b9afc530
commit c9b9afc530
parent db33c8646a
5 changed files with 9 additions and 19 deletions
--- a/docs/how/updating-datahub.md
+++ b/docs/how/updating-datahub.md
@ -26,7 +26,8 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
 - #10055 - Assertion entities generated by dbt are now associated with the dbt dataset entity, and not the entity in the data warehouse.
 - #10090 - For Redshift ingestion, `use_lineage_v2` is now enabled by default.
 - #10147 - For looker ingestion, the browse paths for looker Dashboard, Chart, View, Explore have been updated to align with Looker UI. This does not affect URNs or lineage but primarily affects (improves) browsing experience.
- 
+- #10164 - For dbt ingestion, `entities_enabled.model_performance` and `include_compiled_code` are now both enabled by default. Upgrading dbt ingestion will also require upgrading the backend to 0.13.1.
+
 ### Potential Downtime

 ### Deprecations
--- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
@ -179,9 +179,7 @@ class DBTEntitiesEnabled(ConfigModel):
        description="Emit metadata for test results when set to Yes or Only",
    )
    model_performance: EmitDirective = Field(
-        # TODO: This is currently disabled by default, but will be enabled by default once
-        # the models have stabilized.
-        EmitDirective.NO,
+        EmitDirective.YES,
        description="Emit model performance metadata when set to Yes or Only. "
        "Only supported with dbt core.",
    )
@ -349,9 +347,7 @@ class DBTCommonConfig(
    _remove_use_compiled_code = pydantic_removed_field("use_compiled_code")

    include_compiled_code: bool = Field(
-        # TODO: Once the formattedViewLogic field model change is included in a server
-        # release, probably 0.13.1, we can flip the default to True.
-        default=False,
+        default=True,
        description="When enabled, includes the compiled code in the emitted metadata.",
    )

--- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py
@ -53,9 +53,9 @@ class DBTCoreConfig(DBTCommonConfig):
    run_results_paths: List[str] = Field(
        default=[],
        description="Path to output of dbt test run as run_results files in JSON format. "
-        "If invoking dbt multiple times, you can provide paths to multiple run result files."
-        "See https://docs.getdbt.com/reference/artifacts/run-results-json. "
-        "If not specified, test execution results will not be populated in DataHub.",
+        "If not specified, test execution results and model performance metadata will not be populated in DataHub. "
+        "If invoking dbt multiple times, you can provide paths to multiple run result files. "
+        "See https://docs.getdbt.com/reference/artifacts/run-results-json.",
    )

    # Because we now also collect model performance metadata, the "test_results" field was renamed to "run_results".
--- a/metadata-ingestion/tests/integration/dbt/test_dbt.py
+++ b/metadata-ingestion/tests/integration/dbt/test_dbt.py
@ -26,8 +26,6 @@ GMS_SERVER = f"http://localhost:{GMS_PORT}"
 _default_dbt_source_args = {
    # Needed to avoid needing to access datahub server.
    "write_semantics": "OVERRIDE",
-    # Needed until this is made the default.
-    "include_compiled_code": True,
 }


@ -216,12 +214,7 @@ class DbtTestConfig:
            manifest_file="sample_dbt_manifest_2.json",
            sources_file="sample_dbt_sources_2.json",
            run_results_files=["sample_dbt_run_results_2.json"],
-            source_config_modifiers={
-                "entities_enabled": {
-                    # TODO: Remove this once it becomes the default.
-                    "model_performance": "YES",
-                },
-            },
+            source_config_modifiers={},
        ),
    ],
    ids=lambda dbt_test_config: dbt_test_config.run_id,
--- a/metadata-ingestion/tests/unit/test_dbt_source.py
+++ b/metadata-ingestion/tests/unit/test_dbt_source.py
@ -293,7 +293,7 @@ def test_dbt_entity_emission_configuration_helpers():
    assert config.entities_enabled.can_emit_node_type("source")
    assert config.entities_enabled.can_emit_node_type("test")
    assert config.entities_enabled.can_emit_test_results
-    assert not config.entities_enabled.can_emit_model_performance
+    assert config.entities_enabled.can_emit_model_performance
    assert not config.entities_enabled.is_only_test_results()

    config_dict = {