diff --git a/ingestion/src/metadata/ingestion/source/database/dbt/constants.py b/ingestion/src/metadata/ingestion/source/database/dbt/constants.py
index 83c49c0724a..2fd0dd44c6a 100644
--- a/ingestion/src/metadata/ingestion/source/database/dbt/constants.py
+++ b/ingestion/src/metadata/ingestion/source/database/dbt/constants.py
@@ -22,6 +22,14 @@ REQUIRED_MANIFEST_KEYS = ["name", "schema", "resource_type"]
 # Based on https://schemas.getdbt.com/dbt/catalog/v1.json
 REQUIRED_CATALOG_KEYS = ["name", "type", "index"]
 
+REQUIRED_CONSTRAINT_KEYS = [
+    "type",
+    "name",
+    "expression",
+    "warn_unenforced",
+    "warn_unsupported",
+]
+
 REQUIRED_RESULTS_KEYS = {
     "status",
     "timing",
diff --git a/ingestion/src/metadata/ingestion/source/database/dbt/dbt_service.py b/ingestion/src/metadata/ingestion/source/database/dbt/dbt_service.py
index f1646b341a1..601226ca025 100644
--- a/ingestion/src/metadata/ingestion/source/database/dbt/dbt_service.py
+++ b/ingestion/src/metadata/ingestion/source/database/dbt/dbt_service.py
@@ -38,6 +38,7 @@ from metadata.ingestion.models.topology import (
 )
 from metadata.ingestion.source.database.database_service import DataModelLink
 from metadata.ingestion.source.database.dbt.constants import (
+    REQUIRED_CONSTRAINT_KEYS,
     REQUIRED_NODE_KEYS,
     REQUIRED_RESULTS_KEYS,
 )
@@ -182,6 +183,24 @@ class DbtServiceSource(TopologyRunnerMixin, Source, ABC):
                 ]
                 for key in keys_to_delete:
                     del value[key]
+                # Trim every column-level constraint to REQUIRED_CONSTRAINT_KEYS.
+                # Only the constraint dicts are pruned; the column's own fields
+                # are left untouched. Assumes `constraints` is a list of dicts
+                # (dbt manifest schema) - TODO confirm.
+                if value.get("columns"):
+                    for column in value["columns"].values():
+                        constraints = column.get("constraints")
+                        if constraints:
+                            for constraint in constraints:
+                                keys_to_delete = [
+                                    key
+                                    for key in constraint
+                                    if key.lower() not in REQUIRED_CONSTRAINT_KEYS
+                                ]
+                                for key in keys_to_delete:
+                                    del constraint[key]
+                        else:
+                            column["constraints"] = None
 
     def remove_run_result_non_required_keys(self, run_results: List[dict]):
         """