diff --git a/metadata-ingestion/docs/sources/dbt/dbt.md b/metadata-ingestion/docs/sources/dbt/dbt.md index 9f366f579e..80ed11df53 100644 --- a/metadata-ingestion/docs/sources/dbt/dbt.md +++ b/metadata-ingestion/docs/sources/dbt/dbt.md @@ -55,6 +55,11 @@ column_meta_mapping: operation: "add_tag" config: tag: "sensitive" + gdpr.pii: + match: true + operation: "add_tag" + config: + tag: "pii" ``` We support the following operations: @@ -118,6 +123,29 @@ meta_mapping: tag: "case_{{ $match }}" ``` +#### Nested meta properties + +If your meta section has nested properties and looks like this: + +```yaml +meta: + data_governance: + team_owner: "Finance" +``` + +and you want to attach the term Finance_test when data_governance.team_owner is set to Finance, you can use the following meta_mapping section: + +```yaml +meta_mapping: + data_governance.team_owner: + match: "Finance" + operation: "add_term" + config: + term: "Finance_test" +``` + +Note: nested meta property mapping is also supported for column_meta_mapping. + #### Stripping out leading @ sign You can also match specific groups within the value to extract subsets of the matched value. e.g. 
if you have a meta section that looks like this: diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 1fe01f1554..f726ce2bb3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -1336,6 +1336,7 @@ class DBTSourceBase(StatefulIngestionSourceBase): self.config.tag_prefix, "SOURCE_CONTROL", self.config.strip_user_ids_from_email, + match_nested_props=True, ) action_processor_tag = OperationProcessor( @@ -1707,6 +1708,7 @@ class DBTSourceBase(StatefulIngestionSourceBase): self.config.tag_prefix, "SOURCE_CONTROL", self.config.strip_user_ids_from_email, + match_nested_props=True, ) canonical_schema: List[SchemaField] = [] diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json index 40ade9abc8..e602612540 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json @@ -921,6 +921,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { + "data_governance_nested": "{'team_owner': 'Finance'}", + "data_governance.team_owner": "Finance", "node_type": "model", "materialization": "table", "dbt_file_path": "models/transform/payments_by_customer_by_month.sql", @@ -944,6 +946,22 @@ "removed": false } }, + { + "com.linkedin.pegasus2avro.common.GlossaryTerms": { + "terms": [ + { + "urn": "urn:li:glossaryTerm:Finance_test" + }, + { + "urn": "urn:li:glossaryTerm:Finance_test_nested" + } + ], + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:datahub" + } + } + }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { "schemaName": "model.sample_dbt.payments_by_customer_by_month", @@ -1019,6 +1037,9 
@@ }, { "urn": "urn:li:glossaryTerm:pii" + }, + { + "urn": "urn:li:glossaryTerm:pii_category_organization" } ], "auditStamp": { @@ -4769,6 +4790,38 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "glossaryTerm", + "entityUrn": "urn:li:glossaryTerm:Finance_test", + "changeType": "UPSERT", + "aspectName": "glossaryTermKey", + "aspect": { + "json": { + "name": "Finance_test" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "glossaryTerm", + "entityUrn": "urn:li:glossaryTerm:Finance_test_nested", + "changeType": "UPSERT", + "aspectName": "glossaryTermKey", + "aspect": { + "json": { + "name": "Finance_test_nested" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "glossaryTerm", "entityUrn": "urn:li:glossaryTerm:customer_id", @@ -4817,6 +4870,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "glossaryTerm", + "entityUrn": "urn:li:glossaryTerm:pii_category_organization", + "changeType": "UPSERT", + "aspectName": "glossaryTermKey", + "aspect": { + "json": { + "name": "pii_category_organization" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:dbt:sensitive", diff --git a/metadata-ingestion/tests/integration/dbt/sample_dbt_manifest_1.json b/metadata-ingestion/tests/integration/dbt/sample_dbt_manifest_1.json index 94ee749d48..46d6d182d2 100644 --- a/metadata-ingestion/tests/integration/dbt/sample_dbt_manifest_1.json +++ b/metadata-ingestion/tests/integration/dbt/sample_dbt_manifest_1.json @@ -7883,7 +7883,10 @@ "meta": { "is_sensitive": true, "maturity": "beta", - "terms": "pii, customer_id" + "terms": "pii, customer_id", + "governance": { + "pii_category": "organization" + } }, 
"name": "customer_id", "quote": null, @@ -7908,7 +7911,12 @@ "grants": {}, "incremental_strategy": null, "materialized": "table", - "meta": {}, + "meta": { + "data_governance_nested": { + "team_owner": "Finance" + }, + "data_governance.team_owner": "Finance" + }, "on_schema_change": "ignore", "packages": [], "persist_docs": {}, diff --git a/metadata-ingestion/tests/integration/dbt/test_dbt.py b/metadata-ingestion/tests/integration/dbt/test_dbt.py index 6ba4c8d5bf..bb3bb0dd1d 100644 --- a/metadata-ingestion/tests/integration/dbt/test_dbt.py +++ b/metadata-ingestion/tests/integration/dbt/test_dbt.py @@ -170,7 +170,7 @@ class DbtTestConfig: }, ), DbtTestConfig( - "dbt-column-meta-mapping", # this also tests snapshot support + "dbt-column-meta-mapping", # this also tests snapshot support and meta nested mapping "dbt_test_column_meta_mapping.json", "dbt_test_column_meta_mapping_golden.json", catalog_file="sample_dbt_catalog_1.json", @@ -178,6 +178,43 @@ class DbtTestConfig: sources_file="sample_dbt_sources_1.json", source_config_modifiers={ "enable_meta_mapping": True, + "meta_mapping": { + "data_governance_nested.team_owner": { + "match": "Finance", + "operation": "add_term", + "config": {"term": "Finance_test_nested"}, + }, + "owner": { + "match": "^@(.*)", + "operation": "add_owner", + "config": {"owner_type": "user"}, + }, + "business_owner": { + "match": ".*", + "operation": "add_owner", + "config": {"owner_type": "user"}, + }, + "has_pii": { + "match": True, + "operation": "add_tag", + "config": {"tag": "has_pii_test"}, + }, + "int_property": { + "match": 1, + "operation": "add_tag", + "config": {"tag": "int_meta_property"}, + }, + "double_property": { + "match": 2.5, + "operation": "add_term", + "config": {"term": "double_meta_property"}, + }, + "data_governance.team_owner": { + "match": "Finance", + "operation": "add_term", + "config": {"term": "Finance_test"}, + }, + }, "column_meta_mapping": { "terms": { "match": ".*", @@ -194,6 +231,11 @@ class DbtTestConfig: 
"operation": "add_term", "config": {"term": "maturity_{{ $match }}"}, }, + "governance.pii_category": { + "match": ".*", + "operation": "add_term", + "config": {"term": "pii_category_{{ $match }}"}, + }, }, "entities_enabled": { "test_definitions": "NO",