mirror of
				https://github.com/datahub-project/datahub.git
				synced 2025-11-04 04:39:10 +00:00 
			
		
		
		
	feat(dbt/ingestion): add support for nested meta properties mapping (#13396)
Co-authored-by: Petr Knez <pknez@gmail.com>
This commit is contained in:
		
							parent
							
								
									127ecca3bc
								
							
						
					
					
						commit
						311387410b
					
				@ -55,6 +55,11 @@ column_meta_mapping:
 | 
			
		||||
    operation: "add_tag"
 | 
			
		||||
    config:
 | 
			
		||||
      tag: "sensitive"
 | 
			
		||||
  gdpr.pii:
 | 
			
		||||
    match: true
 | 
			
		||||
    operation: "add_tag"
 | 
			
		||||
    config:
 | 
			
		||||
      tag: "pii"
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
We support the following operations:
 | 
			
		||||
@ -118,6 +123,29 @@ meta_mapping:
 | 
			
		||||
       tag: "case_{{ $match }}"
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
#### Nested meta properties
 | 
			
		||||
 | 
			
		||||
If your meta section has nested properties and looks like this:
 | 
			
		||||
 | 
			
		||||
```yaml
 | 
			
		||||
meta:
 | 
			
		||||
  data_governance:
 | 
			
		||||
    team_owner: "Finance"
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
and you want to attach the term `Finance_test` when `data_governance.team_owner` is set to `Finance`, you can use the following `meta_mapping` section:
 | 
			
		||||
 | 
			
		||||
```yaml
 | 
			
		||||
meta_mapping:
 | 
			
		||||
  data_governance.team_owner:
 | 
			
		||||
    match: "Finance"
 | 
			
		||||
    operation: "add_term"
 | 
			
		||||
    config:
 | 
			
		||||
      term: "Finance_test"
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Note: nested meta properties mapping is also supported for `column_meta_mapping`.
 | 
			
		||||
 | 
			
		||||
#### Stripping out leading @ sign
 | 
			
		||||
 | 
			
		||||
You can also match specific groups within the value to extract subsets of the matched value. e.g. if you have a meta section that looks like this:
 | 
			
		||||
 | 
			
		||||
@ -1336,6 +1336,7 @@ class DBTSourceBase(StatefulIngestionSourceBase):
 | 
			
		||||
            self.config.tag_prefix,
 | 
			
		||||
            "SOURCE_CONTROL",
 | 
			
		||||
            self.config.strip_user_ids_from_email,
 | 
			
		||||
            match_nested_props=True,
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        action_processor_tag = OperationProcessor(
 | 
			
		||||
@ -1707,6 +1708,7 @@ class DBTSourceBase(StatefulIngestionSourceBase):
 | 
			
		||||
            self.config.tag_prefix,
 | 
			
		||||
            "SOURCE_CONTROL",
 | 
			
		||||
            self.config.strip_user_ids_from_email,
 | 
			
		||||
            match_nested_props=True,
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        canonical_schema: List[SchemaField] = []
 | 
			
		||||
 | 
			
		||||
@ -921,6 +921,8 @@
 | 
			
		||||
                {
 | 
			
		||||
                    "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
 | 
			
		||||
                        "customProperties": {
 | 
			
		||||
                            "data_governance_nested": "{'team_owner': 'Finance'}",
 | 
			
		||||
                            "data_governance.team_owner": "Finance",
 | 
			
		||||
                            "node_type": "model",
 | 
			
		||||
                            "materialization": "table",
 | 
			
		||||
                            "dbt_file_path": "models/transform/payments_by_customer_by_month.sql",
 | 
			
		||||
@ -944,6 +946,22 @@
 | 
			
		||||
                        "removed": false
 | 
			
		||||
                    }
 | 
			
		||||
                },
 | 
			
		||||
                {
 | 
			
		||||
                    "com.linkedin.pegasus2avro.common.GlossaryTerms": {
 | 
			
		||||
                        "terms": [
 | 
			
		||||
                            {
 | 
			
		||||
                                "urn": "urn:li:glossaryTerm:Finance_test"
 | 
			
		||||
                            },
 | 
			
		||||
                            {
 | 
			
		||||
                                "urn": "urn:li:glossaryTerm:Finance_test_nested"
 | 
			
		||||
                            }
 | 
			
		||||
                        ],
 | 
			
		||||
                        "auditStamp": {
 | 
			
		||||
                            "time": 1643871600000,
 | 
			
		||||
                            "actor": "urn:li:corpuser:datahub"
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                },
 | 
			
		||||
                {
 | 
			
		||||
                    "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
 | 
			
		||||
                        "schemaName": "model.sample_dbt.payments_by_customer_by_month",
 | 
			
		||||
@ -1019,6 +1037,9 @@
 | 
			
		||||
                                        },
 | 
			
		||||
                                        {
 | 
			
		||||
                                            "urn": "urn:li:glossaryTerm:pii"
 | 
			
		||||
                                        },
 | 
			
		||||
                                        {
 | 
			
		||||
                                            "urn": "urn:li:glossaryTerm:pii_category_organization"
 | 
			
		||||
                                        }
 | 
			
		||||
                                    ],
 | 
			
		||||
                                    "auditStamp": {
 | 
			
		||||
@ -4769,6 +4790,38 @@
 | 
			
		||||
        "lastRunId": "no-run-id-provided"
 | 
			
		||||
    }
 | 
			
		||||
},
 | 
			
		||||
{
 | 
			
		||||
    "entityType": "glossaryTerm",
 | 
			
		||||
    "entityUrn": "urn:li:glossaryTerm:Finance_test",
 | 
			
		||||
    "changeType": "UPSERT",
 | 
			
		||||
    "aspectName": "glossaryTermKey",
 | 
			
		||||
    "aspect": {
 | 
			
		||||
        "json": {
 | 
			
		||||
            "name": "Finance_test"
 | 
			
		||||
        }
 | 
			
		||||
    },
 | 
			
		||||
    "systemMetadata": {
 | 
			
		||||
        "lastObserved": 1643871600000,
 | 
			
		||||
        "runId": "dbt-column-meta-mapping",
 | 
			
		||||
        "lastRunId": "no-run-id-provided"
 | 
			
		||||
    }
 | 
			
		||||
},
 | 
			
		||||
{
 | 
			
		||||
    "entityType": "glossaryTerm",
 | 
			
		||||
    "entityUrn": "urn:li:glossaryTerm:Finance_test_nested",
 | 
			
		||||
    "changeType": "UPSERT",
 | 
			
		||||
    "aspectName": "glossaryTermKey",
 | 
			
		||||
    "aspect": {
 | 
			
		||||
        "json": {
 | 
			
		||||
            "name": "Finance_test_nested"
 | 
			
		||||
        }
 | 
			
		||||
    },
 | 
			
		||||
    "systemMetadata": {
 | 
			
		||||
        "lastObserved": 1643871600000,
 | 
			
		||||
        "runId": "dbt-column-meta-mapping",
 | 
			
		||||
        "lastRunId": "no-run-id-provided"
 | 
			
		||||
    }
 | 
			
		||||
},
 | 
			
		||||
{
 | 
			
		||||
    "entityType": "glossaryTerm",
 | 
			
		||||
    "entityUrn": "urn:li:glossaryTerm:customer_id",
 | 
			
		||||
@ -4817,6 +4870,22 @@
 | 
			
		||||
        "lastRunId": "no-run-id-provided"
 | 
			
		||||
    }
 | 
			
		||||
},
 | 
			
		||||
{
 | 
			
		||||
    "entityType": "glossaryTerm",
 | 
			
		||||
    "entityUrn": "urn:li:glossaryTerm:pii_category_organization",
 | 
			
		||||
    "changeType": "UPSERT",
 | 
			
		||||
    "aspectName": "glossaryTermKey",
 | 
			
		||||
    "aspect": {
 | 
			
		||||
        "json": {
 | 
			
		||||
            "name": "pii_category_organization"
 | 
			
		||||
        }
 | 
			
		||||
    },
 | 
			
		||||
    "systemMetadata": {
 | 
			
		||||
        "lastObserved": 1643871600000,
 | 
			
		||||
        "runId": "dbt-column-meta-mapping",
 | 
			
		||||
        "lastRunId": "no-run-id-provided"
 | 
			
		||||
    }
 | 
			
		||||
},
 | 
			
		||||
{
 | 
			
		||||
    "entityType": "tag",
 | 
			
		||||
    "entityUrn": "urn:li:tag:dbt:sensitive",
 | 
			
		||||
 | 
			
		||||
@ -7883,7 +7883,10 @@
 | 
			
		||||
          "meta": {
 | 
			
		||||
            "is_sensitive": true,
 | 
			
		||||
            "maturity": "beta",
 | 
			
		||||
            "terms": "pii, customer_id"
 | 
			
		||||
            "terms": "pii, customer_id",
 | 
			
		||||
            "governance": {
 | 
			
		||||
              "pii_category": "organization"
 | 
			
		||||
            }
 | 
			
		||||
          },
 | 
			
		||||
          "name": "customer_id",
 | 
			
		||||
          "quote": null,
 | 
			
		||||
@ -7908,7 +7911,12 @@
 | 
			
		||||
        "grants": {},
 | 
			
		||||
        "incremental_strategy": null,
 | 
			
		||||
        "materialized": "table",
 | 
			
		||||
        "meta": {},
 | 
			
		||||
        "meta": {
 | 
			
		||||
          "data_governance_nested": {
 | 
			
		||||
            "team_owner": "Finance"
 | 
			
		||||
          },
 | 
			
		||||
          "data_governance.team_owner": "Finance"
 | 
			
		||||
        },
 | 
			
		||||
        "on_schema_change": "ignore",
 | 
			
		||||
        "packages": [],
 | 
			
		||||
        "persist_docs": {},
 | 
			
		||||
 | 
			
		||||
@ -170,7 +170,7 @@ class DbtTestConfig:
 | 
			
		||||
            },
 | 
			
		||||
        ),
 | 
			
		||||
        DbtTestConfig(
 | 
			
		||||
            "dbt-column-meta-mapping",  # this also tests snapshot support
 | 
			
		||||
            "dbt-column-meta-mapping",  # this also tests snapshot support and meta nested mapping
 | 
			
		||||
            "dbt_test_column_meta_mapping.json",
 | 
			
		||||
            "dbt_test_column_meta_mapping_golden.json",
 | 
			
		||||
            catalog_file="sample_dbt_catalog_1.json",
 | 
			
		||||
@ -178,6 +178,43 @@ class DbtTestConfig:
 | 
			
		||||
            sources_file="sample_dbt_sources_1.json",
 | 
			
		||||
            source_config_modifiers={
 | 
			
		||||
                "enable_meta_mapping": True,
 | 
			
		||||
                "meta_mapping": {
 | 
			
		||||
                    "data_governance_nested.team_owner": {
 | 
			
		||||
                        "match": "Finance",
 | 
			
		||||
                        "operation": "add_term",
 | 
			
		||||
                        "config": {"term": "Finance_test_nested"},
 | 
			
		||||
                    },
 | 
			
		||||
                    "owner": {
 | 
			
		||||
                        "match": "^@(.*)",
 | 
			
		||||
                        "operation": "add_owner",
 | 
			
		||||
                        "config": {"owner_type": "user"},
 | 
			
		||||
                    },
 | 
			
		||||
                    "business_owner": {
 | 
			
		||||
                        "match": ".*",
 | 
			
		||||
                        "operation": "add_owner",
 | 
			
		||||
                        "config": {"owner_type": "user"},
 | 
			
		||||
                    },
 | 
			
		||||
                    "has_pii": {
 | 
			
		||||
                        "match": True,
 | 
			
		||||
                        "operation": "add_tag",
 | 
			
		||||
                        "config": {"tag": "has_pii_test"},
 | 
			
		||||
                    },
 | 
			
		||||
                    "int_property": {
 | 
			
		||||
                        "match": 1,
 | 
			
		||||
                        "operation": "add_tag",
 | 
			
		||||
                        "config": {"tag": "int_meta_property"},
 | 
			
		||||
                    },
 | 
			
		||||
                    "double_property": {
 | 
			
		||||
                        "match": 2.5,
 | 
			
		||||
                        "operation": "add_term",
 | 
			
		||||
                        "config": {"term": "double_meta_property"},
 | 
			
		||||
                    },
 | 
			
		||||
                    "data_governance.team_owner": {
 | 
			
		||||
                        "match": "Finance",
 | 
			
		||||
                        "operation": "add_term",
 | 
			
		||||
                        "config": {"term": "Finance_test"},
 | 
			
		||||
                    },
 | 
			
		||||
                },
 | 
			
		||||
                "column_meta_mapping": {
 | 
			
		||||
                    "terms": {
 | 
			
		||||
                        "match": ".*",
 | 
			
		||||
@ -194,6 +231,11 @@ class DbtTestConfig:
 | 
			
		||||
                        "operation": "add_term",
 | 
			
		||||
                        "config": {"term": "maturity_{{ $match }}"},
 | 
			
		||||
                    },
 | 
			
		||||
                    "governance.pii_category": {
 | 
			
		||||
                        "match": ".*",
 | 
			
		||||
                        "operation": "add_term",
 | 
			
		||||
                        "config": {"term": "pii_category_{{ $match }}"},
 | 
			
		||||
                    },
 | 
			
		||||
                },
 | 
			
		||||
                "entities_enabled": {
 | 
			
		||||
                    "test_definitions": "NO",
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user