feat(dbt/ingestion): add support for nested meta properties mapping (#13396)

Co-authored-by: Petr Knez <pknez@gmail.com>
This commit is contained in:
Petr Knez 2025-07-17 08:12:51 +02:00 committed by GitHub
parent 127ecca3bc
commit 311387410b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 152 additions and 3 deletions

View File

@ -55,6 +55,11 @@ column_meta_mapping:
operation: "add_tag"
config:
tag: "sensitive"
gdpr.pii:
match: true
operation: "add_tag"
config:
tag: "pii"
```
We support the following operations:
@ -118,6 +123,29 @@ meta_mapping:
tag: "case_{{ $match }}"
```
#### Nested meta properties
If your meta section has nested properties and looks like this:
```yaml
meta:
data_governance:
team_owner: "Finance"
```
and you want to attach the term Finance_test when data_governance.team_owner is set to Finance, you can use the following meta_mapping section:
```yaml
meta_mapping:
data_governance.team_owner:
match: "Finance"
operation: "add_term"
config:
term: "Finance_test"
```
Note: nested meta property mapping is also supported for column_meta_mapping.
#### Stripping out leading @ sign
You can also match specific groups within the value to extract subsets of the matched value. For example, if you have a meta section that looks like this:

View File

@ -1336,6 +1336,7 @@ class DBTSourceBase(StatefulIngestionSourceBase):
self.config.tag_prefix,
"SOURCE_CONTROL",
self.config.strip_user_ids_from_email,
match_nested_props=True,
)
action_processor_tag = OperationProcessor(
@ -1707,6 +1708,7 @@ class DBTSourceBase(StatefulIngestionSourceBase):
self.config.tag_prefix,
"SOURCE_CONTROL",
self.config.strip_user_ids_from_email,
match_nested_props=True,
)
canonical_schema: List[SchemaField] = []

View File

@ -921,6 +921,8 @@
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"data_governance_nested": "{'team_owner': 'Finance'}",
"data_governance.team_owner": "Finance",
"node_type": "model",
"materialization": "table",
"dbt_file_path": "models/transform/payments_by_customer_by_month.sql",
@ -944,6 +946,22 @@
"removed": false
}
},
{
"com.linkedin.pegasus2avro.common.GlossaryTerms": {
"terms": [
{
"urn": "urn:li:glossaryTerm:Finance_test"
},
{
"urn": "urn:li:glossaryTerm:Finance_test_nested"
}
],
"auditStamp": {
"time": 1643871600000,
"actor": "urn:li:corpuser:datahub"
}
}
},
{
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
"schemaName": "model.sample_dbt.payments_by_customer_by_month",
@ -1019,6 +1037,9 @@
},
{
"urn": "urn:li:glossaryTerm:pii"
},
{
"urn": "urn:li:glossaryTerm:pii_category_organization"
}
],
"auditStamp": {
@ -4769,6 +4790,38 @@
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "glossaryTerm",
"entityUrn": "urn:li:glossaryTerm:Finance_test",
"changeType": "UPSERT",
"aspectName": "glossaryTermKey",
"aspect": {
"json": {
"name": "Finance_test"
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "dbt-column-meta-mapping",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "glossaryTerm",
"entityUrn": "urn:li:glossaryTerm:Finance_test_nested",
"changeType": "UPSERT",
"aspectName": "glossaryTermKey",
"aspect": {
"json": {
"name": "Finance_test_nested"
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "dbt-column-meta-mapping",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "glossaryTerm",
"entityUrn": "urn:li:glossaryTerm:customer_id",
@ -4817,6 +4870,22 @@
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "glossaryTerm",
"entityUrn": "urn:li:glossaryTerm:pii_category_organization",
"changeType": "UPSERT",
"aspectName": "glossaryTermKey",
"aspect": {
"json": {
"name": "pii_category_organization"
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "dbt-column-meta-mapping",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "tag",
"entityUrn": "urn:li:tag:dbt:sensitive",

View File

@ -7883,7 +7883,10 @@
"meta": {
"is_sensitive": true,
"maturity": "beta",
"terms": "pii, customer_id"
"terms": "pii, customer_id",
"governance": {
"pii_category": "organization"
}
},
"name": "customer_id",
"quote": null,
@ -7908,7 +7911,12 @@
"grants": {},
"incremental_strategy": null,
"materialized": "table",
"meta": {},
"meta": {
"data_governance_nested": {
"team_owner": "Finance"
},
"data_governance.team_owner": "Finance"
},
"on_schema_change": "ignore",
"packages": [],
"persist_docs": {},

View File

@ -170,7 +170,7 @@ class DbtTestConfig:
},
),
DbtTestConfig(
"dbt-column-meta-mapping", # this also tests snapshot support
"dbt-column-meta-mapping", # this also tests snapshot support and meta nested mapping
"dbt_test_column_meta_mapping.json",
"dbt_test_column_meta_mapping_golden.json",
catalog_file="sample_dbt_catalog_1.json",
@ -178,6 +178,43 @@ class DbtTestConfig:
sources_file="sample_dbt_sources_1.json",
source_config_modifiers={
"enable_meta_mapping": True,
"meta_mapping": {
"data_governance_nested.team_owner": {
"match": "Finance",
"operation": "add_term",
"config": {"term": "Finance_test_nested"},
},
"owner": {
"match": "^@(.*)",
"operation": "add_owner",
"config": {"owner_type": "user"},
},
"business_owner": {
"match": ".*",
"operation": "add_owner",
"config": {"owner_type": "user"},
},
"has_pii": {
"match": True,
"operation": "add_tag",
"config": {"tag": "has_pii_test"},
},
"int_property": {
"match": 1,
"operation": "add_tag",
"config": {"tag": "int_meta_property"},
},
"double_property": {
"match": 2.5,
"operation": "add_term",
"config": {"term": "double_meta_property"},
},
"data_governance.team_owner": {
"match": "Finance",
"operation": "add_term",
"config": {"term": "Finance_test"},
},
},
"column_meta_mapping": {
"terms": {
"match": ".*",
@ -194,6 +231,11 @@ class DbtTestConfig:
"operation": "add_term",
"config": {"term": "maturity_{{ $match }}"},
},
"governance.pii_category": {
"match": ".*",
"operation": "add_term",
"config": {"term": "pii_category_{{ $match }}"},
},
},
"entities_enabled": {
"test_definitions": "NO",