mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-01 19:25:56 +00:00
feat(dbt/ingestion): add support for nested meta properties mapping (#13396)
Co-authored-by: Petr Knez <pknez@gmail.com>
This commit is contained in:
parent
127ecca3bc
commit
311387410b
@ -55,6 +55,11 @@ column_meta_mapping:
|
||||
operation: "add_tag"
|
||||
config:
|
||||
tag: "sensitive"
|
||||
gdpr.pii:
|
||||
match: true
|
||||
operation: "add_tag"
|
||||
config:
|
||||
tag: "pii"
|
||||
```
|
||||
|
||||
We support the following operations:
|
||||
@ -118,6 +123,29 @@ meta_mapping:
|
||||
tag: "case_{{ $match }}"
|
||||
```
|
||||
|
||||
#### Nested meta properties
|
||||
|
||||
If your meta section has nested properties and looks like this:
|
||||
|
||||
```yaml
|
||||
meta:
|
||||
data_governance:
|
||||
team_owner: "Finance"
|
||||
```
|
||||
|
||||
and you want to attach the term `Finance_test` when `data_governance.team_owner` is set to `Finance`, you can use the following `meta_mapping` section:
|
||||
|
||||
```yaml
|
||||
meta_mapping:
|
||||
data_governance.team_owner:
|
||||
match: "Finance"
|
||||
operation: "add_term"
|
||||
config:
|
||||
term: "Finance_test"
|
||||
```
|
||||
|
||||
Note: nested meta property mapping is also supported for `column_meta_mapping`.
|
||||
|
||||
#### Stripping out leading @ sign
|
||||
|
||||
You can also match specific groups within the value to extract subsets of the matched value. For example, if you have a meta section that looks like this:
|
||||
|
||||
@ -1336,6 +1336,7 @@ class DBTSourceBase(StatefulIngestionSourceBase):
|
||||
self.config.tag_prefix,
|
||||
"SOURCE_CONTROL",
|
||||
self.config.strip_user_ids_from_email,
|
||||
match_nested_props=True,
|
||||
)
|
||||
|
||||
action_processor_tag = OperationProcessor(
|
||||
@ -1707,6 +1708,7 @@ class DBTSourceBase(StatefulIngestionSourceBase):
|
||||
self.config.tag_prefix,
|
||||
"SOURCE_CONTROL",
|
||||
self.config.strip_user_ids_from_email,
|
||||
match_nested_props=True,
|
||||
)
|
||||
|
||||
canonical_schema: List[SchemaField] = []
|
||||
|
||||
@ -921,6 +921,8 @@
|
||||
{
|
||||
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
|
||||
"customProperties": {
|
||||
"data_governance_nested": "{'team_owner': 'Finance'}",
|
||||
"data_governance.team_owner": "Finance",
|
||||
"node_type": "model",
|
||||
"materialization": "table",
|
||||
"dbt_file_path": "models/transform/payments_by_customer_by_month.sql",
|
||||
@ -944,6 +946,22 @@
|
||||
"removed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"com.linkedin.pegasus2avro.common.GlossaryTerms": {
|
||||
"terms": [
|
||||
{
|
||||
"urn": "urn:li:glossaryTerm:Finance_test"
|
||||
},
|
||||
{
|
||||
"urn": "urn:li:glossaryTerm:Finance_test_nested"
|
||||
}
|
||||
],
|
||||
"auditStamp": {
|
||||
"time": 1643871600000,
|
||||
"actor": "urn:li:corpuser:datahub"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
|
||||
"schemaName": "model.sample_dbt.payments_by_customer_by_month",
|
||||
@ -1019,6 +1037,9 @@
|
||||
},
|
||||
{
|
||||
"urn": "urn:li:glossaryTerm:pii"
|
||||
},
|
||||
{
|
||||
"urn": "urn:li:glossaryTerm:pii_category_organization"
|
||||
}
|
||||
],
|
||||
"auditStamp": {
|
||||
@ -4769,6 +4790,38 @@
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "glossaryTerm",
|
||||
"entityUrn": "urn:li:glossaryTerm:Finance_test",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "glossaryTermKey",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"name": "Finance_test"
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1643871600000,
|
||||
"runId": "dbt-column-meta-mapping",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "glossaryTerm",
|
||||
"entityUrn": "urn:li:glossaryTerm:Finance_test_nested",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "glossaryTermKey",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"name": "Finance_test_nested"
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1643871600000,
|
||||
"runId": "dbt-column-meta-mapping",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "glossaryTerm",
|
||||
"entityUrn": "urn:li:glossaryTerm:customer_id",
|
||||
@ -4817,6 +4870,22 @@
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "glossaryTerm",
|
||||
"entityUrn": "urn:li:glossaryTerm:pii_category_organization",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "glossaryTermKey",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"name": "pii_category_organization"
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1643871600000,
|
||||
"runId": "dbt-column-meta-mapping",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "tag",
|
||||
"entityUrn": "urn:li:tag:dbt:sensitive",
|
||||
|
||||
@ -7883,7 +7883,10 @@
|
||||
"meta": {
|
||||
"is_sensitive": true,
|
||||
"maturity": "beta",
|
||||
"terms": "pii, customer_id"
|
||||
"terms": "pii, customer_id",
|
||||
"governance": {
|
||||
"pii_category": "organization"
|
||||
}
|
||||
},
|
||||
"name": "customer_id",
|
||||
"quote": null,
|
||||
@ -7908,7 +7911,12 @@
|
||||
"grants": {},
|
||||
"incremental_strategy": null,
|
||||
"materialized": "table",
|
||||
"meta": {},
|
||||
"meta": {
|
||||
"data_governance_nested": {
|
||||
"team_owner": "Finance"
|
||||
},
|
||||
"data_governance.team_owner": "Finance"
|
||||
},
|
||||
"on_schema_change": "ignore",
|
||||
"packages": [],
|
||||
"persist_docs": {},
|
||||
|
||||
@ -170,7 +170,7 @@ class DbtTestConfig:
|
||||
},
|
||||
),
|
||||
DbtTestConfig(
|
||||
"dbt-column-meta-mapping", # this also tests snapshot support
|
||||
"dbt-column-meta-mapping", # this also tests snapshot support and meta nested mapping
|
||||
"dbt_test_column_meta_mapping.json",
|
||||
"dbt_test_column_meta_mapping_golden.json",
|
||||
catalog_file="sample_dbt_catalog_1.json",
|
||||
@ -178,6 +178,43 @@ class DbtTestConfig:
|
||||
sources_file="sample_dbt_sources_1.json",
|
||||
source_config_modifiers={
|
||||
"enable_meta_mapping": True,
|
||||
"meta_mapping": {
|
||||
"data_governance_nested.team_owner": {
|
||||
"match": "Finance",
|
||||
"operation": "add_term",
|
||||
"config": {"term": "Finance_test_nested"},
|
||||
},
|
||||
"owner": {
|
||||
"match": "^@(.*)",
|
||||
"operation": "add_owner",
|
||||
"config": {"owner_type": "user"},
|
||||
},
|
||||
"business_owner": {
|
||||
"match": ".*",
|
||||
"operation": "add_owner",
|
||||
"config": {"owner_type": "user"},
|
||||
},
|
||||
"has_pii": {
|
||||
"match": True,
|
||||
"operation": "add_tag",
|
||||
"config": {"tag": "has_pii_test"},
|
||||
},
|
||||
"int_property": {
|
||||
"match": 1,
|
||||
"operation": "add_tag",
|
||||
"config": {"tag": "int_meta_property"},
|
||||
},
|
||||
"double_property": {
|
||||
"match": 2.5,
|
||||
"operation": "add_term",
|
||||
"config": {"term": "double_meta_property"},
|
||||
},
|
||||
"data_governance.team_owner": {
|
||||
"match": "Finance",
|
||||
"operation": "add_term",
|
||||
"config": {"term": "Finance_test"},
|
||||
},
|
||||
},
|
||||
"column_meta_mapping": {
|
||||
"terms": {
|
||||
"match": ".*",
|
||||
@ -194,6 +231,11 @@ class DbtTestConfig:
|
||||
"operation": "add_term",
|
||||
"config": {"term": "maturity_{{ $match }}"},
|
||||
},
|
||||
"governance.pii_category": {
|
||||
"match": ".*",
|
||||
"operation": "add_term",
|
||||
"config": {"term": "pii_category_{{ $match }}"},
|
||||
},
|
||||
},
|
||||
"entities_enabled": {
|
||||
"test_definitions": "NO",
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user