mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-11 08:52:58 +00:00
feat(dbt/ingestion): add support for nested meta properties mapping (#13396)
Co-authored-by: Petr Knez <pknez@gmail.com>
This commit is contained in:
parent
127ecca3bc
commit
311387410b
@ -55,6 +55,11 @@ column_meta_mapping:
|
|||||||
operation: "add_tag"
|
operation: "add_tag"
|
||||||
config:
|
config:
|
||||||
tag: "sensitive"
|
tag: "sensitive"
|
||||||
|
gdpr.pii:
|
||||||
|
match: true
|
||||||
|
operation: "add_tag"
|
||||||
|
config:
|
||||||
|
tag: "pii"
|
||||||
```
|
```
|
||||||
|
|
||||||
We support the following operations:
|
We support the following operations:
|
||||||
@ -118,6 +123,29 @@ meta_mapping:
|
|||||||
tag: "case_{{ $match }}"
|
tag: "case_{{ $match }}"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
#### Nested meta properties
|
||||||
|
|
||||||
|
If your meta section has nested properties and looks like this:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
meta:
|
||||||
|
data_governance:
|
||||||
|
team_owner: "Finance"
|
||||||
|
```
|
||||||
|
|
||||||
|
and you want to attach the term `Finance_test` when `data_governance.team_owner` is set to `Finance`, you can use the following `meta_mapping` section:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
meta_mapping:
|
||||||
|
data_governance.team_owner:
|
||||||
|
match: "Finance"
|
||||||
|
operation: "add_term"
|
||||||
|
config:
|
||||||
|
term: "Finance_test"
|
||||||
|
```
|
||||||
|
|
||||||
|
Note: nested meta property mapping is also supported for `column_meta_mapping`.
|
||||||
|
|
||||||
#### Stripping out leading @ sign
|
#### Stripping out leading @ sign
|
||||||
|
|
||||||
You can also match specific groups within the value to extract subsets of the matched value. e.g. if you have a meta section that looks like this:
|
You can also match specific groups within the value to extract subsets of the matched value. e.g. if you have a meta section that looks like this:
|
||||||
|
|||||||
@ -1336,6 +1336,7 @@ class DBTSourceBase(StatefulIngestionSourceBase):
|
|||||||
self.config.tag_prefix,
|
self.config.tag_prefix,
|
||||||
"SOURCE_CONTROL",
|
"SOURCE_CONTROL",
|
||||||
self.config.strip_user_ids_from_email,
|
self.config.strip_user_ids_from_email,
|
||||||
|
match_nested_props=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
action_processor_tag = OperationProcessor(
|
action_processor_tag = OperationProcessor(
|
||||||
@ -1707,6 +1708,7 @@ class DBTSourceBase(StatefulIngestionSourceBase):
|
|||||||
self.config.tag_prefix,
|
self.config.tag_prefix,
|
||||||
"SOURCE_CONTROL",
|
"SOURCE_CONTROL",
|
||||||
self.config.strip_user_ids_from_email,
|
self.config.strip_user_ids_from_email,
|
||||||
|
match_nested_props=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
canonical_schema: List[SchemaField] = []
|
canonical_schema: List[SchemaField] = []
|
||||||
|
|||||||
@ -921,6 +921,8 @@
|
|||||||
{
|
{
|
||||||
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
|
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
|
||||||
"customProperties": {
|
"customProperties": {
|
||||||
|
"data_governance_nested": "{'team_owner': 'Finance'}",
|
||||||
|
"data_governance.team_owner": "Finance",
|
||||||
"node_type": "model",
|
"node_type": "model",
|
||||||
"materialization": "table",
|
"materialization": "table",
|
||||||
"dbt_file_path": "models/transform/payments_by_customer_by_month.sql",
|
"dbt_file_path": "models/transform/payments_by_customer_by_month.sql",
|
||||||
@ -944,6 +946,22 @@
|
|||||||
"removed": false
|
"removed": false
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.common.GlossaryTerms": {
|
||||||
|
"terms": [
|
||||||
|
{
|
||||||
|
"urn": "urn:li:glossaryTerm:Finance_test"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"urn": "urn:li:glossaryTerm:Finance_test_nested"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"auditStamp": {
|
||||||
|
"time": 1643871600000,
|
||||||
|
"actor": "urn:li:corpuser:datahub"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
|
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
|
||||||
"schemaName": "model.sample_dbt.payments_by_customer_by_month",
|
"schemaName": "model.sample_dbt.payments_by_customer_by_month",
|
||||||
@ -1019,6 +1037,9 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"urn": "urn:li:glossaryTerm:pii"
|
"urn": "urn:li:glossaryTerm:pii"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"urn": "urn:li:glossaryTerm:pii_category_organization"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"auditStamp": {
|
"auditStamp": {
|
||||||
@ -4769,6 +4790,38 @@
|
|||||||
"lastRunId": "no-run-id-provided"
|
"lastRunId": "no-run-id-provided"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"entityType": "glossaryTerm",
|
||||||
|
"entityUrn": "urn:li:glossaryTerm:Finance_test",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "glossaryTermKey",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"name": "Finance_test"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1643871600000,
|
||||||
|
"runId": "dbt-column-meta-mapping",
|
||||||
|
"lastRunId": "no-run-id-provided"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "glossaryTerm",
|
||||||
|
"entityUrn": "urn:li:glossaryTerm:Finance_test_nested",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "glossaryTermKey",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"name": "Finance_test_nested"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1643871600000,
|
||||||
|
"runId": "dbt-column-meta-mapping",
|
||||||
|
"lastRunId": "no-run-id-provided"
|
||||||
|
}
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"entityType": "glossaryTerm",
|
"entityType": "glossaryTerm",
|
||||||
"entityUrn": "urn:li:glossaryTerm:customer_id",
|
"entityUrn": "urn:li:glossaryTerm:customer_id",
|
||||||
@ -4817,6 +4870,22 @@
|
|||||||
"lastRunId": "no-run-id-provided"
|
"lastRunId": "no-run-id-provided"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"entityType": "glossaryTerm",
|
||||||
|
"entityUrn": "urn:li:glossaryTerm:pii_category_organization",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "glossaryTermKey",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"name": "pii_category_organization"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1643871600000,
|
||||||
|
"runId": "dbt-column-meta-mapping",
|
||||||
|
"lastRunId": "no-run-id-provided"
|
||||||
|
}
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"entityType": "tag",
|
"entityType": "tag",
|
||||||
"entityUrn": "urn:li:tag:dbt:sensitive",
|
"entityUrn": "urn:li:tag:dbt:sensitive",
|
||||||
|
|||||||
@ -7883,7 +7883,10 @@
|
|||||||
"meta": {
|
"meta": {
|
||||||
"is_sensitive": true,
|
"is_sensitive": true,
|
||||||
"maturity": "beta",
|
"maturity": "beta",
|
||||||
"terms": "pii, customer_id"
|
"terms": "pii, customer_id",
|
||||||
|
"governance": {
|
||||||
|
"pii_category": "organization"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"name": "customer_id",
|
"name": "customer_id",
|
||||||
"quote": null,
|
"quote": null,
|
||||||
@ -7908,7 +7911,12 @@
|
|||||||
"grants": {},
|
"grants": {},
|
||||||
"incremental_strategy": null,
|
"incremental_strategy": null,
|
||||||
"materialized": "table",
|
"materialized": "table",
|
||||||
"meta": {},
|
"meta": {
|
||||||
|
"data_governance_nested": {
|
||||||
|
"team_owner": "Finance"
|
||||||
|
},
|
||||||
|
"data_governance.team_owner": "Finance"
|
||||||
|
},
|
||||||
"on_schema_change": "ignore",
|
"on_schema_change": "ignore",
|
||||||
"packages": [],
|
"packages": [],
|
||||||
"persist_docs": {},
|
"persist_docs": {},
|
||||||
|
|||||||
@ -170,7 +170,7 @@ class DbtTestConfig:
|
|||||||
},
|
},
|
||||||
),
|
),
|
||||||
DbtTestConfig(
|
DbtTestConfig(
|
||||||
"dbt-column-meta-mapping", # this also tests snapshot support
|
"dbt-column-meta-mapping", # this also tests snapshot support and meta nested mapping
|
||||||
"dbt_test_column_meta_mapping.json",
|
"dbt_test_column_meta_mapping.json",
|
||||||
"dbt_test_column_meta_mapping_golden.json",
|
"dbt_test_column_meta_mapping_golden.json",
|
||||||
catalog_file="sample_dbt_catalog_1.json",
|
catalog_file="sample_dbt_catalog_1.json",
|
||||||
@ -178,6 +178,43 @@ class DbtTestConfig:
|
|||||||
sources_file="sample_dbt_sources_1.json",
|
sources_file="sample_dbt_sources_1.json",
|
||||||
source_config_modifiers={
|
source_config_modifiers={
|
||||||
"enable_meta_mapping": True,
|
"enable_meta_mapping": True,
|
||||||
|
"meta_mapping": {
|
||||||
|
"data_governance_nested.team_owner": {
|
||||||
|
"match": "Finance",
|
||||||
|
"operation": "add_term",
|
||||||
|
"config": {"term": "Finance_test_nested"},
|
||||||
|
},
|
||||||
|
"owner": {
|
||||||
|
"match": "^@(.*)",
|
||||||
|
"operation": "add_owner",
|
||||||
|
"config": {"owner_type": "user"},
|
||||||
|
},
|
||||||
|
"business_owner": {
|
||||||
|
"match": ".*",
|
||||||
|
"operation": "add_owner",
|
||||||
|
"config": {"owner_type": "user"},
|
||||||
|
},
|
||||||
|
"has_pii": {
|
||||||
|
"match": True,
|
||||||
|
"operation": "add_tag",
|
||||||
|
"config": {"tag": "has_pii_test"},
|
||||||
|
},
|
||||||
|
"int_property": {
|
||||||
|
"match": 1,
|
||||||
|
"operation": "add_tag",
|
||||||
|
"config": {"tag": "int_meta_property"},
|
||||||
|
},
|
||||||
|
"double_property": {
|
||||||
|
"match": 2.5,
|
||||||
|
"operation": "add_term",
|
||||||
|
"config": {"term": "double_meta_property"},
|
||||||
|
},
|
||||||
|
"data_governance.team_owner": {
|
||||||
|
"match": "Finance",
|
||||||
|
"operation": "add_term",
|
||||||
|
"config": {"term": "Finance_test"},
|
||||||
|
},
|
||||||
|
},
|
||||||
"column_meta_mapping": {
|
"column_meta_mapping": {
|
||||||
"terms": {
|
"terms": {
|
||||||
"match": ".*",
|
"match": ".*",
|
||||||
@ -194,6 +231,11 @@ class DbtTestConfig:
|
|||||||
"operation": "add_term",
|
"operation": "add_term",
|
||||||
"config": {"term": "maturity_{{ $match }}"},
|
"config": {"term": "maturity_{{ $match }}"},
|
||||||
},
|
},
|
||||||
|
"governance.pii_category": {
|
||||||
|
"match": ".*",
|
||||||
|
"operation": "add_term",
|
||||||
|
"config": {"term": "pii_category_{{ $match }}"},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
"entities_enabled": {
|
"entities_enabled": {
|
||||||
"test_definitions": "NO",
|
"test_definitions": "NO",
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user