fix(ingestion/tableau): ingest parent project name in container properties (#8030)

Co-authored-by: MohdSiddiqueBagwan <mohdsiddique.bagwan@gslab.com>
This commit is contained in:
mohdsiddique 2023-05-18 02:49:41 +05:30 committed by GitHub
parent 8cc6606e68
commit ae30be9c25
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 103 additions and 9 deletions

View File

@ -4,7 +4,7 @@ import re
from dataclasses import dataclass
from datetime import datetime
from functools import lru_cache
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, cast
import dateutil.parser as dp
import tableauserverclient as TSC
@ -371,6 +371,7 @@ class TableauProject:
name: str
description: str
parent_id: Optional[str]
parent_name: Optional[str] # Name of parent project
path: List[str]
@ -484,9 +485,17 @@ class TableauSource(StatefulIngestionSourceBase):
id=project.id,
name=project.name,
parent_id=project.parent_id,
parent_name=None,
description=project.description,
path=[],
)
# Set parent project name
for project_id, project in all_project_map.items():
if (
project.parent_id is not None
and project.parent_id in all_project_map
):
project.parent_name = all_project_map[project.parent_id].name
def set_project_path():
def form_path(project_id: str) -> List[str]:
@ -2288,15 +2297,34 @@ class TableauSource(StatefulIngestionSourceBase):
def emit_project_containers(self) -> Iterable[MetadataWorkUnit]:
for _id, project in self.tableau_project_registry.items():
project_workunits = gen_containers(
container_key=self.gen_project_key(_id),
name=project.name,
description=project.description,
sub_types=[tableau_constant.PROJECT],
parent_container_key=self.gen_project_key(project.parent_id)
if project.parent_id
else None,
project_workunits = list(
gen_containers(
container_key=self.gen_project_key(_id),
name=project.name,
description=project.description,
sub_types=[tableau_constant.PROJECT],
parent_container_key=self.gen_project_key(project.parent_id)
if project.parent_id
else None,
)
)
if (
project.parent_id is not None
and project.parent_id not in self.tableau_project_registry
):
# The parent project was skipped because of project_pattern.
# Ingest its container name property so that the DataHub Portal shows the parent container's name;
# otherwise the DataHub Portal would show the parent container's URN.
project_workunits.extend(
list(
gen_containers(
container_key=self.gen_project_key(project.parent_id),
name=cast(str, project.parent_name),
sub_types=[tableau_constant.PROJECT],
)
)
)
for wu in project_workunits:
self.report.report_workunit(wu)
yield wu

View File

@ -80,6 +80,72 @@
"runId": "tableau-test"
}
},
{
"entityType": "container",
"entityUrn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b",
"changeType": "UPSERT",
"aspectName": "containerProperties",
"aspect": {
"json": {
"customProperties": {
"platform": "tableau",
"project_id": "190a6a5c-63ed-4de1-8045-faeae5df5b01"
},
"name": "default"
}
},
"systemMetadata": {
"lastObserved": 1638860400000,
"runId": "tableau-test"
}
},
{
"entityType": "container",
"entityUrn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": false
}
},
"systemMetadata": {
"lastObserved": 1638860400000,
"runId": "tableau-test"
}
},
{
"entityType": "container",
"entityUrn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspect": {
"json": {
"platform": "urn:li:dataPlatform:tableau"
}
},
"systemMetadata": {
"lastObserved": 1638860400000,
"runId": "tableau-test"
}
},
{
"entityType": "container",
"entityUrn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
"json": {
"typeNames": [
"Project"
]
}
},
"systemMetadata": {
"lastObserved": 1638860400000,
"runId": "tableau-test"
}
},
{
"entityType": "container",
"entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607",