fix(ingest/looker): support platform instance for dashboards & charts (#10771)

This commit is contained in:
sid-acryl 2024-07-26 06:25:39 +05:30 committed by GitHub
parent 71d1cdbe3b
commit b173f605b8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 227 additions and 51 deletions

View File

@ -286,12 +286,17 @@ class LookerDashboardSourceConfig(
)
extract_independent_looks: bool = Field(
False,
description="Extract looks which are not part of any Dashboard. To enable this flag the stateful_ingestion should also be enabled.",
description="Extract looks which are not part of any Dashboard. To enable this flag the stateful_ingestion "
"should also be enabled.",
)
emit_used_explores_only: bool = Field(
True,
description="When enabled, only explores that are used by a Dashboard/Look will be ingested.",
)
include_platform_instance_in_urns: bool = Field(
False,
description="When enabled, platform instance will be added in dashboard and chart urn.",
)
@validator("external_base_url", pre=True, always=True)
def external_url_defaults_to_api_config_base_url(

View File

@ -80,6 +80,7 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
from datahub.metadata.com.linkedin.pegasus2avro.common import (
AuditStamp,
ChangeAuditStamps,
DataPlatformInstance,
Status,
)
from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import (
@ -95,11 +96,13 @@ from datahub.metadata.schema_classes import (
ChartTypeClass,
ContainerClass,
DashboardInfoClass,
DataPlatformInfoClass,
InputFieldClass,
InputFieldsClass,
OwnerClass,
OwnershipClass,
OwnershipTypeClass,
PlatformTypeClass,
SubTypesClass,
)
from datahub.utilities.backpressure_aware_executor import BackpressureAwareExecutor
@ -624,6 +627,38 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
if include_current_folder:
yield BrowsePathEntryClass(id=urn, urn=urn)
def _create_platform_instance_aspect(
    self,
) -> DataPlatformInstance:
    """Build the ``DataPlatformInstance`` aspect for the configured instance.

    Reads ``platform_name`` and ``platform_instance`` from the source
    configuration and turns them into a platform urn and a platform-instance
    urn.

    Raises:
        AssertionError: if either ``platform_name`` or ``platform_instance``
            is unset (falsy) in the configuration.
    """
    config = self.source_config

    platform_name = config.platform_name
    assert platform_name, "Platform name is not set in the configuration."

    instance_name = config.platform_instance
    assert instance_name, "Platform instance is not set in the configuration."

    platform_urn = builder.make_data_platform_urn(platform_name)
    instance_urn = builder.make_dataplatform_instance_urn(
        platform=platform_name,
        instance=instance_name,
    )
    return DataPlatformInstance(platform=platform_urn, instance=instance_urn)
def _make_chart_urn(self, element_id: str) -> str:
    """Return the chart urn for ``element_id`` on the configured platform.

    The configured platform instance is handed to the urn builder only when
    ``include_platform_instance_in_urns`` is enabled; otherwise ``None`` is
    passed as the platform instance.
    """
    config = self.source_config
    instance_for_urn: Optional[str] = (
        config.platform_instance
        if config.include_platform_instance_in_urns
        else None
    )
    return builder.make_chart_urn(
        platform=config.platform_name,
        name=element_id,
        platform_instance=instance_for_urn,
    )
def _make_chart_metadata_events(
self,
dashboard_element: LookerDashboardElement,
@ -631,8 +666,8 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
LookerDashboard
], # dashboard will be None if this is a standalone look
) -> List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]:
chart_urn = builder.make_chart_urn(
self.source_config.platform_name, dashboard_element.get_urn_element_id()
chart_urn = self._make_chart_urn(
element_id=dashboard_element.get_urn_element_id()
)
chart_snapshot = ChartSnapshot(
urn=chart_urn,
@ -713,6 +748,14 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
),
]
if self.source_config.include_platform_instance_in_urns:
proposals.append(
MetadataChangeProposalWrapper(
entityUrn=chart_urn,
aspect=self._create_platform_instance_aspect(),
),
)
# If extracting embeds is enabled, produce an MCP for embed URL.
if (
self.source_config.extract_embed_urls
@ -818,11 +861,26 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
)
)
if self.source_config.include_platform_instance_in_urns:
proposals.append(
MetadataChangeProposalWrapper(
entityUrn=dashboard_urn,
aspect=self._create_platform_instance_aspect(),
)
)
return proposals
def make_dashboard_urn(self, looker_dashboard: LookerDashboard) -> str:
    """Return the dashboard urn for ``looker_dashboard``.

    Args:
        looker_dashboard: dashboard whose ``get_urn_dashboard_id()`` value is
            used as the urn name.

    Returns:
        The urn produced by ``builder.make_dashboard_urn`` for the configured
        platform. The configured platform instance is included only when
        ``include_platform_instance_in_urns`` is enabled.
    """
    platform_instance: Optional[str] = None
    if self.source_config.include_platform_instance_in_urns:
        platform_instance = self.source_config.platform_instance

    # Each argument is passed by keyword exactly once; supplying platform and
    # name positionally as well would raise "got multiple values for argument".
    return builder.make_dashboard_urn(
        name=looker_dashboard.get_urn_dashboard_id(),
        platform=self.source_config.platform_name,
        platform_instance=platform_instance,
    )
def _make_explore_metadata_events(
@ -1154,8 +1212,8 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
# enrich the input_fields with the fully hydrated ViewField from the now fetched explores
for input_field in input_fields:
entity_urn = builder.make_chart_urn(
self.source_config.platform_name, dashboard_element.get_urn_element_id()
entity_urn = self._make_chart_urn(
element_id=dashboard_element.get_urn_element_id()
)
view_field_for_reference = input_field.view_field
@ -1220,8 +1278,8 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
def _make_metrics_dimensions_chart_mcp(
self, dashboard_element: LookerDashboardElement
) -> MetadataChangeProposalWrapper:
chart_urn = builder.make_chart_urn(
self.source_config.platform_name, dashboard_element.get_urn_element_id()
chart_urn = self._make_chart_urn(
element_id=dashboard_element.get_urn_element_id()
)
input_fields_aspect = InputFieldsClass(
fields=self._input_fields_from_dashboard_element(dashboard_element)
@ -1513,6 +1571,25 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
looker_dashboards_for_usage: List[looker_usage.LookerDashboardForUsage] = []
# Emit platform instance entity
if self.source_config.platform_instance:
platform_instance_urn = builder.make_dataplatform_instance_urn(
platform=self.source_config.platform_name,
instance=self.source_config.platform_instance,
)
yield MetadataWorkUnit(
id=f"{platform_instance_urn}-aspect-dataplatformInfo",
mcp=MetadataChangeProposalWrapper(
entityUrn=platform_instance_urn,
aspect=DataPlatformInfoClass(
name=self.source_config.platform_instance,
type=PlatformTypeClass.OTHERS,
datasetNameDelimiter=".",
),
),
)
with self.reporter.report_stage("dashboard_chart_metadata"):
for job in BackpressureAwareExecutor.map(
self.process_dashboard,

View File

@ -1,13 +1,32 @@
[
{
"entityType": "dataPlatformInstance",
"entityUrn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)",
"changeType": "UPSERT",
"aspectName": "dataPlatformInfo",
"aspect": {
"json": {
"name": "ap-south-1",
"type": "OTHERS",
"datasetNameDelimiter": "."
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "looker-test",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "container",
"entityUrn": "urn:li:container:691314a7b63628684d62a14861d057a8",
"entityUrn": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7",
"changeType": "UPSERT",
"aspectName": "containerProperties",
"aspect": {
"json": {
"customProperties": {
"platform": "looker",
"instance": "ap-south-1",
"env": "PROD",
"folder_id": "shared-folder-id"
},
@ -22,7 +41,7 @@
},
{
"entityType": "container",
"entityUrn": "urn:li:container:691314a7b63628684d62a14861d057a8",
"entityUrn": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@ -38,12 +57,13 @@
},
{
"entityType": "container",
"entityUrn": "urn:li:container:691314a7b63628684d62a14861d057a8",
"entityUrn": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspect": {
"json": {
"platform": "urn:li:dataPlatform:looker"
"platform": "urn:li:dataPlatform:looker",
"instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)"
}
},
"systemMetadata": {
@ -54,7 +74,7 @@
},
{
"entityType": "container",
"entityUrn": "urn:li:container:691314a7b63628684d62a14861d057a8",
"entityUrn": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
@ -72,12 +92,16 @@
},
{
"entityType": "container",
"entityUrn": "urn:li:container:691314a7b63628684d62a14861d057a8",
"entityUrn": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7",
"changeType": "UPSERT",
"aspectName": "browsePathsV2",
"aspect": {
"json": {
"path": [
{
"id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)",
"urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)"
},
{
"id": "Folders"
}
@ -93,7 +117,7 @@
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": {
"urn": "urn:li:chart:(looker,dashboard_elements.2)",
"urn": "urn:li:chart:(looker,ap-south-1.dashboard_elements.2)",
"aspects": [
{
"com.linkedin.pegasus2avro.common.Status": {
@ -120,7 +144,7 @@
"chartUrl": "https://looker.company.com/x/",
"inputs": [
{
"string": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)"
"string": "urn:li:dataset:(urn:li:dataPlatform:looker,ap-south-1.data.explore.my_view,PROD)"
}
]
}
@ -143,7 +167,7 @@
},
{
"entityType": "chart",
"entityUrn": "urn:li:chart:(looker,dashboard_elements.2)",
"entityUrn": "urn:li:chart:(looker,ap-south-1.dashboard_elements.2)",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
@ -161,22 +185,43 @@
},
{
"entityType": "chart",
"entityUrn": "urn:li:chart:(looker,dashboard_elements.2)",
"entityUrn": "urn:li:chart:(looker,ap-south-1.dashboard_elements.2)",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspect": {
"json": {
"platform": "urn:li:dataPlatform:looker",
"instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)"
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "looker-test",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "chart",
"entityUrn": "urn:li:chart:(looker,ap-south-1.dashboard_elements.2)",
"changeType": "UPSERT",
"aspectName": "browsePathsV2",
"aspect": {
"json": {
"path": [
{
"id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)",
"urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)"
},
{
"id": "Folders"
},
{
"id": "urn:li:container:691314a7b63628684d62a14861d057a8",
"urn": "urn:li:container:691314a7b63628684d62a14861d057a8"
"id": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7",
"urn": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7"
},
{
"id": "urn:li:dashboard:(looker,dashboards.1)",
"urn": "urn:li:dashboard:(looker,dashboards.1)"
"id": "urn:li:dashboard:(looker,ap-south-1.dashboards.1)",
"urn": "urn:li:dashboard:(looker,ap-south-1.dashboards.1)"
}
]
}
@ -190,7 +235,7 @@
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DashboardSnapshot": {
"urn": "urn:li:dashboard:(looker,dashboards.1)",
"urn": "urn:li:dashboard:(looker,ap-south-1.dashboards.1)",
"aspects": [
{
"com.linkedin.pegasus2avro.dashboard.DashboardInfo": {
@ -198,7 +243,7 @@
"title": "foo",
"description": "lorem ipsum",
"charts": [
"urn:li:chart:(looker,dashboard_elements.2)"
"urn:li:chart:(looker,ap-south-1.dashboard_elements.2)"
],
"datasets": [],
"lastModified": {
@ -237,12 +282,12 @@
},
{
"entityType": "dashboard",
"entityUrn": "urn:li:dashboard:(looker,dashboards.1)",
"entityUrn": "urn:li:dashboard:(looker,ap-south-1.dashboards.1)",
"changeType": "UPSERT",
"aspectName": "container",
"aspect": {
"json": {
"container": "urn:li:container:691314a7b63628684d62a14861d057a8"
"container": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7"
}
},
"systemMetadata": {
@ -253,7 +298,7 @@
},
{
"entityType": "dashboard",
"entityUrn": "urn:li:dashboard:(looker,dashboards.1)",
"entityUrn": "urn:li:dashboard:(looker,ap-south-1.dashboards.1)",
"changeType": "UPSERT",
"aspectName": "embed",
"aspect": {
@ -269,18 +314,39 @@
},
{
"entityType": "dashboard",
"entityUrn": "urn:li:dashboard:(looker,dashboards.1)",
"entityUrn": "urn:li:dashboard:(looker,ap-south-1.dashboards.1)",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspect": {
"json": {
"platform": "urn:li:dataPlatform:looker",
"instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)"
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "looker-test",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dashboard",
"entityUrn": "urn:li:dashboard:(looker,ap-south-1.dashboards.1)",
"changeType": "UPSERT",
"aspectName": "browsePathsV2",
"aspect": {
"json": {
"path": [
{
"id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)",
"urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)"
},
{
"id": "Folders"
},
{
"id": "urn:li:container:691314a7b63628684d62a14861d057a8",
"urn": "urn:li:container:691314a7b63628684d62a14861d057a8"
"id": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7",
"urn": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7"
}
]
}
@ -293,14 +359,14 @@
},
{
"entityType": "chart",
"entityUrn": "urn:li:chart:(looker,dashboard_elements.2)",
"entityUrn": "urn:li:chart:(looker,ap-south-1.dashboard_elements.2)",
"changeType": "UPSERT",
"aspectName": "inputFields",
"aspect": {
"json": {
"fields": [
{
"schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(looker,dashboard_elements.2),calc)",
"schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(looker,ap-south-1.dashboard_elements.2),calc)",
"schemaField": {
"fieldPath": "calc",
"nullable": false,
@ -317,7 +383,7 @@
}
},
{
"schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD),dim1)",
"schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,ap-south-1.data.explore.my_view,PROD),dim1)",
"schemaField": {
"fieldPath": "dim1",
"nullable": false,
@ -351,14 +417,14 @@
},
{
"entityType": "dashboard",
"entityUrn": "urn:li:dashboard:(looker,dashboards.1)",
"entityUrn": "urn:li:dashboard:(looker,ap-south-1.dashboards.1)",
"changeType": "UPSERT",
"aspectName": "inputFields",
"aspect": {
"json": {
"fields": [
{
"schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(looker,dashboard_elements.2),calc)",
"schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(looker,ap-south-1.dashboard_elements.2),calc)",
"schemaField": {
"fieldPath": "calc",
"nullable": false,
@ -375,7 +441,7 @@
}
},
{
"schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD),dim1)",
"schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,ap-south-1.data.explore.my_view,PROD),dim1)",
"schemaField": {
"fieldPath": "dim1",
"nullable": false,
@ -409,13 +475,14 @@
},
{
"entityType": "container",
"entityUrn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42",
"entityUrn": "urn:li:container:63e49aaeb15b289d177acbb32625d577",
"changeType": "UPSERT",
"aspectName": "containerProperties",
"aspect": {
"json": {
"customProperties": {
"platform": "looker",
"instance": "ap-south-1",
"env": "PROD",
"model_name": "data"
},
@ -430,7 +497,7 @@
},
{
"entityType": "container",
"entityUrn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42",
"entityUrn": "urn:li:container:63e49aaeb15b289d177acbb32625d577",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@ -446,12 +513,13 @@
},
{
"entityType": "container",
"entityUrn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42",
"entityUrn": "urn:li:container:63e49aaeb15b289d177acbb32625d577",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspect": {
"json": {
"platform": "urn:li:dataPlatform:looker"
"platform": "urn:li:dataPlatform:looker",
"instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)"
}
},
"systemMetadata": {
@ -462,7 +530,7 @@
},
{
"entityType": "container",
"entityUrn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42",
"entityUrn": "urn:li:container:63e49aaeb15b289d177acbb32625d577",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
@ -480,12 +548,16 @@
},
{
"entityType": "container",
"entityUrn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42",
"entityUrn": "urn:li:container:63e49aaeb15b289d177acbb32625d577",
"changeType": "UPSERT",
"aspectName": "browsePathsV2",
"aspect": {
"json": {
"path": [
{
"id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)",
"urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)"
},
{
"id": "Explore"
}
@ -501,7 +573,7 @@
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
"urn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)",
"urn": "urn:li:dataset:(urn:li:dataPlatform:looker,ap-south-1.data.explore.my_view,PROD)",
"aspects": [
{
"com.linkedin.pegasus2avro.common.BrowsePaths": {
@ -535,7 +607,7 @@
"time": 1586847600000,
"actor": "urn:li:corpuser:datahub"
},
"dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.underlying_view,PROD)",
"dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,ap-south-1.lkml_samples.view.underlying_view,PROD)",
"type": "VIEW"
}
]
@ -597,7 +669,7 @@
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,ap-south-1.data.explore.my_view,PROD)",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
@ -615,7 +687,7 @@
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,ap-south-1.data.explore.my_view,PROD)",
"changeType": "UPSERT",
"aspectName": "embed",
"aspect": {
@ -631,12 +703,12 @@
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,ap-south-1.data.explore.my_view,PROD)",
"changeType": "UPSERT",
"aspectName": "container",
"aspect": {
"json": {
"container": "urn:li:container:59a5aa45397364e6882e793f1bc77b42"
"container": "urn:li:container:63e49aaeb15b289d177acbb32625d577"
}
},
"systemMetadata": {
@ -647,18 +719,22 @@
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,ap-south-1.data.explore.my_view,PROD)",
"changeType": "UPSERT",
"aspectName": "browsePathsV2",
"aspect": {
"json": {
"path": [
{
"id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)",
"urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)"
},
{
"id": "Explore"
},
{
"id": "urn:li:container:59a5aa45397364e6882e793f1bc77b42",
"urn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42"
"id": "urn:li:container:63e49aaeb15b289d177acbb32625d577",
"urn": "urn:li:container:63e49aaeb15b289d177acbb32625d577"
}
]
}
@ -729,6 +805,22 @@
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataPlatformInstance",
"entityUrn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": false
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "looker-test",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "tag",
"entityUrn": "urn:li:tag:Dimension",

View File

@ -94,6 +94,8 @@ def test_looker_ingest(pytestconfig, tmp_path, mock_time):
"client_id": "foo",
"client_secret": "bar",
"extract_usage_history": False,
"platform_instance": "ap-south-1",
"include_platform_instance_in_urns": True,
},
},
"sink": {