mirror of
https://github.com/datahub-project/datahub.git
synced 2025-12-12 18:47:45 +00:00
feat(ingest/looker): Do not emit usage for non-ingested dashboards and charts (#11647)
This commit is contained in:
parent
047644b888
commit
09a9b6eef9
@ -1408,6 +1408,15 @@ class LookerDashboardSourceReport(StaleEntityRemovalSourceReport):
|
||||
dashboards_with_activity: LossySet[str] = dataclasses_field(
|
||||
default_factory=LossySet
|
||||
)
|
||||
|
||||
# Entities that don't seem to exist, so we don't emit usage aspects for them despite having usage data
|
||||
dashboards_skipped_for_usage: LossySet[str] = dataclasses_field(
|
||||
default_factory=LossySet
|
||||
)
|
||||
charts_skipped_for_usage: LossySet[str] = dataclasses_field(
|
||||
default_factory=LossySet
|
||||
)
|
||||
|
||||
stage_latency: List[StageLatency] = dataclasses_field(default_factory=list)
|
||||
_looker_explore_registry: Optional[LookerExploreRegistry] = None
|
||||
total_explores: int = 0
|
||||
|
||||
@ -68,6 +68,7 @@ from datahub.ingestion.source.looker.looker_common import (
|
||||
ViewField,
|
||||
ViewFieldType,
|
||||
gen_model_key,
|
||||
get_urn_looker_element_id,
|
||||
)
|
||||
from datahub.ingestion.source.looker.looker_config import LookerDashboardSourceConfig
|
||||
from datahub.ingestion.source.looker.looker_lib_wrapper import LookerAPI
|
||||
@ -165,6 +166,9 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
||||
# Required, as we do not ingest all folders but only those that have dashboards/looks
|
||||
self.processed_folders: List[str] = []
|
||||
|
||||
# Keep track of ingested chart urns, to omit usage for non-ingested entities
|
||||
self.chart_urns: Set[str] = set()
|
||||
|
||||
@staticmethod
|
||||
def test_connection(config_dict: dict) -> TestConnectionReport:
|
||||
test_report = TestConnectionReport()
|
||||
@ -642,6 +646,7 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
||||
chart_urn = self._make_chart_urn(
|
||||
element_id=dashboard_element.get_urn_element_id()
|
||||
)
|
||||
self.chart_urns.add(chart_urn)
|
||||
chart_snapshot = ChartSnapshot(
|
||||
urn=chart_urn,
|
||||
aspects=[Status(removed=False)],
|
||||
@ -1380,7 +1385,9 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
||||
yield from self._emit_folder_as_container(folder)
|
||||
|
||||
def extract_usage_stat(
|
||||
self, looker_dashboards: List[looker_usage.LookerDashboardForUsage]
|
||||
self,
|
||||
looker_dashboards: List[looker_usage.LookerDashboardForUsage],
|
||||
ingested_chart_urns: Set[str],
|
||||
) -> List[MetadataChangeProposalWrapper]:
|
||||
looks: List[looker_usage.LookerChartForUsage] = []
|
||||
# Filter out the looks from all dashboards
|
||||
@ -1391,6 +1398,15 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
||||
|
||||
# dedup looks
|
||||
looks = list({str(look.id): look for look in looks}.values())
|
||||
filtered_looks = []
|
||||
for look in looks:
|
||||
if not look.id:
|
||||
continue
|
||||
chart_urn = self._make_chart_urn(get_urn_looker_element_id(look.id))
|
||||
if chart_urn in ingested_chart_urns:
|
||||
filtered_looks.append(look)
|
||||
else:
|
||||
self.reporter.charts_skipped_for_usage.add(look.id)
|
||||
|
||||
# Keep stat generators to generate entity stat aspect later
|
||||
stat_generator_config: looker_usage.StatGeneratorConfig = (
|
||||
@ -1414,7 +1430,7 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
||||
stat_generator_config,
|
||||
self.reporter,
|
||||
self._make_chart_urn,
|
||||
looks,
|
||||
filtered_looks,
|
||||
)
|
||||
|
||||
mcps: List[MetadataChangeProposalWrapper] = []
|
||||
@ -1669,7 +1685,7 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
||||
if self.source_config.extract_usage_history:
|
||||
self.reporter.report_stage_start("usage_extraction")
|
||||
usage_mcps: List[MetadataChangeProposalWrapper] = self.extract_usage_stat(
|
||||
looker_dashboards_for_usage
|
||||
looker_dashboards_for_usage, self.chart_urns
|
||||
)
|
||||
for usage_mcp in usage_mcps:
|
||||
yield usage_mcp.as_workunit()
|
||||
|
||||
@ -42,6 +42,7 @@ from datahub.metadata.schema_classes import (
|
||||
TimeWindowSizeClass,
|
||||
_Aspect as AspectAbstract,
|
||||
)
|
||||
from datahub.utilities.lossy_collections import LossySet
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@ -170,7 +171,7 @@ class BaseStatGenerator(ABC):
|
||||
self.config = config
|
||||
self.looker_models = looker_models
|
||||
# Later, this mapping helps find which Looker entity each query result row refers to
|
||||
self.id_vs_model: Dict[str, ModelForUsage] = {
|
||||
self.id_to_model: Dict[str, ModelForUsage] = {
|
||||
self.get_id(looker_object): looker_object for looker_object in looker_models
|
||||
}
|
||||
self.post_filter = len(self.looker_models) > 100
|
||||
@ -225,6 +226,10 @@ class BaseStatGenerator(ABC):
|
||||
def get_id_from_row(self, row: dict) -> str:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def report_skip_set(self) -> LossySet[str]:
|
||||
pass
|
||||
|
||||
def create_mcp(
|
||||
self, model: ModelForUsage, aspect: Aspect
|
||||
) -> MetadataChangeProposalWrapper:
|
||||
@ -258,20 +263,11 @@ class BaseStatGenerator(ABC):
|
||||
|
||||
return entity_stat_aspect
|
||||
|
||||
def _process_absolute_aspect(self) -> List[Tuple[ModelForUsage, AspectAbstract]]:
|
||||
aspects: List[Tuple[ModelForUsage, AspectAbstract]] = []
|
||||
for looker_object in self.looker_models:
|
||||
aspects.append(
|
||||
(looker_object, self.to_entity_absolute_stat_aspect(looker_object))
|
||||
)
|
||||
|
||||
return aspects
|
||||
|
||||
def _fill_user_stat_aspect(
|
||||
self,
|
||||
entity_usage_stat: Dict[Tuple[str, str], Aspect],
|
||||
user_wise_rows: List[Dict],
|
||||
) -> Iterable[Tuple[ModelForUsage, Aspect]]:
|
||||
) -> Iterable[Tuple[str, Aspect]]:
|
||||
logger.debug("Entering fill user stat aspect")
|
||||
|
||||
# We first resolve all the users using a threadpool to warm up the cache
|
||||
@ -300,7 +296,7 @@ class BaseStatGenerator(ABC):
|
||||
|
||||
for row in user_wise_rows:
|
||||
# Confirm looker object was given for stat generation
|
||||
looker_object = self.id_vs_model.get(self.get_id_from_row(row))
|
||||
looker_object = self.id_to_model.get(self.get_id_from_row(row))
|
||||
if looker_object is None:
|
||||
logger.warning(
|
||||
"Looker object with id({}) was not register with stat generator".format(
|
||||
@ -338,7 +334,7 @@ class BaseStatGenerator(ABC):
|
||||
logger.debug("Starting to yield answers for user-wise counts")
|
||||
|
||||
for (id, _), aspect in entity_usage_stat.items():
|
||||
yield self.id_vs_model[id], aspect
|
||||
yield id, aspect
|
||||
|
||||
def _execute_query(self, query: LookerQuery, query_name: str) -> List[Dict]:
|
||||
rows = []
|
||||
@ -357,7 +353,7 @@ class BaseStatGenerator(ABC):
|
||||
)
|
||||
if self.post_filter:
|
||||
logger.debug("post filtering")
|
||||
rows = [r for r in rows if self.get_id_from_row(r) in self.id_vs_model]
|
||||
rows = [r for r in rows if self.get_id_from_row(r) in self.id_to_model]
|
||||
logger.debug("Filtered down to %d rows", len(rows))
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to execute {query_name} query: {e}")
|
||||
@ -378,7 +374,8 @@ class BaseStatGenerator(ABC):
|
||||
return
|
||||
|
||||
# yield absolute stat for looker entities
|
||||
for looker_object, aspect in self._process_absolute_aspect(): # type: ignore
|
||||
for looker_object in self.looker_models:
|
||||
aspect = self.to_entity_absolute_stat_aspect(looker_object)
|
||||
yield self.create_mcp(looker_object, aspect)
|
||||
|
||||
# Execute query and process the raw json which contains stat information
|
||||
@ -399,10 +396,13 @@ class BaseStatGenerator(ABC):
|
||||
)
|
||||
user_wise_rows = self._execute_query(user_wise_query_with_filters, "user_query")
|
||||
# yield stat aspects with user-wise counts filled in for each entity
|
||||
for looker_object, aspect in self._fill_user_stat_aspect(
|
||||
for object_id, aspect in self._fill_user_stat_aspect(
|
||||
entity_usage_stat, user_wise_rows
|
||||
):
|
||||
yield self.create_mcp(looker_object, aspect)
|
||||
if object_id in self.id_to_model:
|
||||
yield self.create_mcp(self.id_to_model[object_id], aspect)
|
||||
else:
|
||||
self.report_skip_set().add(object_id)
|
||||
|
||||
|
||||
class DashboardStatGenerator(BaseStatGenerator):
|
||||
@ -425,6 +425,9 @@ class DashboardStatGenerator(BaseStatGenerator):
|
||||
def get_stats_generator_name(self) -> str:
|
||||
return "DashboardStats"
|
||||
|
||||
def report_skip_set(self) -> LossySet[str]:
|
||||
return self.report.dashboards_skipped_for_usage
|
||||
|
||||
def get_filter(self) -> Dict[ViewField, str]:
|
||||
return {
|
||||
HistoryViewField.HISTORY_DASHBOARD_ID: ",".join(
|
||||
@ -541,6 +544,9 @@ class LookStatGenerator(BaseStatGenerator):
|
||||
def get_stats_generator_name(self) -> str:
|
||||
return "ChartStats"
|
||||
|
||||
def report_skip_set(self) -> LossySet[str]:
|
||||
return self.report.charts_skipped_for_usage
|
||||
|
||||
def get_filter(self) -> Dict[ViewField, str]:
|
||||
return {
|
||||
LookViewField.LOOK_ID: ",".join(
|
||||
|
||||
@ -1,4 +1,66 @@
|
||||
[
|
||||
{
|
||||
"proposedSnapshot": {
|
||||
"com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": {
|
||||
"urn": "urn:li:chart:(looker,dashboard_elements.3)",
|
||||
"aspects": [
|
||||
{
|
||||
"com.linkedin.pegasus2avro.common.Status": {
|
||||
"removed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"com.linkedin.pegasus2avro.chart.ChartInfo": {
|
||||
"customProperties": {
|
||||
"upstream_fields": ""
|
||||
},
|
||||
"title": "",
|
||||
"description": "",
|
||||
"lastModified": {
|
||||
"created": {
|
||||
"time": 0,
|
||||
"actor": "urn:li:corpuser:unknown"
|
||||
},
|
||||
"lastModified": {
|
||||
"time": 0,
|
||||
"actor": "urn:li:corpuser:unknown"
|
||||
}
|
||||
},
|
||||
"chartUrl": "https://looker.company.com/x/",
|
||||
"inputs": [
|
||||
{
|
||||
"string": "urn:li:dataset:(urn:li:dataPlatform:looker,look_data.explore.look_view,PROD)"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "looker-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "chart",
|
||||
"entityUrn": "urn:li:chart:(looker,dashboard_elements.3)",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "subTypes",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"typeNames": [
|
||||
"Look"
|
||||
]
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "looker-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"proposedSnapshot": {
|
||||
"com.linkedin.pegasus2avro.metadata.snapshot.DashboardSnapshot": {
|
||||
@ -9,7 +71,9 @@
|
||||
"customProperties": {},
|
||||
"title": "foo",
|
||||
"description": "lorem ipsum",
|
||||
"charts": [],
|
||||
"charts": [
|
||||
"urn:li:chart:(looker,dashboard_elements.3)"
|
||||
],
|
||||
"datasets": [],
|
||||
"dashboards": [],
|
||||
"lastModified": {
|
||||
@ -89,6 +153,22 @@
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "chart",
|
||||
"entityUrn": "urn:li:chart:(looker,dashboard_elements.3)",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "inputFields",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"fields": []
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "looker-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "dashboard",
|
||||
"entityUrn": "urn:li:dashboard:(looker,dashboards.1)",
|
||||
@ -215,6 +295,98 @@
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "container",
|
||||
"entityUrn": "urn:li:container:a2a7aa63752695f9a1705faed9d03ffb",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "containerProperties",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"customProperties": {
|
||||
"platform": "looker",
|
||||
"env": "PROD",
|
||||
"model_name": "look_data"
|
||||
},
|
||||
"name": "look_data",
|
||||
"env": "PROD"
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "looker-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "container",
|
||||
"entityUrn": "urn:li:container:a2a7aa63752695f9a1705faed9d03ffb",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "status",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"removed": false
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "looker-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "container",
|
||||
"entityUrn": "urn:li:container:a2a7aa63752695f9a1705faed9d03ffb",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "dataPlatformInstance",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"platform": "urn:li:dataPlatform:looker"
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "looker-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "container",
|
||||
"entityUrn": "urn:li:container:a2a7aa63752695f9a1705faed9d03ffb",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "subTypes",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"typeNames": [
|
||||
"LookML Model"
|
||||
]
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "looker-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "container",
|
||||
"entityUrn": "urn:li:container:a2a7aa63752695f9a1705faed9d03ffb",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "browsePathsV2",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"path": [
|
||||
{
|
||||
"id": "Explore"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "looker-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"proposedSnapshot": {
|
||||
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
|
||||
@ -389,6 +561,180 @@
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"proposedSnapshot": {
|
||||
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
|
||||
"urn": "urn:li:dataset:(urn:li:dataPlatform:looker,look_data.explore.look_view,PROD)",
|
||||
"aspects": [
|
||||
{
|
||||
"com.linkedin.pegasus2avro.common.BrowsePaths": {
|
||||
"paths": [
|
||||
"/Explore/look_data"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"com.linkedin.pegasus2avro.common.Status": {
|
||||
"removed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
|
||||
"customProperties": {
|
||||
"project": "lkml_samples",
|
||||
"model": "look_data",
|
||||
"looker.explore.label": "My Explore View",
|
||||
"looker.explore.name": "look_view",
|
||||
"looker.explore.file": "test_source_file.lkml"
|
||||
},
|
||||
"externalUrl": "https://looker.company.com/explore/look_data/look_view",
|
||||
"name": "My Explore View",
|
||||
"description": "lorem ipsum",
|
||||
"tags": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"com.linkedin.pegasus2avro.dataset.UpstreamLineage": {
|
||||
"upstreams": [
|
||||
{
|
||||
"auditStamp": {
|
||||
"time": 1586847600000,
|
||||
"actor": "urn:li:corpuser:datahub"
|
||||
},
|
||||
"dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.underlying_view,PROD)",
|
||||
"type": "VIEW"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
|
||||
"schemaName": "look_view",
|
||||
"platform": "urn:li:dataPlatform:looker",
|
||||
"version": 0,
|
||||
"created": {
|
||||
"time": 0,
|
||||
"actor": "urn:li:corpuser:unknown"
|
||||
},
|
||||
"lastModified": {
|
||||
"time": 0,
|
||||
"actor": "urn:li:corpuser:unknown"
|
||||
},
|
||||
"hash": "",
|
||||
"platformSchema": {
|
||||
"com.linkedin.pegasus2avro.schema.OtherSchema": {
|
||||
"rawSchema": ""
|
||||
}
|
||||
},
|
||||
"fields": [
|
||||
{
|
||||
"fieldPath": "dim1",
|
||||
"nullable": false,
|
||||
"description": "dimension one description",
|
||||
"label": "Dimensions One Label",
|
||||
"type": {
|
||||
"type": {
|
||||
"com.linkedin.pegasus2avro.schema.StringType": {}
|
||||
}
|
||||
},
|
||||
"nativeDataType": "string",
|
||||
"recursive": false,
|
||||
"globalTags": {
|
||||
"tags": [
|
||||
{
|
||||
"tag": "urn:li:tag:Dimension"
|
||||
}
|
||||
]
|
||||
},
|
||||
"isPartOfKey": false
|
||||
}
|
||||
],
|
||||
"primaryKeys": []
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "looker-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "dataset",
|
||||
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,look_data.explore.look_view,PROD)",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "subTypes",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"typeNames": [
|
||||
"Explore"
|
||||
]
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "looker-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "dataset",
|
||||
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,look_data.explore.look_view,PROD)",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "embed",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"renderUrl": "https://looker.company.com/embed/explore/look_data/look_view"
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "looker-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "dataset",
|
||||
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,look_data.explore.look_view,PROD)",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "container",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"container": "urn:li:container:a2a7aa63752695f9a1705faed9d03ffb"
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "looker-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "dataset",
|
||||
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,look_data.explore.look_view,PROD)",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "browsePathsV2",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"path": [
|
||||
{
|
||||
"id": "Explore"
|
||||
},
|
||||
{
|
||||
"id": "urn:li:container:a2a7aa63752695f9a1705faed9d03ffb",
|
||||
"urn": "urn:li:container:a2a7aa63752695f9a1705faed9d03ffb"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "looker-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"proposedSnapshot": {
|
||||
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
|
||||
@ -747,22 +1093,6 @@
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "chart",
|
||||
"entityUrn": "urn:li:chart:(looker,dashboard_elements.3)",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "status",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"removed": false
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "looker-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "tag",
|
||||
"entityUrn": "urn:li:tag:Dimension",
|
||||
|
||||
@ -31,7 +31,10 @@ from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
||||
from datahub.ingestion.api.source import SourceReport
|
||||
from datahub.ingestion.run.pipeline import Pipeline, PipelineInitError
|
||||
from datahub.ingestion.source.looker import looker_common, looker_usage
|
||||
from datahub.ingestion.source.looker.looker_common import LookerExplore
|
||||
from datahub.ingestion.source.looker.looker_common import (
|
||||
LookerDashboardSourceReport,
|
||||
LookerExplore,
|
||||
)
|
||||
from datahub.ingestion.source.looker.looker_config import LookerCommonConfig
|
||||
from datahub.ingestion.source.looker.looker_lib_wrapper import (
|
||||
LookerAPI,
|
||||
@ -414,7 +417,9 @@ def setup_mock_dashboard_multiple_charts(mocked_client):
|
||||
)
|
||||
|
||||
|
||||
def setup_mock_dashboard_with_usage(mocked_client):
|
||||
def setup_mock_dashboard_with_usage(
|
||||
mocked_client: mock.MagicMock, skip_look: bool = False
|
||||
) -> None:
|
||||
mocked_client.all_dashboards.return_value = [Dashboard(id="1")]
|
||||
mocked_client.dashboard.return_value = Dashboard(
|
||||
id="1",
|
||||
@ -437,7 +442,13 @@ def setup_mock_dashboard_with_usage(mocked_client):
|
||||
),
|
||||
),
|
||||
DashboardElement(
|
||||
id="3", type="", look=LookWithQuery(id="3", view_count=30)
|
||||
id="3",
|
||||
type="" if skip_look else "vis", # Looks only ingested if type == `vis`
|
||||
look=LookWithQuery(
|
||||
id="3",
|
||||
view_count=30,
|
||||
query=Query(model="look_data", view="look_view"),
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
@ -611,6 +622,12 @@ def side_effect_query_inline(
|
||||
HistoryViewField.HISTORY_DASHBOARD_USER: 1,
|
||||
HistoryViewField.HISTORY_DASHBOARD_RUN_COUNT: 5,
|
||||
},
|
||||
{
|
||||
HistoryViewField.HISTORY_DASHBOARD_ID: "5",
|
||||
HistoryViewField.HISTORY_CREATED_DATE: "2022-07-07",
|
||||
HistoryViewField.HISTORY_DASHBOARD_USER: 1,
|
||||
HistoryViewField.HISTORY_DASHBOARD_RUN_COUNT: 5,
|
||||
},
|
||||
]
|
||||
),
|
||||
looker_usage.QueryId.DASHBOARD_PER_USER_PER_DAY_USAGE_STAT: json.dumps(
|
||||
@ -790,6 +807,70 @@ def test_looker_ingest_usage_history(pytestconfig, tmp_path, mock_time):
|
||||
)
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
def test_looker_filter_usage_history(pytestconfig, tmp_path, mock_time):
|
||||
mocked_client = mock.MagicMock()
|
||||
with mock.patch("looker_sdk.init40") as mock_sdk:
|
||||
mock_sdk.return_value = mocked_client
|
||||
setup_mock_dashboard_with_usage(mocked_client, skip_look=True)
|
||||
mocked_client.run_inline_query.side_effect = side_effect_query_inline
|
||||
setup_mock_explore(mocked_client)
|
||||
setup_mock_user(mocked_client)
|
||||
|
||||
temp_output_file = f"{tmp_path}/looker_mces.json"
|
||||
pipeline = Pipeline.create(
|
||||
{
|
||||
"run_id": "looker-test",
|
||||
"source": {
|
||||
"type": "looker",
|
||||
"config": {
|
||||
"base_url": "https://looker.company.com",
|
||||
"client_id": "foo",
|
||||
"client_secret": "bar",
|
||||
"extract_usage_history": True,
|
||||
"max_threads": 1,
|
||||
},
|
||||
},
|
||||
"sink": {
|
||||
"type": "file",
|
||||
"config": {
|
||||
"filename": temp_output_file,
|
||||
},
|
||||
},
|
||||
}
|
||||
)
|
||||
pipeline.run()
|
||||
pipeline.pretty_print_summary()
|
||||
pipeline.raise_from_status()
|
||||
|
||||
# There should be 4 dashboardUsageStatistics aspects (one absolute and 3 timeseries)
|
||||
dashboard_usage_aspect_count = 0
|
||||
# There should be 0 chartUsageStatistics -- filtered by set of ingested charts
|
||||
chart_usage_aspect_count = 0
|
||||
with open(temp_output_file) as f:
|
||||
temp_output_dict = json.load(f)
|
||||
for element in temp_output_dict:
|
||||
if (
|
||||
element.get("entityType") == "dashboard"
|
||||
and element.get("aspectName") == "dashboardUsageStatistics"
|
||||
):
|
||||
dashboard_usage_aspect_count = dashboard_usage_aspect_count + 1
|
||||
if (
|
||||
element.get("entityType") == "chart"
|
||||
and element.get("aspectName") == "chartUsageStatistics"
|
||||
):
|
||||
chart_usage_aspect_count = chart_usage_aspect_count + 1
|
||||
|
||||
assert dashboard_usage_aspect_count == 4
|
||||
assert chart_usage_aspect_count == 0
|
||||
|
||||
source_report = cast(LookerDashboardSourceReport, pipeline.source.get_report())
|
||||
# From timeseries query
|
||||
assert str(source_report.dashboards_skipped_for_usage) == str(["5"])
|
||||
# From dashboard element
|
||||
assert str(source_report.charts_skipped_for_usage) == str(["3"])
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
def test_looker_ingest_stateful(pytestconfig, tmp_path, mock_time, mock_datahub_graph):
|
||||
output_file_name: str = "looker_mces.json"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user