From c677a06fd89cc79d22f858112f6516a79f626390 Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Wed, 8 Jun 2022 14:18:22 +0200 Subject: [PATCH] feat(ingestion): bigquery-usage - Collect stats from read event reasons (#5118) --- .../src/datahub/ingestion/source/usage/bigquery_usage.py | 5 +++++ .../datahub/ingestion/source_report/usage/bigquery_usage.py | 3 +++ 2 files changed, 8 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/usage/bigquery_usage.py b/metadata-ingestion/src/datahub/ingestion/source/usage/bigquery_usage.py index 7f14f5e5c2..7db38d2148 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/usage/bigquery_usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/usage/bigquery_usage.py @@ -992,6 +992,11 @@ class BigQueryUsageSource(Source): if not self._is_table_allowed(event.resource): self.report.num_filtered_read_events += 1 continue + + if event.readReason: + self.report.read_reasons_stat[event.readReason] = ( + self.report.read_reasons_stat.get(event.readReason, 0) + 1 + ) self.report.num_read_events += 1 missing_query_entry = QueryEvent.get_missing_key_entry(entry) diff --git a/metadata-ingestion/src/datahub/ingestion/source_report/usage/bigquery_usage.py b/metadata-ingestion/src/datahub/ingestion/source_report/usage/bigquery_usage.py index 89cb27fb93..766e342da9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source_report/usage/bigquery_usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source_report/usage/bigquery_usage.py @@ -25,6 +25,9 @@ class BigQueryUsageSourceReport(SourceReport): log_entry_end_time: Optional[str] = None num_usage_workunits_emitted: Optional[int] = None num_operational_stats_workunits_emitted: Optional[int] = None + read_reasons_stat: Counter[str] = dataclasses.field( + default_factory=collections.Counter + ) def report_dropped(self, key: str) -> None: self.dropped_table[key] += 1