mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-01 11:19:05 +00:00
docs(bigquery): profiling report enhancement (#5342)
This commit is contained in:
parent
d60f789519
commit
77eaab609d
@ -460,6 +460,30 @@ class BigQuerySource(SQLAlchemySource):
|
||||
return None
|
||||
project_id = self.get_db_name(inspector)
|
||||
_client: BigQueryClient = BigQueryClient(project=project_id)
|
||||
# Read the metadata of every table in the dataset so it can be recorded in the report
|
||||
base_query = (
|
||||
f"SELECT "
|
||||
f"table_id, "
|
||||
f"size_bytes, "
|
||||
f"last_modified_time, "
|
||||
f"row_count, "
|
||||
f"FROM {schema}.__TABLES__"
|
||||
)
|
||||
all_tables = _client.query(base_query)
|
||||
report_tables: List[str] = [
|
||||
"table_id, size_bytes, last_modified_time, row_count"
|
||||
]
|
||||
for table_row in all_tables:
|
||||
report_tables.append(
|
||||
f"{table_row.table_id}, {table_row.size_bytes}, {table_row.last_modified_time}, {table_row.row_count}"
|
||||
)
|
||||
report_key = f"{self._get_project_id(inspector)}.{schema}"
|
||||
self.report.table_metadata[report_key] = report_tables
|
||||
self.report.profile_table_selection_criteria[report_key] = (
|
||||
"no constraint" if profile_clause == "" else profile_clause.lstrip(" WHERE")
|
||||
)
|
||||
|
||||
# Read only the filtered tables. TODO: remove this second query and filter the results of the query above locally instead.
|
||||
query = (
|
||||
f"SELECT "
|
||||
f"table_id, "
|
||||
@ -483,6 +507,7 @@ class BigQuerySource(SQLAlchemySource):
|
||||
logger.debug(
|
||||
f"Generated profiling candidates for {schema}: {_profile_candidates}"
|
||||
)
|
||||
self.report.selected_profile_tables[report_key] = _profile_candidates
|
||||
return _profile_candidates
|
||||
|
||||
def _get_bigquery_log_entries(
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Dict, Optional
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
import pydantic
|
||||
|
||||
@ -33,3 +33,6 @@ class BigQueryReport(SQLSourceReport):
|
||||
audit_end_time: Optional[str] = None
|
||||
upstream_lineage: Dict = field(default_factory=dict)
|
||||
partition_info: Dict[str, str] = field(default_factory=dict)
|
||||
table_metadata: Dict[str, List[str]] = field(default_factory=dict)
|
||||
profile_table_selection_criteria: Dict[str, str] = field(default_factory=dict)
|
||||
selected_profile_tables: Dict[str, List[str]] = field(default_factory=dict)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user