docs(bigquery): profiling report enhancement (#5342)

This commit is contained in:
Mugdha Hardikar 2022-07-06 18:38:35 +05:30 committed by GitHub
parent d60f789519
commit 77eaab609d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 29 additions and 1 deletions

View File

@ -460,6 +460,30 @@ class BigQuerySource(SQLAlchemySource):
return None
project_id = self.get_db_name(inspector)
_client: BigQueryClient = BigQueryClient(project=project_id)
# Read the metadata of every table in the schema so it can be included in the report.
base_query = (
f"SELECT "
f"table_id, "
f"size_bytes, "
f"last_modified_time, "
f"row_count, "
f"FROM {schema}.__TABLES__"
)
all_tables = _client.query(base_query)
report_tables: List[str] = [
"table_id, size_bytes, last_modified_time, row_count"
]
for table_row in all_tables:
report_tables.append(
f"{table_row.table_id}, {table_row.size_bytes}, {table_row.last_modified_time}, {table_row.row_count}"
)
report_key = f"{self._get_project_id(inspector)}.{schema}"
self.report.table_metadata[report_key] = report_tables
# Record the filter used to select profiling candidates for this schema.
# The leading " WHERE" keyword is removed as a whole prefix:
# str.lstrip(" WHERE") would strip a *set of characters* (space, W, H, E, R),
# so a condition starting with any of those letters would be truncated.
self.report.profile_table_selection_criteria[report_key] = (
    "no constraint"
    if profile_clause == ""
    else (
        profile_clause[len(" WHERE") :]
        if profile_clause.startswith(" WHERE")
        else profile_clause
    ).lstrip()
)
# Read the filtered tables. TODO: remove this second query and filter the results of the query above locally instead.
query = (
f"SELECT "
f"table_id, "
@ -483,6 +507,7 @@ class BigQuerySource(SQLAlchemySource):
logger.debug(
f"Generated profiling candidates for {schema}: {_profile_candidates}"
)
self.report.selected_profile_tables[report_key] = _profile_candidates
return _profile_candidates
def _get_bigquery_log_entries(

View File

@ -1,6 +1,6 @@
from dataclasses import dataclass, field
from datetime import datetime
from typing import Dict, Optional
from typing import Dict, List, Optional
import pydantic
@ -33,3 +33,6 @@ class BigQueryReport(SQLSourceReport):
audit_end_time: Optional[str] = None
upstream_lineage: Dict = field(default_factory=dict)
partition_info: Dict[str, str] = field(default_factory=dict)
table_metadata: Dict[str, List[str]] = field(default_factory=dict)
profile_table_selection_criteria: Dict[str, str] = field(default_factory=dict)
selected_profile_tables: Dict[str, List[str]] = field(default_factory=dict)