MINOR: BigQuery Improvement, Hive Partitioned Tables, NoneType issue resolved (#19429)

Ayush Shah 2025-01-18 19:41:30 +05:30 committed by ulixius9
parent 88315e2c84
commit 427a06cfa8
3 changed files with 28 additions and 1 deletion

View File

@@ -667,6 +667,31 @@ class BigquerySource(LifeCycleQueryMixin, CommonDbSourceService, MultiDBSource):
         database = self.context.get().database
         table = self.client.get_table(fqn._build(database, schema_name, table_name))
         columns = inspector.get_columns(table_name, schema_name, db_name=database)
+        if hasattr(table, "external_data_configuration") and hasattr(
+            table.external_data_configuration, "hive_partitioning"
+        ):
+            # Ingesting External Hive Partitioned Tables
+            from google.cloud.bigquery.external_config import (  # pylint: disable=import-outside-toplevel
+                HivePartitioningOptions,
+            )
+
+            partition_details: HivePartitioningOptions = (
+                table.external_data_configuration.hive_partitioning
+            )
+            return True, TablePartition(
+                columns=[
+                    PartitionColumnDetails(
+                        columnName=self._get_partition_column_name(
+                            columns=columns,
+                            partition_field_name=field,
+                        ),
+                        interval=str(partition_details._properties.get("mode")),
+                        intervalType=PartitionIntervalTypes.OTHER,
+                    )
+                    for field in partition_details._properties.get("fields")
+                ]
+            )
+
         if table.time_partitioning is not None:
             if table.time_partitioning.field:
                 table_partition = TablePartition(
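
As a side note, here is a minimal sketch of how the hive-partitioning metadata read by the new branch can be inspected directly with the google-cloud-bigquery client. The table name is a placeholder, and the detected partition key names (`fields`) live only in the raw API resource, which is presumably why the commit reads them through `_properties`:

    from google.cloud import bigquery

    client = bigquery.Client()

    # Placeholder fully-qualified name of an external hive-partitioned table.
    table = client.get_table("my-project.my_dataset.my_external_table")

    external_config = table.external_data_configuration
    if external_config is not None and external_config.hive_partitioning is not None:
        hive_options = external_config.hive_partitioning
        # `mode` is a public property (AUTO, STRINGS, or CUSTOM); the partition
        # key names are only present in the raw resource dict.
        print("mode:", hive_options.mode)
        print("fields:", hive_options._properties.get("fields"))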

View File

@@ -24,6 +24,7 @@ class BigQueryProfiler(BigQueryProfilerInterface):
         return self.system_metrics_computer.get_system_metrics(
             table=runner.dataset,
             usage_location=self.service_connection_config.usageLocation,
+            runner=runner,
         )
 
     def initialize_system_metrics_computer(self) -> BigQuerySystemMetricsComputer:
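
A note on the `runner=runner` addition: the commit title's "NoneType issue" suggests get_system_metrics previously reached for a runner that was never set. The actual BigQuerySystemMetricsComputer signature is not shown in this diff, so the following is only a hypothetical sketch of the pattern, with all names illustrative:

    from typing import Any, List, Optional


    class SystemMetricsComputer:
        """Illustrative stand-in, not the actual OpenMetadata class."""

        def __init__(self, runner: Optional[Any] = None):
            self.runner = runner  # may still be None after construction

        def get_system_metrics(
            self, table: Any, usage_location: str, runner: Optional[Any] = None
        ) -> List[Any]:
            # Prefer the runner the caller passes in explicitly; failing loudly
            # here beats an opaque "'NoneType' object has no attribute ..." later.
            runner = runner or self.runner
            if runner is None:
                raise ValueError("get_system_metrics() needs a runner")
            return []  # a real implementation would query system tables via the runner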

View File

@@ -49,7 +49,7 @@ def validate_athena_injected_partitioning(
 
     column_partitions: Optional[List[PartitionColumnDetails]] = table_partitions.columns
     if not column_partitions:
-        raise RuntimeError("Table parition is set but no columns are defined.")
+        raise RuntimeError("Table partition is set but no columns are defined.")
 
     for column_partition in column_partitions:
         if column_partition.intervalType == PartitionIntervalTypes.INJECTED:
@@ -163,6 +163,7 @@ def _handle_bigquery_partition(
             partitionIntegerRangeStart=1,
             partitionIntegerRangeEnd=10000,
         )
+    # TODO: Allow External Hive Partitioning for profiler
     raise TypeError(
         f"Unsupported partition type {partition.intervalType}. Skipping table"
     )
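
Until the TODO above is addressed, external hive-partitioned tables will hit this TypeError during profiling. Below is a hedged sketch of how a caller can honor the "Skipping table" contract; the helper names are illustrative, not taken from the diff:

    import logging
    from typing import Any, Callable, Iterable, Optional

    logger = logging.getLogger(__name__)


    def profile_all(
        tables: Iterable[Any],
        handle_partition: Callable[[Any], Optional[Any]],
        profile_table: Callable[[Any, Optional[Any]], None],
    ) -> None:
        for table in tables:
            try:
                partition_config = handle_partition(table)
            except TypeError as exc:
                # External hive-partitioned tables land here until the TODO is done.
                logger.warning("%s (%s)", exc, table)
                continue
            profile_table(table, partition_config)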