mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-11-01 02:56:10 +00:00
MINOR: BigQuery Improvement, Hive Partitioned Tables, NoneType issue resolved (#19429)
This commit is contained in:
parent
88315e2c84
commit
427a06cfa8
@ -667,6 +667,31 @@ class BigquerySource(LifeCycleQueryMixin, CommonDbSourceService, MultiDBSource):
|
||||
database = self.context.get().database
|
||||
table = self.client.get_table(fqn._build(database, schema_name, table_name))
|
||||
columns = inspector.get_columns(table_name, schema_name, db_name=database)
|
||||
if hasattr(table, "external_data_configuration") and hasattr(
|
||||
table.external_data_configuration, "hive_partitioning"
|
||||
):
|
||||
# Ingesting External Hive Partitioned Tables
|
||||
from google.cloud.bigquery.external_config import ( # pylint: disable=import-outside-toplevel
|
||||
HivePartitioningOptions,
|
||||
)
|
||||
|
||||
partition_details: HivePartitioningOptions = (
|
||||
table.external_data_configuration.hive_partitioning
|
||||
)
|
||||
return True, TablePartition(
|
||||
columns=[
|
||||
PartitionColumnDetails(
|
||||
columnName=self._get_partition_column_name(
|
||||
columns=columns,
|
||||
partition_field_name=field,
|
||||
),
|
||||
interval=str(partition_details._properties.get("mode")),
|
||||
intervalType=PartitionIntervalTypes.OTHER,
|
||||
)
|
||||
for field in partition_details._properties.get("fields")
|
||||
]
|
||||
)
|
||||
|
||||
if table.time_partitioning is not None:
|
||||
if table.time_partitioning.field:
|
||||
table_partition = TablePartition(
|
||||
|
||||
@ -24,6 +24,7 @@ class BigQueryProfiler(BigQueryProfilerInterface):
|
||||
return self.system_metrics_computer.get_system_metrics(
|
||||
table=runner.dataset,
|
||||
usage_location=self.service_connection_config.usageLocation,
|
||||
runner=runner,
|
||||
)
|
||||
|
||||
def initialize_system_metrics_computer(self) -> BigQuerySystemMetricsComputer:
|
||||
|
||||
@ -49,7 +49,7 @@ def validate_athena_injected_partitioning(
|
||||
|
||||
column_partitions: Optional[List[PartitionColumnDetails]] = table_partitions.columns
|
||||
if not column_partitions:
|
||||
raise RuntimeError("Table parition is set but no columns are defined.")
|
||||
raise RuntimeError("Table partition is set but no columns are defined.")
|
||||
|
||||
for column_partition in column_partitions:
|
||||
if column_partition.intervalType == PartitionIntervalTypes.INJECTED:
|
||||
@ -163,6 +163,7 @@ def _handle_bigquery_partition(
|
||||
partitionIntegerRangeStart=1,
|
||||
partitionIntegerRangeEnd=10000,
|
||||
)
|
||||
# TODO: Allow External Hive Partitioning for profiler
|
||||
raise TypeError(
|
||||
f"Unsupported partition type {partition.intervalType}. Skipping table"
|
||||
)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user