mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-11-07 05:53:46 +00:00
MINOR: BigQuery Improvement, Hive Partitioned Tables, Nonetype issue resolved (#19429)
This commit is contained in:
parent
88315e2c84
commit
427a06cfa8
@ -667,6 +667,31 @@ class BigquerySource(LifeCycleQueryMixin, CommonDbSourceService, MultiDBSource):
|
|||||||
database = self.context.get().database
|
database = self.context.get().database
|
||||||
table = self.client.get_table(fqn._build(database, schema_name, table_name))
|
table = self.client.get_table(fqn._build(database, schema_name, table_name))
|
||||||
columns = inspector.get_columns(table_name, schema_name, db_name=database)
|
columns = inspector.get_columns(table_name, schema_name, db_name=database)
|
||||||
|
if hasattr(table, "external_data_configuration") and hasattr(
|
||||||
|
table.external_data_configuration, "hive_partitioning"
|
||||||
|
):
|
||||||
|
# Ingesting External Hive Partitioned Tables
|
||||||
|
from google.cloud.bigquery.external_config import ( # pylint: disable=import-outside-toplevel
|
||||||
|
HivePartitioningOptions,
|
||||||
|
)
|
||||||
|
|
||||||
|
partition_details: HivePartitioningOptions = (
|
||||||
|
table.external_data_configuration.hive_partitioning
|
||||||
|
)
|
||||||
|
return True, TablePartition(
|
||||||
|
columns=[
|
||||||
|
PartitionColumnDetails(
|
||||||
|
columnName=self._get_partition_column_name(
|
||||||
|
columns=columns,
|
||||||
|
partition_field_name=field,
|
||||||
|
),
|
||||||
|
interval=str(partition_details._properties.get("mode")),
|
||||||
|
intervalType=PartitionIntervalTypes.OTHER,
|
||||||
|
)
|
||||||
|
for field in partition_details._properties.get("fields")
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
if table.time_partitioning is not None:
|
if table.time_partitioning is not None:
|
||||||
if table.time_partitioning.field:
|
if table.time_partitioning.field:
|
||||||
table_partition = TablePartition(
|
table_partition = TablePartition(
|
||||||
|
|||||||
@ -24,6 +24,7 @@ class BigQueryProfiler(BigQueryProfilerInterface):
|
|||||||
return self.system_metrics_computer.get_system_metrics(
|
return self.system_metrics_computer.get_system_metrics(
|
||||||
table=runner.dataset,
|
table=runner.dataset,
|
||||||
usage_location=self.service_connection_config.usageLocation,
|
usage_location=self.service_connection_config.usageLocation,
|
||||||
|
runner=runner,
|
||||||
)
|
)
|
||||||
|
|
||||||
def initialize_system_metrics_computer(self) -> BigQuerySystemMetricsComputer:
|
def initialize_system_metrics_computer(self) -> BigQuerySystemMetricsComputer:
|
||||||
|
|||||||
@ -49,7 +49,7 @@ def validate_athena_injected_partitioning(
|
|||||||
|
|
||||||
column_partitions: Optional[List[PartitionColumnDetails]] = table_partitions.columns
|
column_partitions: Optional[List[PartitionColumnDetails]] = table_partitions.columns
|
||||||
if not column_partitions:
|
if not column_partitions:
|
||||||
raise RuntimeError("Table parition is set but no columns are defined.")
|
raise RuntimeError("Table partition is set but no columns are defined.")
|
||||||
|
|
||||||
for column_partition in column_partitions:
|
for column_partition in column_partitions:
|
||||||
if column_partition.intervalType == PartitionIntervalTypes.INJECTED:
|
if column_partition.intervalType == PartitionIntervalTypes.INJECTED:
|
||||||
@ -163,6 +163,7 @@ def _handle_bigquery_partition(
|
|||||||
partitionIntegerRangeStart=1,
|
partitionIntegerRangeStart=1,
|
||||||
partitionIntegerRangeEnd=10000,
|
partitionIntegerRangeEnd=10000,
|
||||||
)
|
)
|
||||||
|
# TODO: Allow External Hive Partitioning for profiler
|
||||||
raise TypeError(
|
raise TypeError(
|
||||||
f"Unsupported partition type {partition.intervalType}. Skipping table"
|
f"Unsupported partition type {partition.intervalType}. Skipping table"
|
||||||
)
|
)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user