https://github.com/datahub-project/datahub.git

fix(ingest/bigquery): ignore include constraints for biglake datasets (#11874)

commit dd892dfbb1
parent 766d36d164
@@ -152,6 +152,21 @@ class BigqueryDataset:
     snapshots: List[BigqueryTableSnapshot] = field(default_factory=list)
     columns: List[BigqueryColumn] = field(default_factory=list)
+
+    # Some INFORMATION_SCHEMA views are not available for BigLake tables
+    # based on Amazon S3 and Blob Storage data.
+    # https://cloud.google.com/bigquery/docs/omni-introduction#limitations
+    # Omni Locations - https://cloud.google.com/bigquery/docs/omni-introduction#locations
+    def is_biglake_dataset(self) -> bool:
+        return self.location is not None and self.location.lower().startswith(
+            ("aws-", "azure-")
+        )
+
+    def supports_table_constraints(self) -> bool:
+        return not self.is_biglake_dataset()
+
+    def supports_table_partitions(self) -> bool:
+        return not self.is_biglake_dataset()
 
 
 @dataclass
 class BigqueryProject:
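For context, BigQuery Omni places datasets in provider-prefixed locations such as aws-us-east-1 or azure-eastus2, which is what the prefix check above keys on. A minimal, standalone sketch of the helper's behavior, using a stripped-down stand-in rather than the real BigqueryDataset:

from dataclasses import dataclass
from typing import Optional


@dataclass
class Dataset:
    # Stripped-down stand-in for BigqueryDataset; only `location` matters here.
    location: Optional[str] = None

    def is_biglake_dataset(self) -> bool:
        # BigQuery Omni locations are prefixed with the cloud provider,
        # e.g. "aws-us-east-1" (S3) or "azure-eastus2" (Azure Blob Storage).
        return self.location is not None and self.location.lower().startswith(
            ("aws-", "azure-")
        )


assert Dataset("aws-us-east-1").is_biglake_dataset()
assert Dataset("azure-eastus2").is_biglake_dataset()
assert not Dataset("US").is_biglake_dataset()   # native BigQuery region
assert not Dataset(None).is_biglake_dataset()   # location unknown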
@@ -541,18 +556,26 @@ class BigQuerySchemaApi:
                         table_name=constraint.table_name,
                         type=constraint.constraint_type,
                         field_path=constraint.column_name,
-                        referenced_project_id=constraint.referenced_catalog
-                        if constraint.constraint_type == "FOREIGN KEY"
-                        else None,
-                        referenced_dataset=constraint.referenced_schema
-                        if constraint.constraint_type == "FOREIGN KEY"
-                        else None,
-                        referenced_table_name=constraint.referenced_table
-                        if constraint.constraint_type == "FOREIGN KEY"
-                        else None,
-                        referenced_column_name=constraint.referenced_column
-                        if constraint.constraint_type == "FOREIGN KEY"
-                        else None,
+                        referenced_project_id=(
+                            constraint.referenced_catalog
+                            if constraint.constraint_type == "FOREIGN KEY"
+                            else None
+                        ),
+                        referenced_dataset=(
+                            constraint.referenced_schema
+                            if constraint.constraint_type == "FOREIGN KEY"
+                            else None
+                        ),
+                        referenced_table_name=(
+                            constraint.referenced_table
+                            if constraint.constraint_type == "FOREIGN KEY"
+                            else None
+                        ),
+                        referenced_column_name=(
+                            constraint.referenced_column
+                            if constraint.constraint_type == "FOREIGN KEY"
+                            else None
+                        ),
                     )
                 )
             self.report.num_get_table_constraints_for_dataset_api_requests += 1
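This hunk is formatting-only: each referenced_* conditional is wrapped in parentheses so it can be indented inside its keyword argument, and the value is unchanged. A quick sketch with hypothetical inputs, not real API objects:

# Both spellings of the conditional evaluate identically; the parentheses
# exist purely for layout.
constraint_type = "PRIMARY KEY"
referenced_catalog = "some-project"

old_style = referenced_catalog if constraint_type == "FOREIGN KEY" else None
new_style = (
    referenced_catalog
    if constraint_type == "FOREIGN KEY"
    else None
)
assert old_style is None and new_style is None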
@@ -498,7 +498,10 @@ class BigQuerySchemaGenerator:
             report=self.report,
             rate_limiter=rate_limiter,
         )
-        if self.config.include_table_constraints:
+        if (
+            self.config.include_table_constraints
+            and bigquery_dataset.supports_table_constraints()
+        ):
             constraints = self.schema_api.get_table_constraints_for_dataset(
                 project_id=project_id, dataset_name=dataset_name, report=self.report
             )
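This is the behavioral change of the commit: even with include_table_constraints enabled, the constraint lookup is now skipped for BigLake datasets, whose INFORMATION_SCHEMA does not expose the constraint views. A minimal sketch of the gate, with hypothetical stand-ins for the source's config, dataset, and API objects:

from types import SimpleNamespace

# Hypothetical stand-ins for the real config and dataset objects.
config = SimpleNamespace(include_table_constraints=True)
biglake = SimpleNamespace(supports_table_constraints=lambda: False)
native = SimpleNamespace(supports_table_constraints=lambda: True)


def maybe_fetch_constraints(config, dataset, fetch):
    # Issue the constraint query only when the user asked for constraints
    # AND the dataset's INFORMATION_SCHEMA actually supports them.
    if config.include_table_constraints and dataset.supports_table_constraints():
        return fetch()
    return []


assert maybe_fetch_constraints(config, biglake, lambda: ["pk"]) == []
assert maybe_fetch_constraints(config, native, lambda: ["pk"]) == ["pk"]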
@@ -1157,9 +1160,11 @@ class BigQuerySchemaGenerator:
             # fields=[],
             fields=self.gen_schema_fields(
                 columns,
-                table.constraints
-                if (isinstance(table, BigqueryTable) and table.constraints)
-                else [],
+                (
+                    table.constraints
+                    if (isinstance(table, BigqueryTable) and table.constraints)
+                    else []
+                ),
             ),
             foreignKeys=foreign_keys if foreign_keys else None,
         )
@@ -1180,13 +1185,9 @@ class BigQuerySchemaGenerator:
     ) -> Iterable[BigqueryTable]:
         # In bigquery there is no way to query all tables in a Project id
         with PerfTimer() as timer:
-            # PARTITIONS INFORMATION_SCHEMA view is not available for BigLake tables
-            # based on Amazon S3 and Blob Storage data.
-            # https://cloud.google.com/bigquery/docs/omni-introduction#limitations
-            # Omni Locations - https://cloud.google.com/bigquery/docs/omni-introduction#locations
-            with_partitions = self.config.have_table_data_read_permission and not (
-                dataset.location
-                and dataset.location.lower().startswith(("aws-", "azure-"))
-            )
+            with_partitions = (
+                self.config.have_table_data_read_permission
+                and dataset.supports_table_partitions()
+            )
 
             # Partitions view throw exception if we try to query partition info for too many tables
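The rewritten with_partitions expression is intended to be equivalent to the inline location check it replaces; a small sketch verifying that over a few hypothetical inputs:

def old_with_partitions(have_read_permission, location):
    # Inline check removed by this commit.
    return have_read_permission and not (
        location and location.lower().startswith(("aws-", "azure-"))
    )


def new_with_partitions(have_read_permission, location):
    # Same decision routed through the new supports_table_partitions() helper.
    is_biglake = location is not None and location.lower().startswith(
        ("aws-", "azure-")
    )
    return have_read_permission and not is_biglake


for perm in (True, False):
    for loc in ("aws-us-east-1", "azure-eastus2", "US", "", None):
        assert old_with_partitions(perm, loc) == new_with_partitions(perm, loc)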