mirror of
https://github.com/datahub-project/datahub.git
synced 2025-09-26 17:45:30 +00:00
fix(ingest/bigquery): ignore include constraints for biglake datasets (#11874)
This commit is contained in:
parent
766d36d164
commit
dd892dfbb1
@ -152,6 +152,21 @@ class BigqueryDataset:
|
|||||||
snapshots: List[BigqueryTableSnapshot] = field(default_factory=list)
|
snapshots: List[BigqueryTableSnapshot] = field(default_factory=list)
|
||||||
columns: List[BigqueryColumn] = field(default_factory=list)
|
columns: List[BigqueryColumn] = field(default_factory=list)
|
||||||
|
|
||||||
|
# Some INFORMATION_SCHEMA views are not available for BigLake tables
|
||||||
|
# based on Amazon S3 and Blob Storage data.
|
||||||
|
# https://cloud.google.com/bigquery/docs/omni-introduction#limitations
|
||||||
|
# Omni Locations - https://cloud.google.com/bigquery/docs/omni-introduction#locations
|
||||||
|
def is_biglake_dataset(self) -> bool:
|
||||||
|
return self.location is not None and self.location.lower().startswith(
|
||||||
|
("aws-", "azure-")
|
||||||
|
)
|
||||||
|
|
||||||
|
def supports_table_constraints(self) -> bool:
|
||||||
|
return not self.is_biglake_dataset()
|
||||||
|
|
||||||
|
def supports_table_partitions(self) -> bool:
|
||||||
|
return not self.is_biglake_dataset()
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class BigqueryProject:
|
class BigqueryProject:
|
||||||
@ -541,18 +556,26 @@ class BigQuerySchemaApi:
|
|||||||
table_name=constraint.table_name,
|
table_name=constraint.table_name,
|
||||||
type=constraint.constraint_type,
|
type=constraint.constraint_type,
|
||||||
field_path=constraint.column_name,
|
field_path=constraint.column_name,
|
||||||
referenced_project_id=constraint.referenced_catalog
|
referenced_project_id=(
|
||||||
|
constraint.referenced_catalog
|
||||||
if constraint.constraint_type == "FOREIGN KEY"
|
if constraint.constraint_type == "FOREIGN KEY"
|
||||||
else None,
|
else None
|
||||||
referenced_dataset=constraint.referenced_schema
|
),
|
||||||
|
referenced_dataset=(
|
||||||
|
constraint.referenced_schema
|
||||||
if constraint.constraint_type == "FOREIGN KEY"
|
if constraint.constraint_type == "FOREIGN KEY"
|
||||||
else None,
|
else None
|
||||||
referenced_table_name=constraint.referenced_table
|
),
|
||||||
|
referenced_table_name=(
|
||||||
|
constraint.referenced_table
|
||||||
if constraint.constraint_type == "FOREIGN KEY"
|
if constraint.constraint_type == "FOREIGN KEY"
|
||||||
else None,
|
else None
|
||||||
referenced_column_name=constraint.referenced_column
|
),
|
||||||
|
referenced_column_name=(
|
||||||
|
constraint.referenced_column
|
||||||
if constraint.constraint_type == "FOREIGN KEY"
|
if constraint.constraint_type == "FOREIGN KEY"
|
||||||
else None,
|
else None
|
||||||
|
),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
self.report.num_get_table_constraints_for_dataset_api_requests += 1
|
self.report.num_get_table_constraints_for_dataset_api_requests += 1
|
||||||
|
@ -498,7 +498,10 @@ class BigQuerySchemaGenerator:
|
|||||||
report=self.report,
|
report=self.report,
|
||||||
rate_limiter=rate_limiter,
|
rate_limiter=rate_limiter,
|
||||||
)
|
)
|
||||||
if self.config.include_table_constraints:
|
if (
|
||||||
|
self.config.include_table_constraints
|
||||||
|
and bigquery_dataset.supports_table_constraints()
|
||||||
|
):
|
||||||
constraints = self.schema_api.get_table_constraints_for_dataset(
|
constraints = self.schema_api.get_table_constraints_for_dataset(
|
||||||
project_id=project_id, dataset_name=dataset_name, report=self.report
|
project_id=project_id, dataset_name=dataset_name, report=self.report
|
||||||
)
|
)
|
||||||
@ -1157,9 +1160,11 @@ class BigQuerySchemaGenerator:
|
|||||||
# fields=[],
|
# fields=[],
|
||||||
fields=self.gen_schema_fields(
|
fields=self.gen_schema_fields(
|
||||||
columns,
|
columns,
|
||||||
|
(
|
||||||
table.constraints
|
table.constraints
|
||||||
if (isinstance(table, BigqueryTable) and table.constraints)
|
if (isinstance(table, BigqueryTable) and table.constraints)
|
||||||
else [],
|
else []
|
||||||
|
),
|
||||||
),
|
),
|
||||||
foreignKeys=foreign_keys if foreign_keys else None,
|
foreignKeys=foreign_keys if foreign_keys else None,
|
||||||
)
|
)
|
||||||
@ -1180,13 +1185,9 @@ class BigQuerySchemaGenerator:
|
|||||||
) -> Iterable[BigqueryTable]:
|
) -> Iterable[BigqueryTable]:
|
||||||
# In bigquery there is no way to query all tables in a Project id
|
# In bigquery there is no way to query all tables in a Project id
|
||||||
with PerfTimer() as timer:
|
with PerfTimer() as timer:
|
||||||
# PARTITIONS INFORMATION_SCHEMA view is not available for BigLake tables
|
with_partitions = (
|
||||||
# based on Amazon S3 and Blob Storage data.
|
self.config.have_table_data_read_permission
|
||||||
# https://cloud.google.com/bigquery/docs/omni-introduction#limitations
|
and dataset.supports_table_partitions()
|
||||||
# Omni Locations - https://cloud.google.com/bigquery/docs/omni-introduction#locations
|
|
||||||
with_partitions = self.config.have_table_data_read_permission and not (
|
|
||||||
dataset.location
|
|
||||||
and dataset.location.lower().startswith(("aws-", "azure-"))
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Partitions view throw exception if we try to query partition info for too many tables
|
# Partitions view throw exception if we try to query partition info for too many tables
|
||||||
|
Loading…
x
Reference in New Issue
Block a user