fix(ingest/bigquery): Fixing double sanitization of urns (#10386)

This commit is contained in:
Tamas Nemeth 2024-05-03 06:24:53 +02:00 committed by GitHub
parent d82750b891
commit 4e47933e55
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -261,7 +261,7 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
self.lineage_extractor = BigqueryLineageExtractor(
config,
self.report,
dataset_urn_builder=self.gen_dataset_urn_from_ref,
dataset_urn_builder=self.gen_dataset_urn_from_raw_ref,
redundant_run_skip_handler=redundant_lineage_run_skip_handler,
)
@@ -278,7 +278,7 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
config,
self.report,
schema_resolver=self.sql_parser_schema_resolver,
dataset_urn_builder=self.gen_dataset_urn_from_ref,
dataset_urn_builder=self.gen_dataset_urn_from_raw_ref,
redundant_run_skip_handler=redundant_usage_run_skip_handler,
)
@@ -1189,14 +1189,28 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
entityUrn=dataset_urn, aspect=tags
).as_workunit()
# NOTE(review): this span is a rendered diff hunk whose +/- markers and
# indentation were lost, so old and new lines appear back to back.
# The single-line signature below appears to be the pre-change (removed) line.
def gen_dataset_urn(self, project_id: str, dataset_name: str, table: str) -> str:
# Apparent post-change signature: adds `use_raw_name`, defaulting to False so
# callers that pass only project/dataset/table keep the str()-based name.
def gen_dataset_urn(
self, project_id: str, dataset_name: str, table: str, use_raw_name: bool = False
) -> str:
# Wrap the three name parts in a BigqueryTableIdentifier; the dataset name
# handed to make_dataset_urn is derived from this object.
datahub_dataset_name = BigqueryTableIdentifier(project_id, dataset_name, table)
return make_dataset_urn(
self.platform,
# Apparent pre-change (removed) name argument: always the str() form.
str(datahub_dataset_name),
# Apparent post-change name argument: str() form unless use_raw_name is set,
# in which case raw_table_name() is used instead. Presumably str() applies
# the sanitization named in the commit title and raw_table_name() skips it
# -- TODO confirm against BigqueryTableIdentifier.
(
str(datahub_dataset_name)
if not use_raw_name
else datahub_dataset_name.raw_table_name()
),
self.config.env,
)
# Build a dataset URN string from a BigQueryTableRef using the raw table name.
# Forwards the ref's project_id, dataset and table to gen_dataset_urn with
# use_raw_name=True, which selects raw_table_name() over str() for the name.
# Elsewhere in this diff it is passed as `dataset_urn_builder` in place of
# gen_dataset_urn_from_ref.
def gen_dataset_urn_from_raw_ref(self, ref: BigQueryTableRef) -> str:
return self.gen_dataset_urn(
ref.table_identifier.project_id,
ref.table_identifier.dataset,
ref.table_identifier.table,
use_raw_name=True,
)
def gen_dataset_urn_from_ref(self, ref: BigQueryTableRef) -> str:
return self.gen_dataset_urn(
ref.table_identifier.project_id,