mirror of
https://github.com/datahub-project/datahub.git
synced 2025-07-26 02:50:01 +00:00
fix(ingest/bigquery): Fixing double sanitization of urns (#10386)
This commit is contained in:
parent
d82750b891
commit
4e47933e55
@ -261,7 +261,7 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
|
|||||||
self.lineage_extractor = BigqueryLineageExtractor(
|
self.lineage_extractor = BigqueryLineageExtractor(
|
||||||
config,
|
config,
|
||||||
self.report,
|
self.report,
|
||||||
dataset_urn_builder=self.gen_dataset_urn_from_ref,
|
dataset_urn_builder=self.gen_dataset_urn_from_raw_ref,
|
||||||
redundant_run_skip_handler=redundant_lineage_run_skip_handler,
|
redundant_run_skip_handler=redundant_lineage_run_skip_handler,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -278,7 +278,7 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
|
|||||||
config,
|
config,
|
||||||
self.report,
|
self.report,
|
||||||
schema_resolver=self.sql_parser_schema_resolver,
|
schema_resolver=self.sql_parser_schema_resolver,
|
||||||
dataset_urn_builder=self.gen_dataset_urn_from_ref,
|
dataset_urn_builder=self.gen_dataset_urn_from_raw_ref,
|
||||||
redundant_run_skip_handler=redundant_usage_run_skip_handler,
|
redundant_run_skip_handler=redundant_usage_run_skip_handler,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -1189,14 +1189,28 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
|
|||||||
entityUrn=dataset_urn, aspect=tags
|
entityUrn=dataset_urn, aspect=tags
|
||||||
).as_workunit()
|
).as_workunit()
|
||||||
|
|
||||||
def gen_dataset_urn(self, project_id: str, dataset_name: str, table: str) -> str:
|
def gen_dataset_urn(
|
||||||
|
self, project_id: str, dataset_name: str, table: str, use_raw_name: bool = False
|
||||||
|
) -> str:
|
||||||
datahub_dataset_name = BigqueryTableIdentifier(project_id, dataset_name, table)
|
datahub_dataset_name = BigqueryTableIdentifier(project_id, dataset_name, table)
|
||||||
return make_dataset_urn(
|
return make_dataset_urn(
|
||||||
self.platform,
|
self.platform,
|
||||||
str(datahub_dataset_name),
|
(
|
||||||
|
str(datahub_dataset_name)
|
||||||
|
if not use_raw_name
|
||||||
|
else datahub_dataset_name.raw_table_name()
|
||||||
|
),
|
||||||
self.config.env,
|
self.config.env,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def gen_dataset_urn_from_raw_ref(self, ref: BigQueryTableRef) -> str:
|
||||||
|
return self.gen_dataset_urn(
|
||||||
|
ref.table_identifier.project_id,
|
||||||
|
ref.table_identifier.dataset,
|
||||||
|
ref.table_identifier.table,
|
||||||
|
use_raw_name=True,
|
||||||
|
)
|
||||||
|
|
||||||
def gen_dataset_urn_from_ref(self, ref: BigQueryTableRef) -> str:
|
def gen_dataset_urn_from_ref(self, ref: BigQueryTableRef) -> str:
|
||||||
return self.gen_dataset_urn(
|
return self.gen_dataset_urn(
|
||||||
ref.table_identifier.project_id,
|
ref.table_identifier.project_id,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user