mirror of
https://github.com/datahub-project/datahub.git
synced 2025-09-09 17:20:42 +00:00
feat(ingest/bigquery): Respect dataset and table patterns when ingesting lineage via catalog api (#10080)
This commit is contained in:
parent
2657c8f68b
commit
e4ebf34b6f
@@ -517,19 +517,23 @@ class BigqueryLineageExtractor:
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
# Convert project tables to <project_id>.<dataset_id>.<table_id> format
|
|
||||||
project_table_names = list(
|
|
||||||
map(
|
|
||||||
lambda table: "{}.{}.{}".format(
|
|
||||||
table.project, table.dataset_id, table.table_id
|
|
||||||
),
|
|
||||||
project_tables,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
lineage_map: Dict[str, Set[LineageEdge]] = {}
|
lineage_map: Dict[str, Set[LineageEdge]] = {}
|
||||||
curr_date = datetime.now()
|
curr_date = datetime.now()
|
||||||
for table in project_table_names:
|
for project_table in project_tables:
|
||||||
|
# Convert project table to <project_id>.<dataset_id>.<table_id> format
|
||||||
|
table = f"{project_table.project}.{project_table.dataset_id}.{project_table.table_id}"
|
||||||
|
|
||||||
|
if not is_schema_allowed(
|
||||||
|
self.config.dataset_pattern,
|
||||||
|
schema_name=project_table.dataset_id,
|
||||||
|
db_name=project_table.project,
|
||||||
|
match_fully_qualified_schema_name=self.config.match_fully_qualified_names,
|
||||||
|
) or not self.config.table_pattern.allowed(table):
|
||||||
|
self.report.num_skipped_lineage_entries_not_allowed[
|
||||||
|
project_table.project
|
||||||
|
] += 1
|
||||||
|
continue
|
||||||
|
|
||||||
logger.info("Creating lineage map for table %s", table)
|
logger.info("Creating lineage map for table %s", table)
|
||||||
upstreams = set()
|
upstreams = set()
|
||||||
downstream_table = lineage_v1.EntityReference()
|
downstream_table = lineage_v1.EntityReference()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user