feat(ingest/bigquery): Respect dataset and table patterns when ingesting lineage via catalog api (#10080)

This commit is contained in:
Alexander 2024-03-26 13:03:28 -04:00 committed by GitHub
parent 2657c8f68b
commit e4ebf34b6f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -517,19 +517,23 @@ class BigqueryLineageExtractor:
             ]
         )
-        # Convert project tables to <project_id>.<dataset_id>.<table_id> format
-        project_table_names = list(
-            map(
-                lambda table: "{}.{}.{}".format(
-                    table.project, table.dataset_id, table.table_id
-                ),
-                project_tables,
-            )
-        )
         lineage_map: Dict[str, Set[LineageEdge]] = {}
         curr_date = datetime.now()
-        for table in project_table_names:
+        for project_table in project_tables:
+            # Convert project table to <project_id>.<dataset_id>.<table_id> format
+            table = f"{project_table.project}.{project_table.dataset_id}.{project_table.table_id}"
+            if not is_schema_allowed(
+                self.config.dataset_pattern,
+                schema_name=project_table.dataset_id,
+                db_name=project_table.project,
+                match_fully_qualified_schema_name=self.config.match_fully_qualified_names,
+            ) or not self.config.table_pattern.allowed(table):
+                self.report.num_skipped_lineage_entries_not_allowed[
+                    project_table.project
+                ] += 1
+                continue
             logger.info("Creating lineage map for table %s", table)
             upstreams = set()
             downstream_table = lineage_v1.EntityReference()