fix(ingest/bigquery): Fixing lineage filter query (#9114)

This commit is contained in:
Tamas Nemeth 2023-10-26 18:46:10 +02:00 committed by GitHub
parent f402090c1e
commit a96a512166
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 13 additions and 6 deletions

View File

@@ -309,6 +309,7 @@ class BigQueryV2Config(
                 "dataset_pattern is not set but schema_pattern is set, using schema_pattern as dataset_pattern. schema_pattern will be deprecated, please use dataset_pattern instead."
             )
             values["dataset_pattern"] = schema_pattern
+            dataset_pattern = schema_pattern
         elif (
             dataset_pattern != AllowDenyPattern.allow_all()
             and schema_pattern != AllowDenyPattern.allow_all()

View File

@@ -20,6 +20,7 @@ import humanfriendly
 from google.cloud.datacatalog import lineage_v1
 from google.cloud.logging_v2.client import Client as GCPLoggingClient
+from datahub.configuration.pattern_utils import is_schema_allowed
 from datahub.emitter import mce_builder
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.api.workunit import MetadataWorkUnit
@@ -683,8 +684,11 @@ class BigqueryLineageExtractor:
                 self.report.num_skipped_lineage_entries_missing_data[e.project_id] += 1
                 continue
-            if not self.config.dataset_pattern.allowed(
-                destination_table.table_identifier.dataset
+            if not is_schema_allowed(
+                self.config.dataset_pattern,
+                destination_table.table_identifier.dataset,
+                destination_table.table_identifier.project_id,
+                self.config.match_fully_qualified_names,
             ) or not self.config.table_pattern.allowed(
                 destination_table.table_identifier.get_table_name()
             ):

View File

@@ -21,6 +21,7 @@ from typing import (
 import humanfriendly
+from datahub.configuration.pattern_utils import is_schema_allowed
 from datahub.configuration.time_window_config import (
     BaseTimeWindowConfig,
     get_time_bucket,
@@ -335,10 +336,11 @@ class BigQueryUsageExtractor:
     def _is_table_allowed(self, table_ref: Optional[BigQueryTableRef]) -> bool:
         return (
             table_ref is not None
-            and self.config.dataset_pattern.allowed(
-                f"{table_ref.table_identifier.project_id}.{table_ref.table_identifier.dataset}"
-                if self.config.match_fully_qualified_names
-                else table_ref.table_identifier.dataset
+            and is_schema_allowed(
+                self.config.dataset_pattern,
+                table_ref.table_identifier.dataset,
+                table_ref.table_identifier.project_id,
+                self.config.match_fully_qualified_names,
             )
             and self.config.table_pattern.allowed(str(table_ref.table_identifier))
         )