fix(ingestion): Allow profiling of only those tables that are allowed by the table_pattern. (#4842)

This commit is contained in:
Ravindra Lanka 2022-05-06 02:07:31 -07:00 committed by GitHub
parent f1151cb478
commit 5c64e9d541
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 9 additions and 7 deletions

View File

@ -747,14 +747,14 @@ WHERE
return shard, None
return None, None
def is_dataset_eligable_profiling(
def is_dataset_eligible_for_profiling(
self, dataset_name: str, sql_config: SQLAlchemyConfig
) -> bool:
"""
Overrides the default profiling filter, which checks profiling eligibility based on the allow-deny pattern.
This override additionally skips profiling of sharded tables that are not the latest shard.
"""
if not super().is_dataset_eligable_profiling(dataset_name, sql_config):
if not super().is_dataset_eligible_for_profiling(dataset_name, sql_config):
return False
(project_id, schema, table) = dataset_name.split(".")

View File

@ -240,7 +240,7 @@ class SQLAlchemyConfig(StatefulIngestionConfigBase):
)
table_pattern: AllowDenyPattern = Field(
default=AllowDenyPattern.allow_all(),
description="egex patterns for tables to filter in ingestion.",
description="regex patterns for tables to filter in ingestion.",
)
view_pattern: AllowDenyPattern = Field(
default=AllowDenyPattern.allow_all(),
@ -248,7 +248,7 @@ class SQLAlchemyConfig(StatefulIngestionConfigBase):
)
profile_pattern: AllowDenyPattern = Field(
default=AllowDenyPattern.allow_all(),
description="regex patterns for profiles to filter in ingestion.",
description="regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.",
)
domain: Dict[str, AllowDenyPattern] = Field(
default=dict(),
@ -1268,10 +1268,12 @@ class SQLAlchemySource(StatefulIngestionSourceBase):
return None, None
# Override if you want to do additional checks
def is_dataset_eligable_profiling(
def is_dataset_eligible_for_profiling(
self, dataset_name: str, sql_config: SQLAlchemyConfig
) -> bool:
return sql_config.profile_pattern.allowed(dataset_name)
return sql_config.table_pattern.allowed(
dataset_name
) and sql_config.profile_pattern.allowed(dataset_name)
def loop_profiler_requests(
self,
@ -1290,7 +1292,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase):
dataset_name = self.get_identifier(
schema=schema, entity=table, inspector=inspector
)
if not self.is_dataset_eligable_profiling(dataset_name, sql_config):
if not self.is_dataset_eligible_for_profiling(dataset_name, sql_config):
self.report.report_dropped(f"profile of {dataset_name}")
continue