mirror of
https://github.com/datahub-project/datahub.git
synced 2025-12-27 18:07:57 +00:00
fix(ingestion): Allow profiling of only those tables that are allowed by the table_pattern. (#4842)
This commit is contained in:
parent
f1151cb478
commit
5c64e9d541
@ -747,14 +747,14 @@ WHERE
|
||||
return shard, None
|
||||
return None, None
|
||||
|
||||
def is_dataset_eligable_profiling(
|
||||
def is_dataset_eligible_for_profiling(
|
||||
self, dataset_name: str, sql_config: SQLAlchemyConfig
|
||||
) -> bool:
|
||||
"""
|
||||
Method overrides default profiling filter which checks profiling eligibility based on allow-deny pattern.
|
||||
It also skips profiling of sharded tables that are not the latest shard.
|
||||
"""
|
||||
if not super().is_dataset_eligable_profiling(dataset_name, sql_config):
|
||||
if not super().is_dataset_eligible_for_profiling(dataset_name, sql_config):
|
||||
return False
|
||||
|
||||
(project_id, schema, table) = dataset_name.split(".")
|
||||
|
||||
@ -240,7 +240,7 @@ class SQLAlchemyConfig(StatefulIngestionConfigBase):
|
||||
)
|
||||
table_pattern: AllowDenyPattern = Field(
|
||||
default=AllowDenyPattern.allow_all(),
|
||||
description="egex patterns for tables to filter in ingestion.",
|
||||
description="regex patterns for tables to filter in ingestion.",
|
||||
)
|
||||
view_pattern: AllowDenyPattern = Field(
|
||||
default=AllowDenyPattern.allow_all(),
|
||||
@ -248,7 +248,7 @@ class SQLAlchemyConfig(StatefulIngestionConfigBase):
|
||||
)
|
||||
profile_pattern: AllowDenyPattern = Field(
|
||||
default=AllowDenyPattern.allow_all(),
|
||||
description="regex patterns for profiles to filter in ingestion.",
|
||||
description="regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.",
|
||||
)
|
||||
domain: Dict[str, AllowDenyPattern] = Field(
|
||||
default=dict(),
|
||||
@ -1268,10 +1268,12 @@ class SQLAlchemySource(StatefulIngestionSourceBase):
|
||||
return None, None
|
||||
|
||||
# Override if you want to do additional checks
|
||||
def is_dataset_eligable_profiling(
|
||||
def is_dataset_eligible_for_profiling(
|
||||
self, dataset_name: str, sql_config: SQLAlchemyConfig
|
||||
) -> bool:
|
||||
return sql_config.profile_pattern.allowed(dataset_name)
|
||||
return sql_config.table_pattern.allowed(
|
||||
dataset_name
|
||||
) and sql_config.profile_pattern.allowed(dataset_name)
|
||||
|
||||
def loop_profiler_requests(
|
||||
self,
|
||||
@ -1290,7 +1292,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase):
|
||||
dataset_name = self.get_identifier(
|
||||
schema=schema, entity=table, inspector=inspector
|
||||
)
|
||||
if not self.is_dataset_eligable_profiling(dataset_name, sql_config):
|
||||
if not self.is_dataset_eligible_for_profiling(dataset_name, sql_config):
|
||||
self.report.report_dropped(f"profile of {dataset_name}")
|
||||
continue
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user