fix(ingest): count profiled tables separately in report (#3731)

This commit is contained in:
Harshal Sheth 2021-12-14 02:06:49 -05:00 committed by GitHub
parent 578590e795
commit f24440eff3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -123,6 +123,7 @@ def make_sqlalchemy_uri(
class SQLSourceReport(SourceReport): class SQLSourceReport(SourceReport):
tables_scanned: int = 0 tables_scanned: int = 0
views_scanned: int = 0 views_scanned: int = 0
entities_profiled: int = 0
filtered: List[str] = field(default_factory=list) filtered: List[str] = field(default_factory=list)
query_combiner: Optional[SQLAlchemyQueryCombinerReport] = None query_combiner: Optional[SQLAlchemyQueryCombinerReport] = None
@@ -138,6 +139,9 @@ class SQLSourceReport(SourceReport):
else: else:
raise KeyError(f"Unknown entity {ent_type}.") raise KeyError(f"Unknown entity {ent_type}.")
def report_entity_profiled(self, name: str) -> None:
self.entities_profiled += 1
def report_dropped(self, ent_name: str) -> None: def report_dropped(self, ent_name: str) -> None:
self.filtered.append(ent_name) self.filtered.append(ent_name)
@@ -642,12 +646,12 @@ class SQLAlchemySource(Source):
dataset_name = self.get_identifier( dataset_name = self.get_identifier(
schema=schema, entity=table, inspector=inspector schema=schema, entity=table, inspector=inspector
) )
self.report.report_entity_scanned(f"profile of {dataset_name}")
if not sql_config.profile_pattern.allowed(dataset_name): if not sql_config.profile_pattern.allowed(dataset_name):
self.report.report_dropped(f"profile of {dataset_name}") self.report.report_dropped(f"profile of {dataset_name}")
continue continue
self.report.report_entity_profiled(dataset_name)
yield GEProfilerRequest( yield GEProfilerRequest(
pretty_name=dataset_name, pretty_name=dataset_name,
batch_kwargs=self.prepare_profiler_args(schema=schema, table=table), batch_kwargs=self.prepare_profiler_args(schema=schema, table=table),