Fix #14764: Make profiler metric computation configurable (#14791)

This commit is contained in:
Mayur Singal 2024-01-19 18:13:26 +05:30 committed by GitHub
parent d7f4172416
commit 4e02cb1c54
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 20 additions and 13 deletions

View File

@ -53,6 +53,7 @@ from metadata.profiler.metrics.static.row_count import RowCount
from metadata.profiler.orm.registry import NOT_COMPUTE
from metadata.profiler.processor.sample_data_handler import upload_sample_data
from metadata.utils.constants import SAMPLE_DATA_DEFAULT_COUNT
from metadata.utils.helpers import calculate_execution_time
from metadata.utils.logger import profiler_logger
logger = profiler_logger()
@ -91,6 +92,7 @@ class Profiler(Generic[TMetric]):
"""
self.profiler_interface = profiler_interface
self.source_config = self.profiler_interface.source_config
self.include_columns = include_columns
self.exclude_columns = exclude_columns
self._metrics = metrics
@ -518,27 +520,27 @@ class Profiler(Generic[TMetric]):
return self
def process(
self,
generate_sample_data: Optional[bool],
) -> ProfilerResponse:
def process(self) -> ProfilerResponse:
"""
Given a table, we will prepare the profiler for
all its columns and return all the run profilers
in a Dict in the shape {col_name: Profiler}
"""
logger.debug(
f"Computing profile metrics for {self.profiler_interface.table_entity.fullyQualifiedName.__root__}..."
)
self.compute_metrics()
if generate_sample_data:
if self.source_config.computeMetrics:
logger.debug(
f"Computing profile metrics for {self.profiler_interface.table_entity.fullyQualifiedName.__root__}..."
)
self.compute_metrics()
if self.source_config.generateSampleData:
sample_data = self.generate_sample_data()
else:
sample_data = None
profile = self.get_profile()
self._check_profile_and_handle(profile)
if self.source_config.computeMetrics:
self._check_profile_and_handle(profile)
table_profile = ProfilerResponse(
table=self.profiler_interface.table_entity,
@ -548,6 +550,7 @@ class Profiler(Generic[TMetric]):
return table_profile
@calculate_execution_time
def generate_sample_data(self) -> Optional[TableData]:
"""Fetch and ingest sample data

View File

@ -58,9 +58,7 @@ class ProfilerProcessor(Processor):
)
try:
profile: ProfilerResponse = profiler_runner.process(
self.source_config.generateSampleData,
)
profile: ProfilerResponse = profiler_runner.process()
except Exception as exc:
self.status.failed(
StackTraceError(

View File

@ -67,6 +67,12 @@
"default": true,
"title": "Generate Sample Data"
},
"computeMetrics": {
"description": "Option to turn on/off computing profiler metrics.",
"type": "boolean",
"default": true,
"title": "Compute Metrics"
},
"sampleDataCount": {
"description": "Number of rows of sample data to be generated",
"type": "integer",