Fix #14764: Profiler make compute metric configurable (#14791)

This commit is contained in:
Mayur Singal 2024-01-19 18:13:26 +05:30 committed by GitHub
parent d7f4172416
commit 4e02cb1c54
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 20 additions and 13 deletions

View File

@ -53,6 +53,7 @@ from metadata.profiler.metrics.static.row_count import RowCount
from metadata.profiler.orm.registry import NOT_COMPUTE from metadata.profiler.orm.registry import NOT_COMPUTE
from metadata.profiler.processor.sample_data_handler import upload_sample_data from metadata.profiler.processor.sample_data_handler import upload_sample_data
from metadata.utils.constants import SAMPLE_DATA_DEFAULT_COUNT from metadata.utils.constants import SAMPLE_DATA_DEFAULT_COUNT
from metadata.utils.helpers import calculate_execution_time
from metadata.utils.logger import profiler_logger from metadata.utils.logger import profiler_logger
logger = profiler_logger() logger = profiler_logger()
@ -91,6 +92,7 @@ class Profiler(Generic[TMetric]):
""" """
self.profiler_interface = profiler_interface self.profiler_interface = profiler_interface
self.source_config = self.profiler_interface.source_config
self.include_columns = include_columns self.include_columns = include_columns
self.exclude_columns = exclude_columns self.exclude_columns = exclude_columns
self._metrics = metrics self._metrics = metrics
@ -518,27 +520,27 @@ class Profiler(Generic[TMetric]):
return self return self
def process( def process(self) -> ProfilerResponse:
self,
generate_sample_data: Optional[bool],
) -> ProfilerResponse:
""" """
Given a table, we will prepare the profiler for Given a table, we will prepare the profiler for
all its columns and return all the run profilers all its columns and return all the run profilers
in a Dict in the shape {col_name: Profiler} in a Dict in the shape {col_name: Profiler}
""" """
logger.debug(
f"Computing profile metrics for {self.profiler_interface.table_entity.fullyQualifiedName.__root__}..."
)
self.compute_metrics() if self.source_config.computeMetrics:
if generate_sample_data: logger.debug(
f"Computing profile metrics for {self.profiler_interface.table_entity.fullyQualifiedName.__root__}..."
)
self.compute_metrics()
if self.source_config.generateSampleData:
sample_data = self.generate_sample_data() sample_data = self.generate_sample_data()
else: else:
sample_data = None sample_data = None
profile = self.get_profile() profile = self.get_profile()
self._check_profile_and_handle(profile) if self.source_config.computeMetrics:
self._check_profile_and_handle(profile)
table_profile = ProfilerResponse( table_profile = ProfilerResponse(
table=self.profiler_interface.table_entity, table=self.profiler_interface.table_entity,
@ -548,6 +550,7 @@ class Profiler(Generic[TMetric]):
return table_profile return table_profile
@calculate_execution_time
def generate_sample_data(self) -> Optional[TableData]: def generate_sample_data(self) -> Optional[TableData]:
"""Fetch and ingest sample data """Fetch and ingest sample data

View File

@ -58,9 +58,7 @@ class ProfilerProcessor(Processor):
) )
try: try:
profile: ProfilerResponse = profiler_runner.process( profile: ProfilerResponse = profiler_runner.process()
self.source_config.generateSampleData,
)
except Exception as exc: except Exception as exc:
self.status.failed( self.status.failed(
StackTraceError( StackTraceError(

View File

@ -67,6 +67,12 @@
"default": true, "default": true,
"title": "Generate Sample Data" "title": "Generate Sample Data"
}, },
"computeMetrics": {
"description": "Option to turn on/off computing profiler metrics.",
"type": "boolean",
"default": true,
"title": "Compute Metrics"
},
"sampleDataCount": { "sampleDataCount": {
"description": "Number of row of sample data to be generated", "description": "Number of row of sample data to be generated",
"type": "integer", "type": "integer",