diff --git a/ingestion/src/metadata/profiler/processor/core.py b/ingestion/src/metadata/profiler/processor/core.py index 2e18a249e70..7eacab4cdff 100644 --- a/ingestion/src/metadata/profiler/processor/core.py +++ b/ingestion/src/metadata/profiler/processor/core.py @@ -53,6 +53,7 @@ from metadata.profiler.metrics.static.row_count import RowCount from metadata.profiler.orm.registry import NOT_COMPUTE from metadata.profiler.processor.sample_data_handler import upload_sample_data from metadata.utils.constants import SAMPLE_DATA_DEFAULT_COUNT +from metadata.utils.helpers import calculate_execution_time from metadata.utils.logger import profiler_logger logger = profiler_logger() @@ -91,6 +92,7 @@ class Profiler(Generic[TMetric]): """ self.profiler_interface = profiler_interface + self.source_config = self.profiler_interface.source_config self.include_columns = include_columns self.exclude_columns = exclude_columns self._metrics = metrics @@ -518,27 +520,27 @@ class Profiler(Generic[TMetric]): return self - def process( - self, - generate_sample_data: Optional[bool], - ) -> ProfilerResponse: + def process(self) -> ProfilerResponse: """ Given a table, we will prepare the profiler for all its columns and return all the run profilers in a Dict in the shape {col_name: Profiler} """ - logger.debug( - f"Computing profile metrics for {self.profiler_interface.table_entity.fullyQualifiedName.__root__}..." - ) - self.compute_metrics() - if generate_sample_data: + if self.source_config.computeMetrics: + logger.debug( + f"Computing profile metrics for {self.profiler_interface.table_entity.fullyQualifiedName.__root__}..." + ) + self.compute_metrics() + + if self.source_config.generateSampleData: sample_data = self.generate_sample_data() else: sample_data = None profile = self.get_profile() - self._check_profile_and_handle(profile) + if self.source_config.computeMetrics: + self._check_profile_and_handle(profile) table_profile = ProfilerResponse( table=self.profiler_interface.table_entity, @@ -548,6 +550,7 @@ class Profiler(Generic[TMetric]): return table_profile + @calculate_execution_time def generate_sample_data(self) -> Optional[TableData]: """Fetch and ingest sample data diff --git a/ingestion/src/metadata/profiler/processor/processor.py b/ingestion/src/metadata/profiler/processor/processor.py index 3d8b74ac75e..e90e0b38a6c 100644 --- a/ingestion/src/metadata/profiler/processor/processor.py +++ b/ingestion/src/metadata/profiler/processor/processor.py @@ -58,9 +58,7 @@ class ProfilerProcessor(Processor): ) try: - profile: ProfilerResponse = profiler_runner.process( - self.source_config.generateSampleData, - ) + profile: ProfilerResponse = profiler_runner.process() except Exception as exc: self.status.failed( StackTraceError( diff --git a/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/databaseServiceProfilerPipeline.json b/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/databaseServiceProfilerPipeline.json index d5951393c00..d8e8a35eba2 100644 --- a/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/databaseServiceProfilerPipeline.json +++ b/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/databaseServiceProfilerPipeline.json @@ -67,6 +67,12 @@ "default": true, "title": "Generate Sample Data" }, + "computeMetrics": { + "description": "Option to turn on/off computing profiler metrics.", + "type": "boolean", + "default": true, + "title": "Compute Metrics" + }, "sampleDataCount": { "description": "Number of row of sample data to be generated", "type": "integer",