mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-07-24 09:50:01 +00:00
parent
d7f4172416
commit
4e02cb1c54
@ -53,6 +53,7 @@ from metadata.profiler.metrics.static.row_count import RowCount
|
||||
from metadata.profiler.orm.registry import NOT_COMPUTE
|
||||
from metadata.profiler.processor.sample_data_handler import upload_sample_data
|
||||
from metadata.utils.constants import SAMPLE_DATA_DEFAULT_COUNT
|
||||
from metadata.utils.helpers import calculate_execution_time
|
||||
from metadata.utils.logger import profiler_logger
|
||||
|
||||
logger = profiler_logger()
|
||||
@ -91,6 +92,7 @@ class Profiler(Generic[TMetric]):
|
||||
"""
|
||||
|
||||
self.profiler_interface = profiler_interface
|
||||
self.source_config = self.profiler_interface.source_config
|
||||
self.include_columns = include_columns
|
||||
self.exclude_columns = exclude_columns
|
||||
self._metrics = metrics
|
||||
@ -518,27 +520,27 @@ class Profiler(Generic[TMetric]):
|
||||
|
||||
return self
|
||||
|
||||
def process(
|
||||
self,
|
||||
generate_sample_data: Optional[bool],
|
||||
) -> ProfilerResponse:
|
||||
def process(self) -> ProfilerResponse:
|
||||
"""
|
||||
Given a table, we will prepare the profiler for
|
||||
all its columns and return all the run profilers
|
||||
in a Dict in the shape {col_name: Profiler}
|
||||
"""
|
||||
logger.debug(
|
||||
f"Computing profile metrics for {self.profiler_interface.table_entity.fullyQualifiedName.__root__}..."
|
||||
)
|
||||
|
||||
self.compute_metrics()
|
||||
if generate_sample_data:
|
||||
if self.source_config.computeMetrics:
|
||||
logger.debug(
|
||||
f"Computing profile metrics for {self.profiler_interface.table_entity.fullyQualifiedName.__root__}..."
|
||||
)
|
||||
self.compute_metrics()
|
||||
|
||||
if self.source_config.generateSampleData:
|
||||
sample_data = self.generate_sample_data()
|
||||
else:
|
||||
sample_data = None
|
||||
|
||||
profile = self.get_profile()
|
||||
self._check_profile_and_handle(profile)
|
||||
if self.source_config.computeMetrics:
|
||||
self._check_profile_and_handle(profile)
|
||||
|
||||
table_profile = ProfilerResponse(
|
||||
table=self.profiler_interface.table_entity,
|
||||
@ -548,6 +550,7 @@ class Profiler(Generic[TMetric]):
|
||||
|
||||
return table_profile
|
||||
|
||||
@calculate_execution_time
|
||||
def generate_sample_data(self) -> Optional[TableData]:
|
||||
"""Fetch and ingest sample data
|
||||
|
||||
|
@ -58,9 +58,7 @@ class ProfilerProcessor(Processor):
|
||||
)
|
||||
|
||||
try:
|
||||
profile: ProfilerResponse = profiler_runner.process(
|
||||
self.source_config.generateSampleData,
|
||||
)
|
||||
profile: ProfilerResponse = profiler_runner.process()
|
||||
except Exception as exc:
|
||||
self.status.failed(
|
||||
StackTraceError(
|
||||
|
@ -67,6 +67,12 @@
|
||||
"default": true,
|
||||
"title": "Generate Sample Data"
|
||||
},
|
||||
"computeMetrics": {
|
||||
"description": "Option to turn on/off computing profiler metrics.",
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"title": "Compute Metrics"
|
||||
},
|
||||
"sampleDataCount": {
|
||||
"description": "Number of row of sample data to be generated",
|
||||
"type": "integer",
|
||||
|
Loading…
x
Reference in New Issue
Block a user