mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-10-27 16:55:06 +00:00
* Added additional table + test coverage * Added logic for front end input fields * Added comment for median metric * skipping `Update owner and check description` cypress test * Added support to run window metrics for the profiler * Fix except code smell * moved median metric to windown folder * Fix pyformat Co-authored-by: Shailesh Parmar <shailesh.parmar.webdev@gmail.com>
This commit is contained in:
parent
09b37d28f2
commit
48f6553fb3
@ -123,6 +123,15 @@ class Metric(ABC):
|
||||
"""
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def is_window_metric(cls) -> bool:
|
||||
"""
|
||||
Marks the metric as a window metric.
|
||||
|
||||
By default, assume it is not a window metric
|
||||
"""
|
||||
return False
|
||||
|
||||
@property
|
||||
def metric_type(self):
|
||||
"""
|
||||
|
||||
@ -35,7 +35,6 @@ from metadata.orm_profiler.metrics.static.like_count import LikeCount
|
||||
from metadata.orm_profiler.metrics.static.max import Max
|
||||
from metadata.orm_profiler.metrics.static.max_length import MaxLength
|
||||
from metadata.orm_profiler.metrics.static.mean import Mean
|
||||
from metadata.orm_profiler.metrics.static.median import Median
|
||||
from metadata.orm_profiler.metrics.static.min import Min
|
||||
from metadata.orm_profiler.metrics.static.min_length import MinLength
|
||||
from metadata.orm_profiler.metrics.static.not_like_count import NotLikeCount
|
||||
@ -44,6 +43,7 @@ from metadata.orm_profiler.metrics.static.row_count import RowCount
|
||||
from metadata.orm_profiler.metrics.static.stddev import StdDev
|
||||
from metadata.orm_profiler.metrics.static.sum import Sum
|
||||
from metadata.orm_profiler.metrics.static.unique_count import UniqueCount
|
||||
from metadata.orm_profiler.metrics.window.median import Median
|
||||
from metadata.orm_profiler.registry import MetricRegistry
|
||||
|
||||
|
||||
|
||||
@ -37,6 +37,10 @@ class Median(StaticMetric):
|
||||
def name(cls):
|
||||
return "median"
|
||||
|
||||
@classmethod
|
||||
def is_window_metric(cls):
|
||||
return True
|
||||
|
||||
@property
|
||||
def metric_type(self):
|
||||
return float
|
||||
@ -18,6 +18,7 @@ from typing import Any, Dict, Generic, List, Optional, Tuple, Type, Union
|
||||
|
||||
from pydantic import ValidationError
|
||||
from sqlalchemy import Column, inspect
|
||||
from sqlalchemy.engine.row import Row
|
||||
from sqlalchemy.orm import DeclarativeMeta
|
||||
from sqlalchemy.orm.session import Session
|
||||
from sqlalchemy.orm.util import AliasedClass
|
||||
@ -241,7 +242,11 @@ class Profiler(Generic[TMetric]):
|
||||
|
||||
try:
|
||||
row = self.runner.select_first_from_sample(
|
||||
*[metric(col).fn() for metric in col_metrics]
|
||||
*[
|
||||
metric(col).fn()
|
||||
for metric in col_metrics
|
||||
if not metric.is_window_metric()
|
||||
]
|
||||
)
|
||||
self._column_results[col.name].update(dict(row))
|
||||
except (TimeoutError, Exception) as err:
|
||||
@ -342,6 +347,39 @@ class Profiler(Generic[TMetric]):
|
||||
current_col_results
|
||||
)
|
||||
|
||||
def run_window_metrics(self, col: Column):
|
||||
"""
|
||||
Run windown metrics in isolation
|
||||
|
||||
Args:
|
||||
col: column name
|
||||
"""
|
||||
|
||||
col_metrics = [
|
||||
metric
|
||||
for metric in self.get_col_metrics(self.static_metrics)
|
||||
if metric.is_window_metric()
|
||||
]
|
||||
|
||||
if not col_metrics:
|
||||
return None
|
||||
|
||||
for metric in col_metrics:
|
||||
try:
|
||||
row = self.runner.select_first_from_sample(metric(col).fn())
|
||||
self._column_results[col.name].update(
|
||||
dict(row)
|
||||
if isinstance(row, Row)
|
||||
else {
|
||||
metric.name(): row
|
||||
} # Snowflake does not return a Row object when table is empty throwing an error
|
||||
)
|
||||
except (Exception) as err:
|
||||
logger.warning(
|
||||
f"Error trying to compute column profile for {col.name} - {err}"
|
||||
)
|
||||
self.session.rollback()
|
||||
|
||||
def execute_column(self, col: Column) -> None:
|
||||
"""
|
||||
Run the profiler on all the columns that
|
||||
@ -351,6 +389,7 @@ class Profiler(Generic[TMetric]):
|
||||
columns are of allowed types
|
||||
"""
|
||||
self.run_static_metrics(col)
|
||||
self.run_window_metrics(col)
|
||||
self.run_query_metrics(col)
|
||||
self.run_composed_metrics(col)
|
||||
|
||||
|
||||
@ -31,7 +31,7 @@ def get_default_metrics(table: DeclarativeMeta) -> List[Metric]:
|
||||
add_props(table=table)(Metrics.COLUMN_COUNT.value),
|
||||
add_props(table=table)(Metrics.COLUMN_NAMES.value),
|
||||
# Column Metrics
|
||||
# Metrics.MEDIAN.value, # TODO: enable it back after #5866
|
||||
Metrics.MEDIAN.value,
|
||||
Metrics.MEAN.value,
|
||||
Metrics.COUNT.value,
|
||||
Metrics.DISTINCT_COUNT.value,
|
||||
|
||||
@ -104,6 +104,7 @@ class ProfilerTest(TestCase):
|
||||
variance=None,
|
||||
distinctCount=2.0,
|
||||
distinctProportion=1.0,
|
||||
median=30.5,
|
||||
# histogram=Histogram(
|
||||
# boundaries=["30.0 to 30.25", "31.0 to 31.25"], frequencies=[1, 1]
|
||||
# ),
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user