mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-10-30 01:59:23 +00:00
* Added additional table + test coverage * Added logic for front end input fields * Added comment for median metric * skipping `Update owner and check description` cypress test * Added support to run window metrics for the profiler * Fix except code smell * moved median metric to window folder * Fix pyformat Co-authored-by: Shailesh Parmar <shailesh.parmar.webdev@gmail.com>
This commit is contained in:
parent
09b37d28f2
commit
48f6553fb3
@ -123,6 +123,15 @@ class Metric(ABC):
|
|||||||
"""
|
"""
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def is_window_metric(cls) -> bool:
|
||||||
|
"""
|
||||||
|
Marks the metric as a window metric.
|
||||||
|
|
||||||
|
By default, assume it is not a window metric
|
||||||
|
"""
|
||||||
|
return False
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def metric_type(self):
|
def metric_type(self):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@ -35,7 +35,6 @@ from metadata.orm_profiler.metrics.static.like_count import LikeCount
|
|||||||
from metadata.orm_profiler.metrics.static.max import Max
|
from metadata.orm_profiler.metrics.static.max import Max
|
||||||
from metadata.orm_profiler.metrics.static.max_length import MaxLength
|
from metadata.orm_profiler.metrics.static.max_length import MaxLength
|
||||||
from metadata.orm_profiler.metrics.static.mean import Mean
|
from metadata.orm_profiler.metrics.static.mean import Mean
|
||||||
from metadata.orm_profiler.metrics.static.median import Median
|
|
||||||
from metadata.orm_profiler.metrics.static.min import Min
|
from metadata.orm_profiler.metrics.static.min import Min
|
||||||
from metadata.orm_profiler.metrics.static.min_length import MinLength
|
from metadata.orm_profiler.metrics.static.min_length import MinLength
|
||||||
from metadata.orm_profiler.metrics.static.not_like_count import NotLikeCount
|
from metadata.orm_profiler.metrics.static.not_like_count import NotLikeCount
|
||||||
@ -44,6 +43,7 @@ from metadata.orm_profiler.metrics.static.row_count import RowCount
|
|||||||
from metadata.orm_profiler.metrics.static.stddev import StdDev
|
from metadata.orm_profiler.metrics.static.stddev import StdDev
|
||||||
from metadata.orm_profiler.metrics.static.sum import Sum
|
from metadata.orm_profiler.metrics.static.sum import Sum
|
||||||
from metadata.orm_profiler.metrics.static.unique_count import UniqueCount
|
from metadata.orm_profiler.metrics.static.unique_count import UniqueCount
|
||||||
|
from metadata.orm_profiler.metrics.window.median import Median
|
||||||
from metadata.orm_profiler.registry import MetricRegistry
|
from metadata.orm_profiler.registry import MetricRegistry
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -37,6 +37,10 @@ class Median(StaticMetric):
|
|||||||
def name(cls):
|
def name(cls):
|
||||||
return "median"
|
return "median"
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def is_window_metric(cls):
|
||||||
|
return True
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def metric_type(self):
|
def metric_type(self):
|
||||||
return float
|
return float
|
||||||
@ -18,6 +18,7 @@ from typing import Any, Dict, Generic, List, Optional, Tuple, Type, Union
|
|||||||
|
|
||||||
from pydantic import ValidationError
|
from pydantic import ValidationError
|
||||||
from sqlalchemy import Column, inspect
|
from sqlalchemy import Column, inspect
|
||||||
|
from sqlalchemy.engine.row import Row
|
||||||
from sqlalchemy.orm import DeclarativeMeta
|
from sqlalchemy.orm import DeclarativeMeta
|
||||||
from sqlalchemy.orm.session import Session
|
from sqlalchemy.orm.session import Session
|
||||||
from sqlalchemy.orm.util import AliasedClass
|
from sqlalchemy.orm.util import AliasedClass
|
||||||
@ -241,7 +242,11 @@ class Profiler(Generic[TMetric]):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
row = self.runner.select_first_from_sample(
|
row = self.runner.select_first_from_sample(
|
||||||
*[metric(col).fn() for metric in col_metrics]
|
*[
|
||||||
|
metric(col).fn()
|
||||||
|
for metric in col_metrics
|
||||||
|
if not metric.is_window_metric()
|
||||||
|
]
|
||||||
)
|
)
|
||||||
self._column_results[col.name].update(dict(row))
|
self._column_results[col.name].update(dict(row))
|
||||||
except (TimeoutError, Exception) as err:
|
except (TimeoutError, Exception) as err:
|
||||||
@ -342,6 +347,39 @@ class Profiler(Generic[TMetric]):
|
|||||||
current_col_results
|
current_col_results
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def run_window_metrics(self, col: Column):
|
||||||
|
"""
|
||||||
|
Run window metrics in isolation
|
||||||
|
|
||||||
|
Args:
|
||||||
|
col: column to compute the window metrics for
|
||||||
|
"""
|
||||||
|
|
||||||
|
col_metrics = [
|
||||||
|
metric
|
||||||
|
for metric in self.get_col_metrics(self.static_metrics)
|
||||||
|
if metric.is_window_metric()
|
||||||
|
]
|
||||||
|
|
||||||
|
if not col_metrics:
|
||||||
|
return None
|
||||||
|
|
||||||
|
for metric in col_metrics:
|
||||||
|
try:
|
||||||
|
row = self.runner.select_first_from_sample(metric(col).fn())
|
||||||
|
self._column_results[col.name].update(
|
||||||
|
dict(row)
|
||||||
|
if isinstance(row, Row)
|
||||||
|
else {
|
||||||
|
metric.name(): row
|
||||||
|
} # Snowflake does not return a Row object when table is empty throwing an error
|
||||||
|
)
|
||||||
|
except (Exception) as err:
|
||||||
|
logger.warning(
|
||||||
|
f"Error trying to compute column profile for {col.name} - {err}"
|
||||||
|
)
|
||||||
|
self.session.rollback()
|
||||||
|
|
||||||
def execute_column(self, col: Column) -> None:
|
def execute_column(self, col: Column) -> None:
|
||||||
"""
|
"""
|
||||||
Run the profiler on all the columns that
|
Run the profiler on all the columns that
|
||||||
@ -351,6 +389,7 @@ class Profiler(Generic[TMetric]):
|
|||||||
columns are of allowed types
|
columns are of allowed types
|
||||||
"""
|
"""
|
||||||
self.run_static_metrics(col)
|
self.run_static_metrics(col)
|
||||||
|
self.run_window_metrics(col)
|
||||||
self.run_query_metrics(col)
|
self.run_query_metrics(col)
|
||||||
self.run_composed_metrics(col)
|
self.run_composed_metrics(col)
|
||||||
|
|
||||||
|
|||||||
@ -31,7 +31,7 @@ def get_default_metrics(table: DeclarativeMeta) -> List[Metric]:
|
|||||||
add_props(table=table)(Metrics.COLUMN_COUNT.value),
|
add_props(table=table)(Metrics.COLUMN_COUNT.value),
|
||||||
add_props(table=table)(Metrics.COLUMN_NAMES.value),
|
add_props(table=table)(Metrics.COLUMN_NAMES.value),
|
||||||
# Column Metrics
|
# Column Metrics
|
||||||
# Metrics.MEDIAN.value, # TODO: enable it back after #5866
|
Metrics.MEDIAN.value,
|
||||||
Metrics.MEAN.value,
|
Metrics.MEAN.value,
|
||||||
Metrics.COUNT.value,
|
Metrics.COUNT.value,
|
||||||
Metrics.DISTINCT_COUNT.value,
|
Metrics.DISTINCT_COUNT.value,
|
||||||
|
|||||||
@ -104,6 +104,7 @@ class ProfilerTest(TestCase):
|
|||||||
variance=None,
|
variance=None,
|
||||||
distinctCount=2.0,
|
distinctCount=2.0,
|
||||||
distinctProportion=1.0,
|
distinctProportion=1.0,
|
||||||
|
median=30.5,
|
||||||
# histogram=Histogram(
|
# histogram=Histogram(
|
||||||
# boundaries=["30.0 to 30.25", "31.0 to 31.25"], frequencies=[1, 1]
|
# boundaries=["30.0 to 30.25", "31.0 to 31.25"], frequencies=[1, 1]
|
||||||
# ),
|
# ),
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user