mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-07-26 19:00:02 +00:00
Fix histogram labels (#10797)
* fix: added logic to format bin labels for histogram metric * fix: histogram labels * fix: added types for arguments
This commit is contained in:
parent
52c9a0f0df
commit
fc9b64b52a
@ -24,6 +24,7 @@ from metadata.profiler.metrics.static.count import Count
|
|||||||
from metadata.profiler.metrics.static.max import Max
|
from metadata.profiler.metrics.static.max import Max
|
||||||
from metadata.profiler.metrics.static.min import Min
|
from metadata.profiler.metrics.static.min import Min
|
||||||
from metadata.profiler.orm.registry import is_quantifiable
|
from metadata.profiler.orm.registry import is_quantifiable
|
||||||
|
from metadata.utils.helpers import format_large_string_numbers
|
||||||
from metadata.utils.logger import profiler_logger
|
from metadata.utils.logger import profiler_logger
|
||||||
|
|
||||||
logger = profiler_logger()
|
logger = profiler_logger()
|
||||||
@ -74,6 +75,27 @@ class Histogram(HybridMetric):
|
|||||||
float(res_max),
|
float(res_max),
|
||||||
) # Decimal to float
|
) # Decimal to float
|
||||||
|
|
||||||
|
@staticmethod
def _format_bin_labels(
    lower_bin: Union[float, int], upper_bin: Optional[Union[float, int]] = None
) -> str:
    """Build the text label describing one histogram bin.

    Args:
        lower_bin: lower bound of the bin. ``None`` is rendered as "null".
        upper_bin: upper bound of the bin. Defaults to None, in which case
            the label describes an open-ended bin.

    Returns:
        str: "<lower> to <upper>" when an upper bound is given,
            otherwise "<lower> and up"
    """
    lower_label = (
        "null" if lower_bin is None else format_large_string_numbers(lower_bin)
    )
    if upper_bin is not None:
        return f"{lower_label} to {format_large_string_numbers(upper_bin)}"
    return f"{lower_label} and up"
|
||||||
|
|
||||||
def fn(
|
def fn(
|
||||||
self,
|
self,
|
||||||
sample: Optional[DeclarativeMeta],
|
sample: Optional[DeclarativeMeta],
|
||||||
@ -120,14 +142,17 @@ class Histogram(HybridMetric):
|
|||||||
condition = and_(col >= starting_bin_bound)
|
condition = and_(col >= starting_bin_bound)
|
||||||
case_stmts.append(
|
case_stmts.append(
|
||||||
func.count(case([(condition, col)])).label(
|
func.count(case([(condition, col)])).label(
|
||||||
f"{starting_bin_bound:.2f} and up"
|
self._format_bin_labels(starting_bin_bound)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
case_stmts.append(
|
case_stmts.append(
|
||||||
func.count(case([(condition, col)])).label(
|
func.count(case([(condition, col)])).label(
|
||||||
f"{starting_bin_bound:.2f} to {ending_bin_bound:.2f}"
|
self._format_bin_labels(
|
||||||
|
starting_bin_bound,
|
||||||
|
ending_bin_bound,
|
||||||
|
)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
starting_bin_bound = ending_bin_bound
|
starting_bin_bound = ending_bin_bound
|
||||||
@ -176,9 +201,9 @@ class Histogram(HybridMetric):
|
|||||||
|
|
||||||
bins = list(np.arange(num_bins) * bind_width + res_min)
|
bins = list(np.arange(num_bins) * bind_width + res_min)
|
||||||
bins_label = [
|
bins_label = [
|
||||||
f"{bins[i]:.2f} to {bins[i+1]:.2f}"
|
self._format_bin_labels(bins[i], bins[i + 1])
|
||||||
if i < len(bins) - 1
|
if i < len(bins) - 1
|
||||||
else f"{bins[i]:.2f} and up"
|
else self._format_bin_labels(bins[i])
|
||||||
for i in range(len(bins))
|
for i in range(len(bins))
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -18,6 +18,7 @@ from __future__ import annotations
|
|||||||
import re
|
import re
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from functools import wraps
|
from functools import wraps
|
||||||
|
from math import floor, log
|
||||||
from time import perf_counter
|
from time import perf_counter
|
||||||
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
|
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
|
||||||
|
|
||||||
@ -327,3 +328,18 @@ def get_entity_tier_from_tags(tags: list[TagLabel]) -> Optional[str]:
|
|||||||
),
|
),
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def format_large_string_numbers(number: Union[float, int]) -> str:
    """Format a number into a short human readable string.

    The value is scaled down by powers of 1000 and suffixed with the matching
    unit (e.g. 1,000,000 -> 1.00M, 1,000,000,000 -> 1.00B, etc). Values whose
    absolute value is below 1000 carry no suffix, and values beyond the
    trillions keep the "T" suffix.

    Args:
        number: number to format

    Returns:
        str: "0" for zero, otherwise the scaled value with two decimals
            followed by its unit suffix
    """
    if number == 0:
        return "0"
    units = ["", "K", "M", "B", "T"]
    constant_k = 1000.0
    # Pick the unit by bounded repeated division instead of floor(log(...)):
    # the logarithm is negative for |number| < 1 (which indexed `units` from
    # the end and rendered 0.5 as "500.00T"), runs past the list for
    # |number| >= 1e15 (IndexError), and is exposed to floating point error
    # right at exact powers of 1000.
    magnitude = 0
    scaled = abs(number)
    while scaled >= constant_k and magnitude < len(units) - 1:
        scaled /= constant_k
        magnitude += 1
    return f"{number / constant_k**magnitude:.2f}{units[magnitude]}"
|
||||||
|
60
ingestion/tests/unit/profiler/test_utils.py
Normal file
60
ingestion/tests/unit/profiler/test_utils.py
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
# Copyright 2021 Collate
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
"""
|
||||||
|
Tests utils function for the profiler
|
||||||
|
"""
|
||||||
|
|
||||||
|
from unittest import TestCase
|
||||||
|
|
||||||
|
from metadata.profiler.metrics.hybrid.histogram import Histogram
|
||||||
|
|
||||||
|
|
||||||
|
class TestHistogramUtils(TestCase):
|
||||||
|
@classmethod
|
||||||
|
def setUpClass(cls):
|
||||||
|
cls.histogram = Histogram()
|
||||||
|
|
||||||
|
def test_histogram_label_formatter_positive(self):
|
||||||
|
"""test label formatter for histogram"""
|
||||||
|
formatted_label = self.histogram._format_bin_labels(18927, 23456)
|
||||||
|
assert formatted_label == "18.93K to 23.46K"
|
||||||
|
|
||||||
|
formatted_label = self.histogram._format_bin_labels(18927)
|
||||||
|
assert formatted_label == "18.93K and up"
|
||||||
|
|
||||||
|
def test_histogram_label_formatter_negative(self):
|
||||||
|
"""test label formatter for histogram for negative numbers"""
|
||||||
|
formatted_label = self.histogram._format_bin_labels(-18927, -23456)
|
||||||
|
assert formatted_label == "-18.93K to -23.46K"
|
||||||
|
|
||||||
|
formatted_label = self.histogram._format_bin_labels(-18927)
|
||||||
|
assert formatted_label == "-18.93K and up"
|
||||||
|
|
||||||
|
def test_histogram_label_formatter_none(self):
|
||||||
|
"""test label formatter for histogram for None"""
|
||||||
|
formatted_label = self.histogram._format_bin_labels(None)
|
||||||
|
assert formatted_label == "null and up"
|
||||||
|
|
||||||
|
def test_histogram_label_formatter_zero(self):
|
||||||
|
"""test label formatter for histogram with zero"""
|
||||||
|
formatted_label = self.histogram._format_bin_labels(0)
|
||||||
|
assert formatted_label == "0 and up"
|
||||||
|
|
||||||
|
def test_histogram_label_formatter_nines(self):
|
||||||
|
"""test label formatter for histogram for nines"""
|
||||||
|
formatted_label = self.histogram._format_bin_labels(99999999)
|
||||||
|
assert formatted_label == "100.00M and up"
|
||||||
|
|
||||||
|
def test_histogram_label_formatter_floats(self):
|
||||||
|
"""test label formatter for histogram for floats"""
|
||||||
|
formatted_label = self.histogram._format_bin_labels(167893.98542, 194993.98542)
|
||||||
|
assert formatted_label == "167.89K to 194.99K"
|
@ -103,7 +103,6 @@ const DataDistributionHistogram = ({
|
|||||||
<CartesianGrid stroke={GRAPH_BACKGROUND_COLOR} />
|
<CartesianGrid stroke={GRAPH_BACKGROUND_COLOR} />
|
||||||
<XAxis
|
<XAxis
|
||||||
dataKey="name"
|
dataKey="name"
|
||||||
interval={0}
|
|
||||||
padding={{ left: 16, right: 16 }}
|
padding={{ left: 16, right: 16 }}
|
||||||
tick={{ fontSize: 12 }}
|
tick={{ fontSize: 12 }}
|
||||||
/>
|
/>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user