mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-07-24 09:50:01 +00:00
Fix histogram labels (#10797)
* fix: added logic to format bin labels for histogram metric * fix: histogram labels * fix: added types for arguments
This commit is contained in:
parent
52c9a0f0df
commit
fc9b64b52a
@ -24,6 +24,7 @@ from metadata.profiler.metrics.static.count import Count
|
||||
from metadata.profiler.metrics.static.max import Max
|
||||
from metadata.profiler.metrics.static.min import Min
|
||||
from metadata.profiler.orm.registry import is_quantifiable
|
||||
from metadata.utils.helpers import format_large_string_numbers
|
||||
from metadata.utils.logger import profiler_logger
|
||||
|
||||
logger = profiler_logger()
|
||||
@ -74,6 +75,27 @@ class Histogram(HybridMetric):
|
||||
float(res_max),
|
||||
) # Decimal to float
|
||||
|
||||
@staticmethod
def _format_bin_labels(
    lower_bin: Union[float, int], upper_bin: Optional[Union[float, int]] = None
) -> str:
    """Build the display label of a histogram bin.

    Args:
        lower_bin: lower bound of the bin. None is rendered as "null".
        upper_bin: upper bound of the bin. Defaults to None, in which
            case the label is open ended.

    Returns:
        str: "<lower> to <upper>" when an upper bound is given,
            "<lower> and up" otherwise
    """
    lower_label = (
        "null" if lower_bin is None else format_large_string_numbers(lower_bin)
    )
    if upper_bin is not None:
        upper_label = format_large_string_numbers(upper_bin)
        return f"{lower_label} to {upper_label}"
    # No upper bound: this is the last, open ended bin
    return f"{lower_label} and up"
|
||||
|
||||
def fn(
|
||||
self,
|
||||
sample: Optional[DeclarativeMeta],
|
||||
@ -120,14 +142,17 @@ class Histogram(HybridMetric):
|
||||
condition = and_(col >= starting_bin_bound)
|
||||
case_stmts.append(
|
||||
func.count(case([(condition, col)])).label(
|
||||
f"{starting_bin_bound:.2f} and up"
|
||||
self._format_bin_labels(starting_bin_bound)
|
||||
)
|
||||
)
|
||||
continue
|
||||
|
||||
case_stmts.append(
|
||||
func.count(case([(condition, col)])).label(
|
||||
f"{starting_bin_bound:.2f} to {ending_bin_bound:.2f}"
|
||||
self._format_bin_labels(
|
||||
starting_bin_bound,
|
||||
ending_bin_bound,
|
||||
)
|
||||
)
|
||||
)
|
||||
starting_bin_bound = ending_bin_bound
|
||||
@ -176,9 +201,9 @@ class Histogram(HybridMetric):
|
||||
|
||||
bins = list(np.arange(num_bins) * bind_width + res_min)
|
||||
bins_label = [
|
||||
f"{bins[i]:.2f} to {bins[i+1]:.2f}"
|
||||
self._format_bin_labels(bins[i], bins[i + 1])
|
||||
if i < len(bins) - 1
|
||||
else f"{bins[i]:.2f} and up"
|
||||
else self._format_bin_labels(bins[i])
|
||||
for i in range(len(bins))
|
||||
]
|
||||
|
||||
|
@ -18,6 +18,7 @@ from __future__ import annotations
|
||||
import re
|
||||
from datetime import datetime, timedelta
|
||||
from functools import wraps
|
||||
from math import floor, log
|
||||
from time import perf_counter
|
||||
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
|
||||
|
||||
@ -327,3 +328,18 @@ def get_entity_tier_from_tags(tags: list[TagLabel]) -> Optional[str]:
|
||||
),
|
||||
None,
|
||||
)
|
||||
|
||||
|
||||
def format_large_string_numbers(number: Union[float, int]) -> str:
    """Format a number into a short human readable string.

    (e.g. 18,927 -> 18.93K, 99,999,999 -> 100.00M, 0.5 -> 0.50)

    The value is divided by a power of 1000 and suffixed with the matching
    unit ("", "K", "M", "B", "T"), rendered with two decimals. The power is
    clamped to the available units: values whose absolute value is below 1
    are left unscaled, and values beyond the trillions are still expressed
    in trillions.

    Args:
        number: number to format

    Returns:
        str: "0" when number equals 0, otherwise the scaled value followed
            by its unit suffix
    """
    if number == 0:
        # log() is undefined for 0, so short-circuit
        return "0"
    units = ["", "K", "M", "B", "T"]
    constant_k = 1000.0
    magnitude = int(floor(log(abs(number), constant_k)))
    # Clamp to the known suffixes. Without this a fraction such as 0.5 gets a
    # negative magnitude, which indexes `units` from the end and scales the
    # value up ("500.00T"), and very large values raise IndexError.
    magnitude = max(0, min(magnitude, len(units) - 1))
    return f"{number / constant_k**magnitude:.2f}{units[magnitude]}"
|
||||
|
60
ingestion/tests/unit/profiler/test_utils.py
Normal file
60
ingestion/tests/unit/profiler/test_utils.py
Normal file
@ -0,0 +1,60 @@
|
||||
# Copyright 2021 Collate
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
Tests utils function for the profiler
|
||||
"""
|
||||
|
||||
from unittest import TestCase
|
||||
|
||||
from metadata.profiler.metrics.hybrid.histogram import Histogram
|
||||
|
||||
|
||||
class TestHistogramUtils(TestCase):
    """Checks the bin label formatting of the Histogram metric"""

    @classmethod
    def setUpClass(cls):
        # A single Histogram instance is shared by every test of the class
        cls.histogram = Histogram()

    def test_histogram_label_formatter_positive(self):
        """closed and open ended labels for positive bounds"""
        assert self.histogram._format_bin_labels(18927, 23456) == "18.93K to 23.46K"
        assert self.histogram._format_bin_labels(18927) == "18.93K and up"

    def test_histogram_label_formatter_negative(self):
        """closed and open ended labels for negative bounds"""
        assert (
            self.histogram._format_bin_labels(-18927, -23456) == "-18.93K to -23.46K"
        )
        assert self.histogram._format_bin_labels(-18927) == "-18.93K and up"

    def test_histogram_label_formatter_none(self):
        """a None lower bound is rendered as null"""
        assert self.histogram._format_bin_labels(None) == "null and up"

    def test_histogram_label_formatter_zero(self):
        """a zero lower bound is rendered without decimals or unit"""
        assert self.histogram._format_bin_labels(0) == "0 and up"

    def test_histogram_label_formatter_nines(self):
        """a bound made of nines is rounded to two decimals"""
        assert self.histogram._format_bin_labels(99999999) == "100.00M and up"

    def test_histogram_label_formatter_floats(self):
        """float bounds are scaled and rounded to two decimals"""
        assert (
            self.histogram._format_bin_labels(167893.98542, 194993.98542)
            == "167.89K to 194.99K"
        )
|
@ -103,7 +103,6 @@ const DataDistributionHistogram = ({
|
||||
<CartesianGrid stroke={GRAPH_BACKGROUND_COLOR} />
|
||||
<XAxis
|
||||
dataKey="name"
|
||||
interval={0}
|
||||
padding={{ left: 16, right: 16 }}
|
||||
tick={{ fontSize: 12 }}
|
||||
/>
|
||||
|
Loading…
x
Reference in New Issue
Block a user