diff --git a/ingestion/src/metadata/profiler/metrics/hybrid/histogram.py b/ingestion/src/metadata/profiler/metrics/hybrid/histogram.py
index 8eebbc12112..922318cccb7 100644
--- a/ingestion/src/metadata/profiler/metrics/hybrid/histogram.py
+++ b/ingestion/src/metadata/profiler/metrics/hybrid/histogram.py
@@ -24,6 +24,7 @@ from metadata.profiler.metrics.static.count import Count
from metadata.profiler.metrics.static.max import Max
from metadata.profiler.metrics.static.min import Min
from metadata.profiler.orm.registry import is_quantifiable
+from metadata.utils.helpers import format_large_string_numbers
from metadata.utils.logger import profiler_logger
logger = profiler_logger()
@@ -74,6 +75,27 @@ class Histogram(HybridMetric):
float(res_max),
) # Decimal to float
+ @staticmethod
+ def _format_bin_labels(
+ lower_bin: Union[float, int], upper_bin: Optional[Union[float, int]] = None
+ ) -> str:
+ """format bin labels
+
+ Args:
+ lower_bin: lower bin
+ upper_bin: upper bin. Defaults to None.
+
+ Returns:
+ str: formatted bin labels
+ """
+ if lower_bin is None:
+ formatted_lower_bin = "null"
+ else:
+ formatted_lower_bin = format_large_string_numbers(lower_bin)
+ if upper_bin is None:
+ return f"{formatted_lower_bin} and up"
+ return f"{formatted_lower_bin} to {format_large_string_numbers(upper_bin)}"
+
def fn(
self,
sample: Optional[DeclarativeMeta],
@@ -120,14 +142,17 @@ class Histogram(HybridMetric):
condition = and_(col >= starting_bin_bound)
case_stmts.append(
func.count(case([(condition, col)])).label(
- f"{starting_bin_bound:.2f} and up"
+ self._format_bin_labels(starting_bin_bound)
)
)
continue
case_stmts.append(
func.count(case([(condition, col)])).label(
- f"{starting_bin_bound:.2f} to {ending_bin_bound:.2f}"
+ self._format_bin_labels(
+ starting_bin_bound,
+ ending_bin_bound,
+ )
)
)
starting_bin_bound = ending_bin_bound
@@ -176,9 +201,9 @@ class Histogram(HybridMetric):
bins = list(np.arange(num_bins) * bind_width + res_min)
bins_label = [
- f"{bins[i]:.2f} to {bins[i+1]:.2f}"
+ self._format_bin_labels(bins[i], bins[i + 1])
if i < len(bins) - 1
- else f"{bins[i]:.2f} and up"
+ else self._format_bin_labels(bins[i])
for i in range(len(bins))
]
diff --git a/ingestion/src/metadata/utils/helpers.py b/ingestion/src/metadata/utils/helpers.py
index b52b9ba71f7..15ade5f1fe5 100644
--- a/ingestion/src/metadata/utils/helpers.py
+++ b/ingestion/src/metadata/utils/helpers.py
@@ -18,6 +18,7 @@ from __future__ import annotations
import re
from datetime import datetime, timedelta
from functools import wraps
+from math import floor, log
from time import perf_counter
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
@@ -327,3 +328,18 @@ def get_entity_tier_from_tags(tags: list[TagLabel]) -> Optional[str]:
),
None,
)
+
+
def format_large_string_numbers(number: Union[float, int]) -> str:
    """Format a number as a short, human readable string.

    The value is scaled by powers of 1000 and suffixed with the matching
    unit (e.g. 18927 -> "18.93K", 1,000,000 -> "1.00M",
    1,000,000,000 -> "1.00B"). Values whose absolute value is below 1000
    are rendered without a suffix (e.g. 0.5 -> "0.50"), and values beyond
    the largest known unit keep that unit (e.g. 10**15 -> "1000.00T").

    Args:
        number: number to format

    Returns:
        str: "0" when the number equals zero, otherwise the scaled value
        with two decimals followed by its unit suffix.
    """
    if number == 0:
        return "0"
    units = ["", "K", "M", "B", "T"]
    constant_k = 1000.0

    # Find the magnitude with exact threshold comparisons instead of
    # floor(log(...)): the logarithm is negative for |number| < 1 (which
    # would index the units list from the end), is unbounded for huge values
    # (IndexError), and is subject to floating point rounding on exact
    # powers of 1000.
    magnitude = 0
    threshold = constant_k
    absolute = abs(number)
    while magnitude < len(units) - 1 and absolute >= threshold:
        magnitude += 1
        threshold *= constant_k

    return f"{number / constant_k**magnitude:.2f}{units[magnitude]}"
diff --git a/ingestion/tests/unit/profiler/test_utils.py b/ingestion/tests/unit/profiler/test_utils.py
new file mode 100644
index 00000000000..03a541d0b15
--- /dev/null
+++ b/ingestion/tests/unit/profiler/test_utils.py
@@ -0,0 +1,60 @@
+# Copyright 2021 Collate
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Tests utils function for the profiler
+"""
+
+from unittest import TestCase
+
+from metadata.profiler.metrics.hybrid.histogram import Histogram
+
+
class TestHistogramUtils(TestCase):
    """Checks the bin label formatting helper exposed by the Histogram metric."""

    @classmethod
    def setUpClass(cls):
        # A single metric instance is shared by every test in the class.
        cls.histogram = Histogram()

    def test_histogram_label_formatter_positive(self):
        """Positive bounds are shortened with a unit suffix."""
        bounded = self.histogram._format_bin_labels(18927, 23456)
        self.assertEqual(bounded, "18.93K to 23.46K")

        open_ended = self.histogram._format_bin_labels(18927)
        self.assertEqual(open_ended, "18.93K and up")

    def test_histogram_label_formatter_negative(self):
        """Negative bounds keep their sign in the label."""
        bounded = self.histogram._format_bin_labels(-18927, -23456)
        self.assertEqual(bounded, "-18.93K to -23.46K")

        open_ended = self.histogram._format_bin_labels(-18927)
        self.assertEqual(open_ended, "-18.93K and up")

    def test_histogram_label_formatter_none(self):
        """A missing lower bound is rendered as "null"."""
        label = self.histogram._format_bin_labels(None)
        self.assertEqual(label, "null and up")

    def test_histogram_label_formatter_zero(self):
        """Zero is rendered without decimals or suffix."""
        label = self.histogram._format_bin_labels(0)
        self.assertEqual(label, "0 and up")

    def test_histogram_label_formatter_nines(self):
        """A value just below a round number rounds up in the label."""
        label = self.histogram._format_bin_labels(99999999)
        self.assertEqual(label, "100.00M and up")

    def test_histogram_label_formatter_floats(self):
        """Float bounds are rounded to two decimals after scaling."""
        label = self.histogram._format_bin_labels(167893.98542, 194993.98542)
        self.assertEqual(label, "167.89K to 194.99K")
diff --git a/openmetadata-ui/src/main/resources/ui/src/components/Chart/DataDistributionHistogram.component.tsx b/openmetadata-ui/src/main/resources/ui/src/components/Chart/DataDistributionHistogram.component.tsx
index b05f514b56d..03655e9bac8 100644
--- a/openmetadata-ui/src/main/resources/ui/src/components/Chart/DataDistributionHistogram.component.tsx
+++ b/openmetadata-ui/src/main/resources/ui/src/components/Chart/DataDistributionHistogram.component.tsx
@@ -103,7 +103,6 @@ const DataDistributionHistogram = ({