From d228a93fbf246bf14a733519ce663c9775e882e7 Mon Sep 17 00:00:00 2001 From: Teddy Date: Wed, 24 Jan 2024 09:19:19 +0100 Subject: [PATCH] fix: increase floating point precision (#14827) --- ingestion/src/metadata/utils/helpers.py | 2 +- .../tests/unit/profiler/pandas/test_profiler.py | 2 +- .../tests/unit/profiler/sqlalchemy/test_profiler.py | 2 +- ingestion/tests/unit/profiler/test_utils.py | 12 ++++++------ ingestion/tests/unit/test_helpers.py | 9 +++++++++ 5 files changed, 18 insertions(+), 9 deletions(-) diff --git a/ingestion/src/metadata/utils/helpers.py b/ingestion/src/metadata/utils/helpers.py index d60cb1a7d2c..f76c979934b 100644 --- a/ingestion/src/metadata/utils/helpers.py +++ b/ingestion/src/metadata/utils/helpers.py @@ -354,7 +354,7 @@ def format_large_string_numbers(number: Union[float, int]) -> str: units = ["", "K", "M", "B", "T"] constant_k = 1000.0 magnitude = int(floor(log(abs(number), constant_k))) - return f"{number / constant_k**magnitude:.2f}{units[magnitude]}" + return f"{number / constant_k**magnitude:.3f}{units[magnitude]}" def clean_uri(uri: str) -> str: diff --git a/ingestion/tests/unit/profiler/pandas/test_profiler.py b/ingestion/tests/unit/profiler/pandas/test_profiler.py index 2f07ae6acef..55f35ff6613 100644 --- a/ingestion/tests/unit/profiler/pandas/test_profiler.py +++ b/ingestion/tests/unit/profiler/pandas/test_profiler.py @@ -176,7 +176,7 @@ class ProfilerTest(TestCase): interQuartileRange=1.0, nonParametricSkew=0.0, histogram=Histogram( - boundaries=["30.00 to 31.26", "31.26 and up"], frequencies=[3, 1] + boundaries=["30.000 to 31.260", "31.260 and up"], frequencies=[3, 1] ), ) diff --git a/ingestion/tests/unit/profiler/sqlalchemy/test_profiler.py b/ingestion/tests/unit/profiler/sqlalchemy/test_profiler.py index 53806592632..1bbd8280e92 100644 --- a/ingestion/tests/unit/profiler/sqlalchemy/test_profiler.py +++ b/ingestion/tests/unit/profiler/sqlalchemy/test_profiler.py @@ -176,7 +176,7 @@ class ProfilerTest(TestCase): thirdQuartile=31.0, interQuartileRange=1.0, nonParametricSkew=2.0, - histogram=Histogram(boundaries=["30.00 and up"], frequencies=[2]), + histogram=Histogram(boundaries=["30.000 and up"], frequencies=[2]), ) def test_required_metrics(self): diff --git a/ingestion/tests/unit/profiler/test_utils.py b/ingestion/tests/unit/profiler/test_utils.py index 164c85f84b1..48175f7ada7 100644 --- a/ingestion/tests/unit/profiler/test_utils.py +++ b/ingestion/tests/unit/profiler/test_utils.py @@ -54,18 +54,18 @@ class TestHistogramUtils(TestCase): def test_histogram_label_formatter_positive(self): """test label formatter for histogram""" formatted_label = self.histogram._format_bin_labels(18927, 23456) - assert formatted_label == "18.93K to 23.46K" + assert formatted_label == "18.927K to 23.456K" formatted_label = self.histogram._format_bin_labels(18927) - assert formatted_label == "18.93K and up" + assert formatted_label == "18.927K and up" def test_histogram_label_formatter_negative(self): """test label formatter for histogram for negative numbers""" formatted_label = self.histogram._format_bin_labels(-18927, -23456) - assert formatted_label == "-18.93K to -23.46K" + assert formatted_label == "-18.927K to -23.456K" formatted_label = self.histogram._format_bin_labels(-18927) - assert formatted_label == "-18.93K and up" + assert formatted_label == "-18.927K and up" def test_histogram_label_formatter_none(self): """test label formatter for histogram for None""" @@ -80,12 +80,12 @@ class TestHistogramUtils(TestCase): def test_histogram_label_formatter_nines(self): """test label formatter for histogram for nines""" formatted_label = self.histogram._format_bin_labels(99999999) - assert formatted_label == "100.00M and up" + assert formatted_label == "100.000M and up" def test_histogram_label_formatter_floats(self): """test label formatter for histogram for floats""" formatted_label = self.histogram._format_bin_labels(167893.98542, 194993.98542) - assert formatted_label == "167.89K to 194.99K" + assert formatted_label == "167.894K to 194.994K" def test_is_array(): diff --git a/ingestion/tests/unit/test_helpers.py b/ingestion/tests/unit/test_helpers.py index 7a1ef45bfe1..d84f3a97360 100644 --- a/ingestion/tests/unit/test_helpers.py +++ b/ingestion/tests/unit/test_helpers.py @@ -24,6 +24,7 @@ from metadata.generated.schema.type.tagLabel import ( from metadata.utils.helpers import ( clean_up_starting_ending_double_quotes_in_string, deep_size_of_dict, + format_large_string_numbers, get_entity_tier_from_tags, is_safe_sql_query, list_to_dict, @@ -145,3 +146,11 @@ class TestHelpers(TestCase): self.assertTrue(is_safe_sql_query(select_query)) self.assertTrue(is_safe_sql_query(cte_query)) self.assertFalse(is_safe_sql_query(transaction_query)) + + def test_format_large_string_numbers(self): + """test format_large_string_numbers""" + assert format_large_string_numbers(1000) == "1.000K" + assert format_large_string_numbers(1001) == "1.001K" + assert format_large_string_numbers(1000000) == "1.000M" + assert format_large_string_numbers(1000000000) == "1.000B" + assert format_large_string_numbers(1000000000000) == "1.000T"