fix: increase floating point precision (#14827)

This commit is contained in:
Teddy 2024-01-24 09:19:19 +01:00 committed by GitHub
parent 115870b2af
commit d228a93fbf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 18 additions and 9 deletions

View File

@@ -354,7 +354,7 @@ def format_large_string_numbers(number: Union[float, int]) -> str:
 units = ["", "K", "M", "B", "T"]
 constant_k = 1000.0
 magnitude = int(floor(log(abs(number), constant_k)))
-return f"{number / constant_k**magnitude:.2f}{units[magnitude]}"
+return f"{number / constant_k**magnitude:.3f}{units[magnitude]}"
 def clean_uri(uri: str) -> str:

View File

@@ -176,7 +176,7 @@ class ProfilerTest(TestCase):
 interQuartileRange=1.0,
 nonParametricSkew=0.0,
 histogram=Histogram(
-boundaries=["30.00 to 31.26", "31.26 and up"], frequencies=[3, 1]
+boundaries=["30.000 to 31.260", "31.260 and up"], frequencies=[3, 1]
 ),
 )

View File

@@ -176,7 +176,7 @@ class ProfilerTest(TestCase):
 thirdQuartile=31.0,
 interQuartileRange=1.0,
 nonParametricSkew=2.0,
-histogram=Histogram(boundaries=["30.00 and up"], frequencies=[2]),
+histogram=Histogram(boundaries=["30.000 and up"], frequencies=[2]),
 )
 def test_required_metrics(self):

View File

@@ -54,18 +54,18 @@ class TestHistogramUtils(TestCase):
 def test_histogram_label_formatter_positive(self):
 """test label formatter for histogram"""
 formatted_label = self.histogram._format_bin_labels(18927, 23456)
-assert formatted_label == "18.93K to 23.46K"
+assert formatted_label == "18.927K to 23.456K"
 formatted_label = self.histogram._format_bin_labels(18927)
-assert formatted_label == "18.93K and up"
+assert formatted_label == "18.927K and up"
 def test_histogram_label_formatter_negative(self):
 """test label formatter for histogram for negative numbers"""
 formatted_label = self.histogram._format_bin_labels(-18927, -23456)
-assert formatted_label == "-18.93K to -23.46K"
+assert formatted_label == "-18.927K to -23.456K"
 formatted_label = self.histogram._format_bin_labels(-18927)
-assert formatted_label == "-18.93K and up"
+assert formatted_label == "-18.927K and up"
 def test_histogram_label_formatter_none(self):
 """test label formatter for histogram for None"""
@@ -80,12 +80,12 @@ class TestHistogramUtils(TestCase):
 def test_histogram_label_formatter_nines(self):
 """test label formatter for histogram for nines"""
 formatted_label = self.histogram._format_bin_labels(99999999)
-assert formatted_label == "100.00M and up"
+assert formatted_label == "100.000M and up"
 def test_histogram_label_formatter_floats(self):
 """test label formatter for histogram for floats"""
 formatted_label = self.histogram._format_bin_labels(167893.98542, 194993.98542)
-assert formatted_label == "167.89K to 194.99K"
+assert formatted_label == "167.894K to 194.994K"
 def test_is_array():

View File

@@ -24,6 +24,7 @@ from metadata.generated.schema.type.tagLabel import (
 from metadata.utils.helpers import (
 clean_up_starting_ending_double_quotes_in_string,
 deep_size_of_dict,
+format_large_string_numbers,
 get_entity_tier_from_tags,
 is_safe_sql_query,
 list_to_dict,
@@ -145,3 +146,11 @@ class TestHelpers(TestCase):
 self.assertTrue(is_safe_sql_query(select_query))
 self.assertTrue(is_safe_sql_query(cte_query))
 self.assertFalse(is_safe_sql_query(transaction_query))
+def test_format_large_string_numbers(self):
+"""test format_large_string_numbers"""
+assert format_large_string_numbers(1000) == "1.000K"
+assert format_large_string_numbers(1001) == "1.001K"
+assert format_large_string_numbers(1000000) == "1.000M"
+assert format_large_string_numbers(1000000000) == "1.000B"
+assert format_large_string_numbers(1000000000000) == "1.000T"