fix: increase floating point precision (#14827)

This commit is contained in:
Teddy 2024-01-24 09:19:19 +01:00 committed by GitHub
parent 115870b2af
commit d228a93fbf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 18 additions and 9 deletions

View File

@ -354,7 +354,7 @@ def format_large_string_numbers(number: Union[float, int]) -> str:
units = ["", "K", "M", "B", "T"] units = ["", "K", "M", "B", "T"]
constant_k = 1000.0 constant_k = 1000.0
magnitude = int(floor(log(abs(number), constant_k))) magnitude = int(floor(log(abs(number), constant_k)))
return f"{number / constant_k**magnitude:.2f}{units[magnitude]}" return f"{number / constant_k**magnitude:.3f}{units[magnitude]}"
def clean_uri(uri: str) -> str: def clean_uri(uri: str) -> str:

View File

@ -176,7 +176,7 @@ class ProfilerTest(TestCase):
interQuartileRange=1.0, interQuartileRange=1.0,
nonParametricSkew=0.0, nonParametricSkew=0.0,
histogram=Histogram( histogram=Histogram(
boundaries=["30.00 to 31.26", "31.26 and up"], frequencies=[3, 1] boundaries=["30.000 to 31.260", "31.260 and up"], frequencies=[3, 1]
), ),
) )

View File

@ -176,7 +176,7 @@ class ProfilerTest(TestCase):
thirdQuartile=31.0, thirdQuartile=31.0,
interQuartileRange=1.0, interQuartileRange=1.0,
nonParametricSkew=2.0, nonParametricSkew=2.0,
histogram=Histogram(boundaries=["30.00 and up"], frequencies=[2]), histogram=Histogram(boundaries=["30.000 and up"], frequencies=[2]),
) )
def test_required_metrics(self): def test_required_metrics(self):

View File

@ -54,18 +54,18 @@ class TestHistogramUtils(TestCase):
def test_histogram_label_formatter_positive(self): def test_histogram_label_formatter_positive(self):
"""test label formatter for histogram""" """test label formatter for histogram"""
formatted_label = self.histogram._format_bin_labels(18927, 23456) formatted_label = self.histogram._format_bin_labels(18927, 23456)
assert formatted_label == "18.93K to 23.46K" assert formatted_label == "18.927K to 23.456K"
formatted_label = self.histogram._format_bin_labels(18927) formatted_label = self.histogram._format_bin_labels(18927)
assert formatted_label == "18.93K and up" assert formatted_label == "18.927K and up"
def test_histogram_label_formatter_negative(self): def test_histogram_label_formatter_negative(self):
"""test label formatter for histogram for negative numbers""" """test label formatter for histogram for negative numbers"""
formatted_label = self.histogram._format_bin_labels(-18927, -23456) formatted_label = self.histogram._format_bin_labels(-18927, -23456)
assert formatted_label == "-18.93K to -23.46K" assert formatted_label == "-18.927K to -23.456K"
formatted_label = self.histogram._format_bin_labels(-18927) formatted_label = self.histogram._format_bin_labels(-18927)
assert formatted_label == "-18.93K and up" assert formatted_label == "-18.927K and up"
def test_histogram_label_formatter_none(self): def test_histogram_label_formatter_none(self):
"""test label formatter for histogram for None""" """test label formatter for histogram for None"""
@ -80,12 +80,12 @@ class TestHistogramUtils(TestCase):
def test_histogram_label_formatter_nines(self): def test_histogram_label_formatter_nines(self):
"""test label formatter for histogram for nines""" """test label formatter for histogram for nines"""
formatted_label = self.histogram._format_bin_labels(99999999) formatted_label = self.histogram._format_bin_labels(99999999)
assert formatted_label == "100.00M and up" assert formatted_label == "100.000M and up"
def test_histogram_label_formatter_floats(self): def test_histogram_label_formatter_floats(self):
"""test label formatter for histogram for floats""" """test label formatter for histogram for floats"""
formatted_label = self.histogram._format_bin_labels(167893.98542, 194993.98542) formatted_label = self.histogram._format_bin_labels(167893.98542, 194993.98542)
assert formatted_label == "167.89K to 194.99K" assert formatted_label == "167.894K to 194.994K"
def test_is_array(): def test_is_array():

View File

@ -24,6 +24,7 @@ from metadata.generated.schema.type.tagLabel import (
from metadata.utils.helpers import ( from metadata.utils.helpers import (
clean_up_starting_ending_double_quotes_in_string, clean_up_starting_ending_double_quotes_in_string,
deep_size_of_dict, deep_size_of_dict,
format_large_string_numbers,
get_entity_tier_from_tags, get_entity_tier_from_tags,
is_safe_sql_query, is_safe_sql_query,
list_to_dict, list_to_dict,
@ -145,3 +146,11 @@ class TestHelpers(TestCase):
self.assertTrue(is_safe_sql_query(select_query)) self.assertTrue(is_safe_sql_query(select_query))
self.assertTrue(is_safe_sql_query(cte_query)) self.assertTrue(is_safe_sql_query(cte_query))
self.assertFalse(is_safe_sql_query(transaction_query)) self.assertFalse(is_safe_sql_query(transaction_query))
def test_format_large_string_numbers(self):
    """Check the three-decimal output and unit suffix of format_large_string_numbers.

    Covers one exact power of 1000 for each of the K, M, B and T suffixes,
    plus 1001, whose trailing digit only shows up with three decimal places.
    """
    expected_by_input = (
        (1000, "1.000K"),
        (1001, "1.001K"),
        (1000000, "1.000M"),
        (1000000000, "1.000B"),
        (1000000000000, "1.000T"),
    )
    for number, expected in expected_by_input:
        # subTest reports which input failed and lets the other cases still run.
        with self.subTest(number=number):
            self.assertEqual(format_large_string_numbers(number), expected)