MINOR - Fix General Profiler Bugs (#17995)

* fix import issue

* fix: better handle None values in profiler processing

* fix: profiler errors

* chore: fix comment

* style: fix python linting failure

* fix: null byte error with the database client

---------

Co-authored-by: Chirag Madlani <12962843+chirag-madlani@users.noreply.github.com>
(cherry picked from commit 8dc6b7d282a700c2a3d4659f904a3966e16843a2)
This commit is contained in:
Teddy 2024-09-27 14:36:30 +02:00 committed by Teddy Crepineau
parent d83e3a6de8
commit d3325cda93
3 changed files with 23 additions and 7 deletions

View File

@ -50,8 +50,9 @@ class NullRatio(ComposedMetric):
results of other Metrics
"""
count = res.get(Count.name())
null_count = res.get(NullCount.name())
if count + null_count == 0:
count = res.get(Count.name(), 0)
null_count = res.get(NullCount.name(), 0)
total = count + null_count
if total == 0:
return None
return null_count / (null_count + count)
return null_count / total

View File

@ -176,6 +176,8 @@ class SnowflakeTableMetricComputer(BaseTableMetricComputer):
)
rest = self._runner._session.execute(query).first()
if not rest:
return None
if rest.rowCount is None:
# if we don't have any row count, fallback to the base logic
return super().compute()

View File

@ -22,6 +22,7 @@ from sqlalchemy.sql.sqltypes import String, TypeDecorator
from metadata.utils.logger import ingestion_logger
logger = ingestion_logger()
NULL_BYTE = "\x00"
class HexByteString(TypeDecorator):
@ -63,10 +64,22 @@ class HexByteString(TypeDecorator):
detected_encoding = chardet.detect(bytes_value).get("encoding")
if detected_encoding:
try:
value = bytes_value.decode(encoding=detected_encoding)
return value
# Decode the bytes value with the detected encoding and replace errors with "?"
# if bytes cannot be decoded e.g. b"\x66\x67\x67\x9c", if detected_encoding="utf-8"
# will result in 'foo<6F>' (instead of failing)
str_value = bytes_value.decode(
encoding=detected_encoding, errors="replace"
)
# Replace NULL_BYTE with empty string to avoid errors with
# the database client (should be O(n))
str_value = (
str_value.replace(NULL_BYTE, "")
if NULL_BYTE in str_value
else str_value
)
return str_value
except Exception as exc:
logger.debug("Failed to parse bytes valud as string: %s", exc)
logger.debug("Failed to parse bytes value as string: %s", exc)
logger.debug(traceback.format_exc())
return value.hex()