mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-11-02 03:29:03 +00:00
MINOR: handle "ignored exception" error message when running table diff (#17322)
* fix: table diff implemented a safe iterator to handle the sneaky `KeyError` * changed method to safe_table_diff_iterator * format --------- Co-authored-by: Pere Miquel Brull <peremiquelbrull@gmail.com>
This commit is contained in:
parent
af9f45b00a
commit
62c33e026c
@ -9,6 +9,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# pylint: disable=missing-module-docstring
|
||||
import logging
|
||||
import traceback
|
||||
from itertools import islice
|
||||
from typing import Dict, Iterable, List, Optional, Tuple
|
||||
@ -59,6 +60,21 @@ class UnsupportedDialectError(Exception):
|
||||
super().__init__(f"Unsupported dialect in param {param}: {dialect}")
|
||||
|
||||
|
||||
def masked(s: str, mask: bool = True) -> str:
    """Return a fixed placeholder instead of ``s`` unless masking is disabled.

    Used to keep row data out of the logs. Pass ``mask=False`` (development
    only, never in production) to see the real value.

    Args:
        s: the string to protect
        mask: when True (the default) the placeholder is returned

    Returns:
        ``"***"`` when ``mask`` is truthy, otherwise ``s`` unchanged
    """
    if not mask:
        return s
    return "***"
|
||||
|
||||
|
||||
class TableDiffValidator(BaseTestValidator, SQAValidatorMixin):
|
||||
"""
|
||||
Compare two tables and fail if the number of differences exceeds a threshold
|
||||
@ -110,12 +126,14 @@ class TableDiffValidator(BaseTestValidator, SQAValidatorMixin):
|
||||
stats = table_diff_iter.get_stats_dict()
|
||||
if stats["total"] > 0:
|
||||
logger.debug("Sample of failed rows:")
|
||||
for s in islice(self.get_table_diff(), 10):
|
||||
# since the data can contain sensitive information, we don't want to log it
|
||||
# we can uncomment this line if we must see the data in the logs
|
||||
# logger.debug(s)
|
||||
# by default we will log the data masked
|
||||
logger.debug([s[0], ["*" for _ in s[1]]])
|
||||
# depending on the data, this requires scanning a lot of data
|
||||
# so we only log the sample in debug mode. data can be sensitive
|
||||
# so it is masked by default
|
||||
for s in islice(
|
||||
self.safe_table_diff_iterator(),
|
||||
10 if logger.level <= logging.DEBUG else 0,
|
||||
):
|
||||
logger.debug("%s", str([s[0]] + [masked(st) for st in s[1]]))
|
||||
test_case_result = self.get_row_diff_test_case_result(
|
||||
threshold,
|
||||
stats["total"],
|
||||
@ -222,12 +240,10 @@ class TableDiffValidator(BaseTestValidator, SQAValidatorMixin):
|
||||
"where": self.get_where(),
|
||||
}
|
||||
logger.debug(
|
||||
"Calling table diff with parameters:" # pylint: disable=consider-using-f-string
|
||||
" table1={}, table2={}, kwargs={}".format(
|
||||
table1.table_path,
|
||||
table2.table_path,
|
||||
",".join(f"{k}={v}" for k, v in data_diff_kwargs.items()),
|
||||
)
|
||||
"Calling table diff with parameters: table1=%s, table2=%s, kwargs=%s",
|
||||
table1.table_path,
|
||||
table2.table_path,
|
||||
",".join(f"{k}={v}" for k, v in data_diff_kwargs.items()),
|
||||
)
|
||||
return data_diff.diff_tables(table1, table2, **data_diff_kwargs) # type: ignore
|
||||
|
||||
@ -400,3 +416,19 @@ class TableDiffValidator(BaseTestValidator, SQAValidatorMixin):
|
||||
if len(key_set) > limit:
|
||||
len(key_set)
|
||||
return len(key_set)
|
||||
|
||||
def safe_table_diff_iterator(self) -> Iterable:
    """Yield the items of the table diff and close the diff object afterwards.

    Wraps the object returned by ``self.get_table_diff()`` in a generator.
    Once iteration has started, the ``finally`` clause calls ``diff.close()``
    on that object when the generator finishes, is closed early, or fails.
    Otherwise the data_diff library will continue to hold the connection open
    and eventually raise a ``KeyError``.

    Yields:
        The items produced by the underlying table diff, unchanged.

    Raises:
        KeyError: if closing the diff fails with a ``KeyError`` other than the
            known ``KeyError(2)``.
    """
    gen = self.get_table_diff()
    try:
        yield from gen
    finally:
        try:
            gen.diff.close()
        except KeyError as ex:
            # KeyError(2) is a known data_diff issue raised while closing the
            # diff object and is safe to ignore. Any other KeyError is
            # unexpected and is propagated instead of being hidden.
            if str(ex) != "2":
                raise
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user