mirror of
				https://github.com/open-metadata/OpenMetadata.git
				synced 2025-11-03 20:19:31 +00:00 
			
		
		
		
	MINOR: handle "ignored exception" error message when running table diff (#17322)
* fix: table diff implemented a safe iterator to handle the sneaky `KeyError` * changed method to safe_table_diff_iterator * format --------- Co-authored-by: Pere Miquel Brull <peremiquelbrull@gmail.com>
This commit is contained in:
		
							parent
							
								
									af9f45b00a
								
							
						
					
					
						commit
						62c33e026c
					
				@ -9,6 +9,7 @@
 | 
			
		||||
#  See the License for the specific language governing permissions and
 | 
			
		||||
#  limitations under the License.
 | 
			
		||||
# pylint: disable=missing-module-docstring
 | 
			
		||||
import logging
 | 
			
		||||
import traceback
 | 
			
		||||
from itertools import islice
 | 
			
		||||
from typing import Dict, Iterable, List, Optional, Tuple
 | 
			
		||||
@ -59,6 +60,21 @@ class UnsupportedDialectError(Exception):
 | 
			
		||||
        super().__init__(f"Unsupported dialect in param {param}: {dialect}")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def masked(s: str, mask: bool = True) -> str:
 | 
			
		||||
    """Mask a string if masked is True otherwise return the string.
 | 
			
		||||
    Only for development purposes, do not use in production.
 | 
			
		||||
    Change it False if you want to see the data in the logs.
 | 
			
		||||
 | 
			
		||||
    Args:
 | 
			
		||||
        s: string to mask
 | 
			
		||||
        mask: boolean to mask the string
 | 
			
		||||
 | 
			
		||||
    Returns:
 | 
			
		||||
        masked string if mask is True otherwise return the string
 | 
			
		||||
    """
 | 
			
		||||
    return "***" if mask else s
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TableDiffValidator(BaseTestValidator, SQAValidatorMixin):
 | 
			
		||||
    """
 | 
			
		||||
    Compare two tables and fail if the number of differences exceeds a threshold
 | 
			
		||||
@ -110,12 +126,14 @@ class TableDiffValidator(BaseTestValidator, SQAValidatorMixin):
 | 
			
		||||
            stats = table_diff_iter.get_stats_dict()
 | 
			
		||||
            if stats["total"] > 0:
 | 
			
		||||
                logger.debug("Sample of failed rows:")
 | 
			
		||||
                for s in islice(self.get_table_diff(), 10):
 | 
			
		||||
                    # since the data can contiant sensitive information, we don't want to log it
 | 
			
		||||
                    # we can uncomment this line if we must see the data in the logs
 | 
			
		||||
                    # logger.debug(s)
 | 
			
		||||
                    # by default we will log the data masked
 | 
			
		||||
                    logger.debug([s[0], ["*" for _ in s[1]]])
 | 
			
		||||
                # depending on the data, this require scanning a lot of data
 | 
			
		||||
                # so we only log the sample in debug mode. data can be sensitive
 | 
			
		||||
                # so it is masked by default
 | 
			
		||||
                for s in islice(
 | 
			
		||||
                    self.safe_table_diff_iterator(),
 | 
			
		||||
                    10 if logger.level <= logging.DEBUG else 0,
 | 
			
		||||
                ):
 | 
			
		||||
                    logger.debug("%s", str([s[0]] + [masked(st) for st in s[1]]))
 | 
			
		||||
            test_case_result = self.get_row_diff_test_case_result(
 | 
			
		||||
                threshold,
 | 
			
		||||
                stats["total"],
 | 
			
		||||
@ -222,12 +240,10 @@ class TableDiffValidator(BaseTestValidator, SQAValidatorMixin):
 | 
			
		||||
            "where": self.get_where(),
 | 
			
		||||
        }
 | 
			
		||||
        logger.debug(
 | 
			
		||||
            "Calling table diff with parameters:"  # pylint: disable=consider-using-f-string
 | 
			
		||||
            " table1={}, table2={}, kwargs={}".format(
 | 
			
		||||
                table1.table_path,
 | 
			
		||||
                table2.table_path,
 | 
			
		||||
                ",".join(f"{k}={v}" for k, v in data_diff_kwargs.items()),
 | 
			
		||||
            )
 | 
			
		||||
            "Calling table diff with parameters: table1=%s, table2=%s, kwargs=%s",
 | 
			
		||||
            table1.table_path,
 | 
			
		||||
            table2.table_path,
 | 
			
		||||
            ",".join(f"{k}={v}" for k, v in data_diff_kwargs.items()),
 | 
			
		||||
        )
 | 
			
		||||
        return data_diff.diff_tables(table1, table2, **data_diff_kwargs)  # type: ignore
 | 
			
		||||
 | 
			
		||||
@ -400,3 +416,19 @@ class TableDiffValidator(BaseTestValidator, SQAValidatorMixin):
 | 
			
		||||
            if len(key_set) > limit:
 | 
			
		||||
                len(key_set)
 | 
			
		||||
        return len(key_set)
 | 
			
		||||
 | 
			
		||||
    def safe_table_diff_iterator(self) -> DiffResultWrapper:
 | 
			
		||||
        """A safe iterator object which properly closes the diff object when the generator is exhausted.
 | 
			
		||||
        Otherwise the data_diff library will continue to hold the connection open and eventually
 | 
			
		||||
        raise a KeyError.
 | 
			
		||||
        """
 | 
			
		||||
        gen = self.get_table_diff()
 | 
			
		||||
        try:
 | 
			
		||||
            yield from gen
 | 
			
		||||
        finally:
 | 
			
		||||
            try:
 | 
			
		||||
                gen.diff.close()
 | 
			
		||||
            except KeyError as ex:
 | 
			
		||||
                if str(ex) == "2":
 | 
			
		||||
                    # This is a known issue in data_diff where the diff object is closed
 | 
			
		||||
                    pass
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user