From ec056211a828fec6e0320ae702624f1d4a48d7ab Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Tue, 29 Nov 2022 16:54:24 +0530 Subject: [PATCH] fix(ingest): snowflake - graceful error handling in snowflake classification (#6568) --- metadata-ingestion/setup.py | 2 +- .../source/snowflake/snowflake_config.py | 5 ++- .../source/snowflake/snowflake_v2.py | 31 +++++++++++++++---- 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 86c08426f6..ec932e3a1e 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -43,7 +43,7 @@ framework_common = { "stackprinter>=0.2.6", "tabulate", "progressbar2", - "termcolor>=2.0.0", + "termcolor>=1.0.0", "psutil>=5.8.0", "ratelimiter", "Deprecated", diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py index a54c03ef2c..b6729bd43f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py @@ -39,7 +39,10 @@ class SnowflakeV2Config(SnowflakeConfig, SnowflakeUsageConfig): default=None, description="Not supported" ) - classification: Optional[ClassificationConfig] = None + classification: Optional[ClassificationConfig] = Field( + default=None, + description="For details, refer [Classification](../../../../metadata-ingestion/docs/dev_guides/classification.md).", + ) @root_validator(pre=False) def validate_unsupported_configs(cls, values: Dict) -> Dict: diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index b3cfd7b11d..5cee4cb719 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -566,9 +566,16 @@ class SnowflakeV2Source( dataset_name = self.get_dataset_identifier(table.name, schema_name, db_name) if self.is_classification_enabled_for_table(dataset_name): - table.sample_data = self.get_sample_values_for_table( - conn, table.name, schema_name, db_name - ) + try: + table.sample_data = self.get_sample_values_for_table( + conn, table.name, schema_name, db_name + ) + except Exception as e: + self.warn( + self.logger, + dataset_name, + f"unable to get table sample data due to error -> {e}", + ) lineage_info = None if self.config.include_table_lineage: @@ -757,9 +764,21 @@ class SnowflakeV2Source( self.snowflake_identifier(col) for col in table.sample_data.columns ] logger.debug(f"Classifying Table {dataset_name}") - self.classify_schema_fields( - dataset_name, schema_metadata, table.sample_data.to_dict(orient="list") - ) + + try: + self.classify_schema_fields( + dataset_name, + schema_metadata, + table.sample_data.to_dict(orient="list") + if table.sample_data is not None + else {}, + ) + except Exception as e: + self.warn( + self.logger, + dataset_name, + f"unable to classify table columns due to error -> {e}", + ) return schema_metadata