mirror of
https://github.com/datahub-project/datahub.git
synced 2025-08-15 20:57:15 +00:00
fix(ingest): snowflake - graceful error handling in snowflake classification (#6568)
This commit is contained in:
parent
d4bf6ce3fc
commit
ec056211a8
@@ -43,7 +43,7 @@ framework_common = {
|
||||
"stackprinter>=0.2.6",
|
||||
"tabulate",
|
||||
"progressbar2",
|
||||
"termcolor>=2.0.0",
|
||||
"termcolor>=1.0.0",
|
||||
"psutil>=5.8.0",
|
||||
"ratelimiter",
|
||||
"Deprecated",
|
||||
|
@@ -39,7 +39,10 @@ class SnowflakeV2Config(SnowflakeConfig, SnowflakeUsageConfig):
|
||||
default=None, description="Not supported"
|
||||
)
|
||||
|
||||
classification: Optional[ClassificationConfig] = None
|
||||
classification: Optional[ClassificationConfig] = Field(
|
||||
default=None,
|
||||
description="For details, refer [Classification](../../../../metadata-ingestion/docs/dev_guides/classification.md).",
|
||||
)
|
||||
|
||||
@root_validator(pre=False)
|
||||
def validate_unsupported_configs(cls, values: Dict) -> Dict:
|
||||
|
@@ -566,9 +566,16 @@ class SnowflakeV2Source(
|
||||
dataset_name = self.get_dataset_identifier(table.name, schema_name, db_name)
|
||||
|
||||
if self.is_classification_enabled_for_table(dataset_name):
|
||||
table.sample_data = self.get_sample_values_for_table(
|
||||
conn, table.name, schema_name, db_name
|
||||
)
|
||||
try:
|
||||
table.sample_data = self.get_sample_values_for_table(
|
||||
conn, table.name, schema_name, db_name
|
||||
)
|
||||
except Exception as e:
|
||||
self.warn(
|
||||
self.logger,
|
||||
dataset_name,
|
||||
f"unable to get table sample data due to error -> {e}",
|
||||
)
|
||||
|
||||
lineage_info = None
|
||||
if self.config.include_table_lineage:
|
||||
@@ -757,9 +764,21 @@ class SnowflakeV2Source(
|
||||
self.snowflake_identifier(col) for col in table.sample_data.columns
|
||||
]
|
||||
logger.debug(f"Classifying Table {dataset_name}")
|
||||
self.classify_schema_fields(
|
||||
dataset_name, schema_metadata, table.sample_data.to_dict(orient="list")
|
||||
)
|
||||
|
||||
try:
|
||||
self.classify_schema_fields(
|
||||
dataset_name,
|
||||
schema_metadata,
|
||||
table.sample_data.to_dict(orient="list")
|
||||
if table.sample_data is not None
|
||||
else {},
|
||||
)
|
||||
except Exception as e:
|
||||
self.warn(
|
||||
self.logger,
|
||||
dataset_name,
|
||||
f"unable to classify table columns due to error -> {e}",
|
||||
)
|
||||
|
||||
return schema_metadata
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user