Added condition for datatype=array without type consistency (#19312)

This commit is contained in:
Akash Verma 2025-01-14 14:05:42 +05:30 committed by GitHub
parent f1e015c89d
commit 072711a563
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 26 additions and 3 deletions

View File

@ -227,6 +227,17 @@ class SupersetSourceMixin(DashboardServiceSource):
"""clean datatype of column fetched from superset"""
return datatype.replace("()", "")
def parse_array_data_type(self, col_parse: dict) -> Optional[DataType]:
    """
    Resolve the ``arrayDataType`` for a parsed column.

    Array columns whose element type could not be determined are mapped
    to ``DataType.UNKNOWN`` so that an ARRAY column never ends up with a
    null ``arrayDataType`` (which fails validation).

    Args:
        col_parse: parsed column description; the ``dataType`` and
            ``arrayDataType`` keys are read, both treated as optional.

    Returns:
        ``DataType(col_parse["arrayDataType"])`` when an element type is
        present, ``DataType.UNKNOWN`` for an ``"ARRAY"`` column without
        one, and ``None`` for every other column.
    """
    array_data_type = col_parse.get("arrayDataType")
    if array_data_type:
        return DataType(array_data_type)
    # No usable element type: only array columns need a placeholder.
    if col_parse.get("dataType") == "ARRAY":
        return DataType.UNKNOWN
    return None
def get_column_info(
self, data_source: List[Union[DataSourceResult, FetchColumn]]
) -> Optional[List[Column]]:
@ -247,9 +258,7 @@ class SupersetSourceMixin(DashboardServiceSource):
parsed_fields = Column(
dataTypeDisplay=field.type,
dataType=col_parse["dataType"],
arrayDataType=DataType(col_parse["arrayDataType"])
if col_parse.get("arrayDataType")
else None,
arrayDataType=self.parse_array_data_type(col_parse),
children=list(col_parse["children"])
if col_parse.get("children")
else None,

View File

@ -17,6 +17,7 @@ import os
from unittest import TestCase
from metadata.generated.schema.entity.data.table import DataType
from metadata.ingestion.source.dashboard.superset.mixin import SupersetSourceMixin
from metadata.ingestion.source.database.column_type_parser import ColumnTypeParser
from metadata.utils.datalake.datalake_utils import GenericDataFrameColumnParser
@ -132,3 +133,16 @@ def test_check_datalake_type():
assert assert_col_type_dict.get(
column_name
) == GenericDataFrameColumnParser.fetch_col_types(df, column_name)
def test_superset_parse_array_data_type():
    """Test the parse_array_data_type method with different input scenarios"""
    # The method does not read instance state, so None stands in for self.

    # Array column with an explicit element type: converted to DataType.
    col_parse = {"dataType": "ARRAY", "arrayDataType": "STRING"}
    result = SupersetSourceMixin.parse_array_data_type(None, col_parse)
    assert result == DataType.STRING

    # Array column without an element type: falls back to UNKNOWN.
    col_parse = {"dataType": "ARRAY", "arrayDataType": None}
    result = SupersetSourceMixin.parse_array_data_type(None, col_parse)
    assert result == DataType.UNKNOWN

    # Non-array column without an element type: nothing to report.
    col_parse = {"dataType": "STRING", "arrayDataType": None}
    result = SupersetSourceMixin.parse_array_data_type(None, col_parse)
    assert result is None