Mirror of https://github.com/open-metadata/OpenMetadata.git
* Fix: Trino Column validation errors for highly complex fields
* addressed Copilot comments
* fixed tests
* fixed tests and addressed comments
* missed file
This commit is contained in:
parent d0739f0a23
commit 54dcdc7d82
@@ -444,8 +444,13 @@ class ColumnTypeParser:
         if not data_type:
             return {"dataType": "NULL", "dataTypeDisplay": dtype}
         data_length = 1
         if re.match(r".*(\([\w]*\))", dtype):
-            data_length = re.match(r".*\(([\w]*)\)", dtype).groups()[0]
+            match = re.match(r".*\(([\w]*)\)", dtype)
+            if match:
+                try:
+                    data_length = int(match.groups()[0])
+                except ValueError:
+                    # If conversion fails, keep it as 1 (default)
+                    data_length = 1
         return {
             "dataType": data_type,
             "dataTypeDisplay": data_type,
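For readers skimming the hunk above: the only behavioral change is that the length/precision captured by the regex is now coerced to an int, falling back to the default of 1 when the captured token is not numeric. Below is a minimal standalone sketch of that logic; the helper name extract_data_length is hypothetical, but the regex is the one used in the hunk.

import re


def extract_data_length(dtype: str, default: int = 1) -> int:
    """Hypothetical helper mirroring the hunk above: pull the parenthesised
    length/precision out of a type string and coerce it to an int."""
    data_length = default
    if re.match(r".*(\([\w]*\))", dtype):
        match = re.match(r".*\(([\w]*)\)", dtype)
        if match:
            try:
                data_length = int(match.groups()[0])
            except ValueError:
                # Non-numeric token (e.g. "timestamp(precision)") keeps the default
                data_length = default
    return data_length


print(extract_data_length("varchar(255)"))          # 255 (an int, not "255")
print(extract_data_length("timestamp(precision)"))  # 1
print(extract_data_length("bigint"))                 # 1 (no length specified)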
@@ -160,7 +160,7 @@
         {
             "dataType": "VARCHAR",
             "dataTypeDisplay": "VARCHAR",
-            "dataLength": "16777216"
+            "dataLength": 16777216
         },
         {
             "dataType": "DATE",
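The fixture change above follows directly from the parser change: dataLength is now emitted as an integer, so the expected JSON must store a number rather than a string, otherwise a strict comparison against the parsed output fails. A small illustration (the field names come from the hunk; that the fixture is compared this way is an assumption):

import json

parsed = {"dataType": "VARCHAR", "dataTypeDisplay": "VARCHAR", "dataLength": 16777216}

old_expected = json.loads('{"dataType": "VARCHAR", "dataTypeDisplay": "VARCHAR", "dataLength": "16777216"}')
new_expected = json.loads('{"dataType": "VARCHAR", "dataTypeDisplay": "VARCHAR", "dataLength": 16777216}')

print(parsed == old_expected)  # False: "16777216" (str) != 16777216 (int)
print(parsed == new_expected)  # True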
@@ -1,5 +1,6 @@
 from unittest import TestCase
 
+from metadata.ingestion.source.database.column_type_parser import ColumnTypeParser
 from metadata.ingestion.source.database.trino.metadata import (
     parse_array_data_type,
     parse_row_data_type,
@@ -21,6 +22,7 @@ RAW_ROW_DATA_TYPES = [
     "row(a array(string))",
     "row(bigquerytestdatatype51 array(row(bigquery_test_datatype_511 array(string))))",
     "row(record_1 row(record_2 row(record_3 row(record_4 string))))",
+    "row(splash row(color varchar, icon varchar, custom_story_board varchar, custom_android_splash varchar), android row(firebase_app_id varchar, google_services_json varchar, whitelisted_audience varchar, android_client_id varchar, build_preview row(download_url varchar, app_version varchar)), name varchar, app_icon varchar, url_scheme varchar, universal_link varchar, universal_links array(varchar), ios row(ios_client_id varchar))",
 ]
 
 EXPECTED_ROW_DATA_TYPES = [
@@ -29,10 +31,37 @@ EXPECTED_ROW_DATA_TYPES = [
     "struct<a:array<string>>",
     "struct<bigquerytestdatatype51:array<struct<bigquery_test_datatype_511:array<string>>>>",
     "struct<record_1:struct<record_2:struct<record_3:struct<record_4:string>>>>",
+    "struct<splash:struct<color:varchar,icon:varchar,custom_story_board:varchar,custom_android_splash:varchar>,android:struct<firebase_app_id:varchar,google_services_json:varchar,whitelisted_audience:varchar,android_client_id:varchar,build_preview:struct<download_url:varchar,app_version:varchar>>,name:varchar,app_icon:varchar,url_scheme:varchar,universal_link:varchar,universal_links:array<varchar>,ios:struct<ios_client_id:varchar>>",
 ]
 
+# Test cases for dataLength conversion - only for types that should have dataLength
+DATA_LENGTH_TEST_CASES = [
+    ("varchar(20)", 20),
+    ("varchar(255)", 255),
+    ("char(10)", 10),
+    ("timestamp(3)", 3),
+    ("timestamp(6)", 6),
+    ("time(3)", 3),
+]
+
+# Test cases for non-numeric length values (should default to 1)
+NON_NUMERIC_LENGTH_TEST_CASES = [
+    ("varchar(abc)", 1),
+    ("char(xyz)", 1),
+    ("timestamp(precision)", 1),
+]
+
+# Test cases for types that should NOT have dataLength field
+NO_DATA_LENGTH_TEST_CASES = [
+    "varchar",  # No length specified
+    "int",  # No length specified
+    "bigint",  # No length specified
+    "date",  # No length specified
+    "timestamp",  # No length specified (without precision)
+]
+
 
-class SouceConnectionTest(TestCase):
+class SourceConnectionTest(TestCase):
     def test_array_datatype(self):
         for i in range(len(RAW_ARRAY_DATA_TYPES)):
             parsed_type = parse_array_data_type(RAW_ARRAY_DATA_TYPES[i])
@@ -42,3 +71,57 @@ class SouceConnectionTest(TestCase):
         for i in range(len(RAW_ROW_DATA_TYPES)):
             parsed_type = parse_row_data_type(RAW_ROW_DATA_TYPES[i])
             assert parsed_type == EXPECTED_ROW_DATA_TYPES[i]
+
+    def test_data_length_conversion(self):
+        """Test that dataLength is properly converted to integer for various data types"""
+        parser = ColumnTypeParser()
+
+        for dtype, expected_length in DATA_LENGTH_TEST_CASES:
+            with self.subTest(dtype=dtype):
+                result = parser._parse_primitive_datatype_string(dtype)
+                self.assertIn("dataLength", result)
+                self.assertIsInstance(result["dataLength"], int)
+                self.assertEqual(result["dataLength"], expected_length)
+                self.assertEqual(result["dataType"], dtype.split("(")[0].upper())
+
+    def test_non_numeric_length_handling(self):
+        """Test that non-numeric length values default to 1"""
+        parser = ColumnTypeParser()
+
+        for dtype, expected_length in NON_NUMERIC_LENGTH_TEST_CASES:
+            with self.subTest(dtype=dtype):
+                result = parser._parse_primitive_datatype_string(dtype)
+                self.assertIn("dataLength", result)
+                self.assertIsInstance(result["dataLength"], int)
+                self.assertEqual(result["dataLength"], expected_length)
+                self.assertEqual(result["dataType"], dtype.split("(")[0].upper())
+
+    def test_no_data_length_for_simple_types(self):
+        """Test that simple types without length specification don't have dataLength field"""
+        parser = ColumnTypeParser()
+
+        for dtype in NO_DATA_LENGTH_TEST_CASES:
+            with self.subTest(dtype=dtype):
+                result = parser._parse_primitive_datatype_string(dtype)
+                # These types should not have dataLength field
+                self.assertNotIn("dataLength", result)
+
+    def test_data_length_type_safety(self):
+        """Test that dataLength is always an integer type when present"""
+        parser = ColumnTypeParser()
+
+        test_dtypes = [
+            "varchar(100)",
+            "char(50)",
+            "timestamp(3)",
+            "decimal(10,2)",
+        ]
+
+        for dtype in test_dtypes:
+            with self.subTest(dtype=dtype):
+                result = parser._parse_primitive_datatype_string(dtype)
+                # Verify dataLength is always an integer when present
+                if "dataLength" in result:
+                    self.assertIsInstance(result["dataLength"], int)
+                    # Verify dataLength is never a string
+                    self.assertNotIsInstance(result["dataLength"], str)
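To sanity-check the change locally, the pieces the new tests rely on can be exercised directly. The imports below are the ones added in the test file, and _parse_primitive_datatype_string is the internal helper the tests call; running this assumes an environment with the openmetadata-ingestion package installed.

from metadata.ingestion.source.database.column_type_parser import ColumnTypeParser
from metadata.ingestion.source.database.trino.metadata import parse_row_data_type

parser = ColumnTypeParser()

# dataLength now comes back as an int, with a fallback to 1 for non-numeric precision.
assert parser._parse_primitive_datatype_string("varchar(255)")["dataLength"] == 255
assert parser._parse_primitive_datatype_string("timestamp(precision)")["dataLength"] == 1

# Deeply nested Trino row types still flatten to struct<...> display strings.
print(parse_row_data_type("row(record_1 row(record_2 row(record_3 row(record_4 string))))"))
# struct<record_1:struct<record_2:struct<record_3:struct<record_4:string>>>>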