diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py index 302e2c39a20..3b4a7e1dc02 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py @@ -237,10 +237,17 @@ def resolve_trino_modified_type(type_string: str) -> Any: def resolve_athena_modified_type(type_string: str) -> Any: + # for cases like struct<...>, array<...>, map<...> + match_complex = re.match(r"([a-zA-Z]+)<.+>", type_string) # for cases like timestamp(3), decimal(10,0) - match = re.match(r"([a-zA-Z]+)\(.+\)", type_string) - if match: - modified_type_base: str = match.group(1) + match_simple = re.match(r"([a-zA-Z]+)\(.+\)", type_string) + + modified_type_base = "" + if match_complex: + modified_type_base = match_complex.group(1) + elif match_simple: + modified_type_base = match_simple.group(1) + if modified_type_base: return ATHENA_SQL_TYPES_MAP[modified_type_base] return ATHENA_SQL_TYPES_MAP[type_string] diff --git a/metadata-ingestion/tests/integration/dbt/test_dbt.py b/metadata-ingestion/tests/integration/dbt/test_dbt.py index 0a66a2382d7..a970ff6a5de 100644 --- a/metadata-ingestion/tests/integration/dbt/test_dbt.py +++ b/metadata-ingestion/tests/integration/dbt/test_dbt.py @@ -326,9 +326,9 @@ def test_resolve_trino_modified_type(data_type, expected_data_type): ("date", "date"), ("timestamp", "timestamp"), ("timestamp(3)", "timestamp"), - ("struct(x bigint, y double)", "struct"), - ("array(struct(x bigint, y double))", "array"), - ("map(varchar, varchar)", "map"), + ("struct<x bigint, y double>", "struct"), + ("array<struct<x bigint, y double>>", "array"), + ("map<varchar, varchar>", "map"), ], ) def test_resolve_athena_modified_type(data_type, expected_data_type):