From 5b9fd977eb60d4a1a41d6aa7cadfb7bce01a93ab Mon Sep 17 00:00:00 2001 From: Serhii Dimchenko <39801237+svdimchenko@users.noreply.github.com> Date: Thu, 22 Jun 2023 11:40:27 +0200 Subject: [PATCH] fix(ingest/dbt-athena): dbt-athena types mapping for complex types (#8264) Co-authored-by: Harshal Sheth --- .../src/datahub/ingestion/source/sql/sql_types.py | 13 ++++++++++--- .../tests/integration/dbt/test_dbt.py | 6 +++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py index 302e2c39a2..3b4a7e1dc0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py @@ -237,10 +237,17 @@ def resolve_trino_modified_type(type_string: str) -> Any: def resolve_athena_modified_type(type_string: str) -> Any: + # for cases like struct<...>, array<...>, map<...> + match_complex = re.match(r"([a-zA-Z]+)<.+>", type_string) # for cases like timestamp(3), decimal(10,0) - match = re.match(r"([a-zA-Z]+)\(.+\)", type_string) - if match: - modified_type_base: str = match.group(1) + match_simple = re.match(r"([a-zA-Z]+)\(.+\)", type_string) + + modified_type_base = "" + if match_complex: + modified_type_base = match_complex.group(1) + elif match_simple: + modified_type_base = match_simple.group(1) + if modified_type_base: return ATHENA_SQL_TYPES_MAP[modified_type_base] return ATHENA_SQL_TYPES_MAP[type_string] diff --git a/metadata-ingestion/tests/integration/dbt/test_dbt.py b/metadata-ingestion/tests/integration/dbt/test_dbt.py index 0a66a2382d..a970ff6a5d 100644 --- a/metadata-ingestion/tests/integration/dbt/test_dbt.py +++ b/metadata-ingestion/tests/integration/dbt/test_dbt.py @@ -326,9 +326,9 @@ def test_resolve_trino_modified_type(data_type, expected_data_type): ("date", "date"), ("timestamp", "timestamp"), ("timestamp(3)", "timestamp"), - ("struct(x bigint, 
y double)", "struct"), - ("array(struct(x bigint, y double))", "array"), - ("map(varchar, varchar)", "map"), + ("struct<x bigint, y double>", "struct"), + ("array<struct<x bigint, y double>>", "array"), + ("map<varchar, varchar>", "map"), ], ) def test_resolve_athena_modified_type(data_type, expected_data_type):