fix(ingestion): correct trino datatype handling (#5541)

Co-authored-by: Ravindra Lanka <rlanka@acryl.io>
This commit is contained in:
Marcin Szymański 2022-08-04 04:55:14 +01:00 committed by GitHub
parent 9a3ee1cb6b
commit fa42b59d9f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 23 additions and 4 deletions

View File

@ -228,9 +228,10 @@ def resolve_postgres_modified_type(type_string: str) -> Any:
def resolve_trino_modified_type(type_string: str) -> Any:
    """Look up the ``TRINO_SQL_TYPES_MAP`` entry for a Trino type string.

    A type string written as ``name(...)`` -- for example ``timestamp(3)``,
    ``decimal(10,0)`` or ``row(x bigint, y double)`` -- is reduced to its
    leading alphabetic name before the lookup. Any other string is looked
    up unchanged.

    Args:
        type_string: The column type as a string.

    Returns:
        The value stored in ``TRINO_SQL_TYPES_MAP`` for the resolved name.

    Raises:
        KeyError: If the resolved name is not a key of
            ``TRINO_SQL_TYPES_MAP``.
    """
    # for cases like timestamp(3), decimal(10,0), row(...)
    # A single match replaces the earlier digits-only check; `.+` accepts any
    # non-empty argument list, so it also covers the old `name(<digits>)` form.
    match = re.match(r"([a-zA-Z]+)\(.+\)", type_string)
    if match:
        modified_type_base: str = match.group(1)
        return TRINO_SQL_TYPES_MAP[modified_type_base]
    else:
        return TRINO_SQL_TYPES_MAP[type_string]
@ -337,4 +338,5 @@ TRINO_SQL_TYPES_MAP = {
"date": DateType,
"time": TimeType,
"timestamp": TimeType,
"row": RecordType,
}

View File

@ -644,8 +644,25 @@ def test_dbt_stateful_tests(pytestconfig, tmp_path, mock_time, mock_datahub_grap
@pytest.mark.parametrize(
"data_type, expected_data_type",
[
("timestamp(3)", "timestamp"),
("boolean", "boolean"),
("tinyint", "tinyint"),
("smallint", "smallint"),
("int", "int"),
("integer", "integer"),
("bigint", "bigint"),
("real", "real"),
("double", "double"),
("decimal(10,0)", "decimal"),
("varchar(20)", "varchar"),
("char", "char"),
("varbinary", "varbinary"),
("json", "json"),
("date", "date"),
("time", "time"),
("time(12)", "time"),
("timestamp", "timestamp"),
("timestamp(3)", "timestamp"),
("row(x bigint, y double)", "row"),
],
)
def test_resolve_trino_modified_type(data_type, expected_data_type):