From 2e3328fce0643f0e4d50bcc4fe6969aaa8c3559a Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 5 May 2025 09:24:50 -0700 Subject: [PATCH] chore(ingest): bump sqlglot dep (#13411) --- metadata-ingestion/setup.py | 2 +- metadata-ingestion/src/datahub/sql_parsing/_sqlglot_patch.py | 3 +-- .../sql_parsing/goldens/test_postgres_update_subselect.json | 2 +- .../goldens/test_snowflake_default_normalization.json | 2 +- 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index ee28c2ead2..86e48c90f5 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -105,7 +105,7 @@ sqlglot_lib = { # We heavily monkeypatch sqlglot. # We used to maintain an acryl-sqlglot fork: https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:main?expand=1 # but not longer do. - "sqlglot[rs]==26.6.0", + "sqlglot[rs]==26.16.4", "patchy==2.8.0", } diff --git a/metadata-ingestion/src/datahub/sql_parsing/_sqlglot_patch.py b/metadata-ingestion/src/datahub/sql_parsing/_sqlglot_patch.py index af1a886549..88cc68ef85 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/_sqlglot_patch.py +++ b/metadata-ingestion/src/datahub/sql_parsing/_sqlglot_patch.py @@ -163,8 +163,7 @@ def _patch_lineage() -> None: - source_columns = set(find_all_in_scope(select, exp.Column)) + source_columns = list(find_all_in_scope(select, exp.Column)) -- # If the source is a UDTF find columns used in the UTDF to generate the table -+ # If the source is a UDTF find columns used in the UDTF to generate the table + # If the source is a UDTF find columns used in the UDTF to generate the table + source = scope.expression if isinstance(source, exp.UDTF): - source_columns |= set(source.find_all(exp.Column)) diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_postgres_update_subselect.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_postgres_update_subselect.json index 129912dd86..daf5a688de 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_postgres_update_subselect.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_postgres_update_subselect.json @@ -19,7 +19,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "VARCHAR" + "native_column_type": "VARCHAR(16777216)" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_default_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_default_normalization.json index c56ce87261..0d742d712b 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_default_normalization.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_default_normalization.json @@ -75,7 +75,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "native_column_type": "DECIMAL" + "native_column_type": "DECIMAL(10, 2)" }, "upstreams": [ {