fix(ingest): consider sql parsing fallback as failure (#11896)

This commit is contained in:
Harshal Sheth 2024-11-19 15:06:16 -08:00 committed by GitHub
parent 44affd7f82
commit 85c8e605be
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 35 additions and 1 deletion

View File

@ -268,7 +268,9 @@ def sql_lineage(
)
logger.debug("Sql parsing debug info: %s", lineage.debug_info)
if lineage.debug_info.error:
if lineage.debug_info.table_error:
raise lineage.debug_info.table_error
elif lineage.debug_info.error:
logger.debug("Sql parsing error details", exc_info=lineage.debug_info.error)
click.echo(lineage.json(indent=4))

View File

@ -904,6 +904,15 @@ def _sqlglot_lineage_inner(
logger.debug("Parsing lineage from sql statement: %s", sql)
statement = parse_statement(sql, dialect=dialect)
if isinstance(statement, sqlglot.exp.Command):
# For unsupported syntax, sqlglot will usually fall back to parsing as a Command.
# This is effectively a parsing error, and we won't get any lineage from it.
# See https://github.com/tobymao/sqlglot/commit/3a13fdf4e597a2f0a3f9fc126a129183fe98262f
# and https://github.com/tobymao/sqlglot/pull/2874
raise UnsupportedStatementTypeError(
f"Got unsupported syntax for statement: {sql}"
)
original_statement, statement = statement, statement.copy()
# logger.debug(
# "Formatted sql statement: %s",

View File

@ -0,0 +1,12 @@
{
"query_type": "UNKNOWN",
"query_type_props": {},
"query_fingerprint": null,
"in_tables": [],
"out_tables": [],
"column_lineage": null,
"debug_info": {
"confidence": 0.0,
"generalized_statement": null
}
}

View File

@ -1268,3 +1268,14 @@ WHERE rank_ = 1
dialect="bigquery",
expected_file=RESOURCE_DIR / "test_bigquery_subquery_column_inference.json",
)
def test_sqlite_attach_database() -> None:
    """Run a SQLite ``ATTACH DATABASE`` statement through ``assert_sql_result``.

    The statement is passed with ``dialect="sqlite"`` and
    ``allow_table_error=True``; ``test_sqlite_attach_database.json`` under
    ``RESOURCE_DIR`` is supplied as the expected-output file.
    """
    # Same text as the original triple-quoted literal, trailing newline included.
    attach_sql = "ATTACH DATABASE ':memory:' AS aux1\n"
    golden_file = RESOURCE_DIR / "test_sqlite_attach_database.json"

    assert_sql_result(
        attach_sql,
        dialect="sqlite",
        expected_file=golden_file,
        allow_table_error=True,
    )