feat(ingestion/sqlglot): add optional default_dialect parameter to sqlglot lineage (#10830)

This commit is contained in:
Nadav Gross 2024-07-16 22:28:14 +03:00 committed by GitHub
parent bb24651264
commit a8b07c5fe6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 15 additions and 3 deletions

View File

@ -1241,6 +1241,7 @@ class DataHubGraph(DatahubRestEmitter):
env: str = DEFAULT_ENV, env: str = DEFAULT_ENV,
default_db: Optional[str] = None, default_db: Optional[str] = None,
default_schema: Optional[str] = None, default_schema: Optional[str] = None,
default_dialect: Optional[str] = None,
) -> "SqlParsingResult": ) -> "SqlParsingResult":
from datahub.sql_parsing.sqlglot_lineage import sqlglot_lineage from datahub.sql_parsing.sqlglot_lineage import sqlglot_lineage
@ -1254,6 +1255,7 @@ class DataHubGraph(DatahubRestEmitter):
schema_resolver=schema_resolver, schema_resolver=schema_resolver,
default_db=default_db, default_db=default_db,
default_schema=default_schema, default_schema=default_schema,
default_dialect=default_dialect,
) )
def create_tag(self, tag_name: str) -> str: def create_tag(self, tag_name: str) -> str:

View File

@ -843,8 +843,14 @@ def _sqlglot_lineage_inner(
schema_resolver: SchemaResolverInterface, schema_resolver: SchemaResolverInterface,
default_db: Optional[str] = None, default_db: Optional[str] = None,
default_schema: Optional[str] = None, default_schema: Optional[str] = None,
default_dialect: Optional[str] = None,
) -> SqlParsingResult: ) -> SqlParsingResult:
dialect = get_dialect(schema_resolver.platform)
if not default_dialect:
dialect = get_dialect(schema_resolver.platform)
else:
dialect = get_dialect(default_dialect)
if is_dialect_instance(dialect, "snowflake"): if is_dialect_instance(dialect, "snowflake"):
# in snowflake, table identifiers must be uppercased to match sqlglot's behavior. # in snowflake, table identifiers must be uppercased to match sqlglot's behavior.
if default_db: if default_db:
@ -1003,6 +1009,7 @@ def sqlglot_lineage(
schema_resolver: SchemaResolverInterface, schema_resolver: SchemaResolverInterface,
default_db: Optional[str] = None, default_db: Optional[str] = None,
default_schema: Optional[str] = None, default_schema: Optional[str] = None,
default_dialect: Optional[str] = None,
) -> SqlParsingResult: ) -> SqlParsingResult:
"""Parse a SQL statement and generate lineage information. """Parse a SQL statement and generate lineage information.
@ -1020,8 +1027,9 @@ def sqlglot_lineage(
can be brittle with respect to missing schema information and complex can be brittle with respect to missing schema information and complex
SQL logic like UNNESTs. SQL logic like UNNESTs.
The SQL dialect is inferred from the schema_resolver's platform. The The SQL dialect can be given as an argument called default_dialect or it can
set of supported dialects is the same as sqlglot's. See their be inferred from the schema_resolver's platform.
The set of supported dialects is the same as sqlglot's. See their
`documentation <https://sqlglot.com/sqlglot/dialects/dialect.html#Dialects>`_ `documentation <https://sqlglot.com/sqlglot/dialects/dialect.html#Dialects>`_
for the full list. for the full list.
@ -1035,6 +1043,7 @@ def sqlglot_lineage(
schema_resolver: The schema resolver to use for resolving table schemas. schema_resolver: The schema resolver to use for resolving table schemas.
default_db: The default database to use for unqualified table names. default_db: The default database to use for unqualified table names.
default_schema: The default schema to use for unqualified table names. default_schema: The default schema to use for unqualified table names.
default_dialect: If set, the SQL dialect to use instead of the one inferred from the schema_resolver's platform.
Returns: Returns:
A SqlParsingResult object containing the parsed lineage information. A SqlParsingResult object containing the parsed lineage information.
@ -1059,6 +1068,7 @@ def sqlglot_lineage(
schema_resolver=schema_resolver, schema_resolver=schema_resolver,
default_db=default_db, default_db=default_db,
default_schema=default_schema, default_schema=default_schema,
default_dialect=default_dialect,
) )
except Exception as e: except Exception as e:
return SqlParsingResult.make_from_error(e) return SqlParsingResult.make_from_error(e)