refactor(sql-parsing): rename default_dialect to override_dialect parameter (#14015)

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Harshal Sheth 2025-07-10 12:26:58 -04:00 committed by GitHub
parent dd87bff90e
commit 02da726cf5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 29 additions and 21 deletions

View File

@ -89,6 +89,11 @@ DataHub is a **schema-first, event-driven metadata platform** with three core la
- Frontend: Tests in `__tests__/` or `.test.tsx` files - Frontend: Tests in `__tests__/` or `.test.tsx` files
- Smoke tests go in the `smoke-test/` directory - Smoke tests go in the `smoke-test/` directory
### Commits
- Follow Conventional Commits format for commit messages
- Breaking Changes: Always update `docs/how/updating-datahub.md` for breaking changes. Write entries for non-technical audiences, reference the PR number, and focus on what users need to change rather than internal implementation details
## Key Documentation ## Key Documentation
**Essential reading:** **Essential reading:**
@ -107,4 +112,3 @@ DataHub is a **schema-first, event-driven metadata platform** with three core la
- Entity Registry is defined in YAML, not code (`entity-registry.yml`) - Entity Registry is defined in YAML, not code (`entity-registry.yml`)
- All metadata changes flow through the event streaming system - All metadata changes flow through the event streaming system
- GraphQL schema is generated from backend GMS APIs - GraphQL schema is generated from backend GMS APIs
- Follow Conventional Commits format for commit messages

View File

@ -37,6 +37,9 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
- `acryl-datahub-gx-plugin` - `acryl-datahub-gx-plugin`
- `acryl-datahub-dagster-plugin` (already required Python 3.9+) - `acryl-datahub-dagster-plugin` (already required Python 3.9+)
- #13619: The `acryl-datahub-airflow-plugin` has dropped support for Airflow versions less than 2.7. - #13619: The `acryl-datahub-airflow-plugin` has dropped support for Airflow versions less than 2.7.
- #14015: In the sql-queries source, the `default_dialect` configuration parameter has been renamed to `override_dialect`. This also affects the Python SDK methods:
- `DataHubGraph.parse_sql_lineage(default_dialect=...)` → `DataHubGraph.parse_sql_lineage(override_dialect=...)`
- `LineageClient.add_lineage_via_sql(default_dialect=...)` → `LineageClient.add_lineage_via_sql(override_dialect=...)`
### Known Issues ### Known Issues

View File

@ -1576,7 +1576,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
env: str = DEFAULT_ENV, env: str = DEFAULT_ENV,
default_db: Optional[str] = None, default_db: Optional[str] = None,
default_schema: Optional[str] = None, default_schema: Optional[str] = None,
default_dialect: Optional[str] = None, override_dialect: Optional[str] = None,
) -> "SqlParsingResult": ) -> "SqlParsingResult":
from datahub.sql_parsing.sqlglot_lineage import sqlglot_lineage from datahub.sql_parsing.sqlglot_lineage import sqlglot_lineage
@ -1590,7 +1590,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
schema_resolver=schema_resolver, schema_resolver=schema_resolver,
default_db=default_db, default_db=default_db,
default_schema=default_schema, default_schema=default_schema,
default_dialect=default_dialect, override_dialect=override_dialect,
) )
def create_tag(self, tag_name: str) -> str: def create_tag(self, tag_name: str) -> str:

View File

@ -66,7 +66,7 @@ class SqlQueriesSourceConfig(PlatformInstanceConfigMixin, EnvConfigMixin):
description="The default schema to use for unqualified table names", description="The default schema to use for unqualified table names",
default=None, default=None,
) )
default_dialect: Optional[str] = Field( override_dialect: Optional[str] = Field(
description="The SQL dialect to use when parsing queries. Overrides automatic dialect detection.", description="The SQL dialect to use when parsing queries. Overrides automatic dialect detection.",
default=None, default=None,
) )
@ -181,7 +181,7 @@ class SqlQueriesSource(Source):
schema_resolver=self.schema_resolver, schema_resolver=self.schema_resolver,
default_db=self.config.default_db, default_db=self.config.default_db,
default_schema=self.config.default_schema, default_schema=self.config.default_schema,
default_dialect=self.config.default_dialect, override_dialect=self.config.override_dialect,
) )
if result.debug_info.table_error: if result.debug_info.table_error:
logger.info(f"Error parsing table lineage, {result.debug_info.table_error}") logger.info(f"Error parsing table lineage, {result.debug_info.table_error}")

View File

@ -478,7 +478,7 @@ class LineageClient:
env: str = "PROD", env: str = "PROD",
default_db: Optional[str] = None, default_db: Optional[str] = None,
default_schema: Optional[str] = None, default_schema: Optional[str] = None,
default_dialect: Optional[str] = None, override_dialect: Optional[str] = None,
) -> None: ) -> None:
"""Add lineage by parsing a SQL query.""" """Add lineage by parsing a SQL query."""
from datahub.sql_parsing.sqlglot_lineage import ( from datahub.sql_parsing.sqlglot_lineage import (
@ -494,7 +494,7 @@ class LineageClient:
platform_instance=platform_instance, platform_instance=platform_instance,
env=env, env=env,
graph=self._client._graph, graph=self._client._graph,
default_dialect=default_dialect, override_dialect=override_dialect,
) )
if parsed_result.debug_info.table_error: if parsed_result.debug_info.table_error:

View File

@ -56,6 +56,7 @@ from datahub.sql_parsing.sql_parsing_common import (
QueryTypeProps, QueryTypeProps,
) )
from datahub.sql_parsing.sqlglot_utils import ( from datahub.sql_parsing.sqlglot_utils import (
DialectOrStr,
get_dialect, get_dialect,
get_query_fingerprint_debug, get_query_fingerprint_debug,
is_dialect_instance, is_dialect_instance,
@ -1231,12 +1232,12 @@ def _sqlglot_lineage_inner(
schema_resolver: SchemaResolverInterface, schema_resolver: SchemaResolverInterface,
default_db: Optional[str] = None, default_db: Optional[str] = None,
default_schema: Optional[str] = None, default_schema: Optional[str] = None,
default_dialect: Optional[str] = None, override_dialect: Optional[DialectOrStr] = None,
) -> SqlParsingResult: ) -> SqlParsingResult:
if not default_dialect: if override_dialect:
dialect = get_dialect(schema_resolver.platform) dialect = get_dialect(override_dialect)
else: else:
dialect = get_dialect(default_dialect) dialect = get_dialect(schema_resolver.platform)
default_db = _normalize_db_or_schema(default_db, dialect) default_db = _normalize_db_or_schema(default_db, dialect)
default_schema = _normalize_db_or_schema(default_schema, dialect) default_schema = _normalize_db_or_schema(default_schema, dialect)
@ -1423,7 +1424,7 @@ def _sqlglot_lineage_nocache(
schema_resolver: SchemaResolverInterface, schema_resolver: SchemaResolverInterface,
default_db: Optional[str] = None, default_db: Optional[str] = None,
default_schema: Optional[str] = None, default_schema: Optional[str] = None,
default_dialect: Optional[str] = None, override_dialect: Optional[DialectOrStr] = None,
) -> SqlParsingResult: ) -> SqlParsingResult:
"""Parse a SQL statement and generate lineage information. """Parse a SQL statement and generate lineage information.
@ -1441,8 +1442,8 @@ def _sqlglot_lineage_nocache(
can be brittle with respect to missing schema information and complex can be brittle with respect to missing schema information and complex
SQL logic like UNNESTs. SQL logic like UNNESTs.
The SQL dialect can be given as an argument called default_dialect or it can The SQL dialect will be inferred from the schema_resolver's platform.
be inferred from the schema_resolver's platform. That inference can be overridden by passing an override_dialect argument.
The set of supported dialects is the same as sqlglot's. See their The set of supported dialects is the same as sqlglot's. See their
`documentation <https://sqlglot.com/sqlglot/dialects/dialect.html#Dialects>`_ `documentation <https://sqlglot.com/sqlglot/dialects/dialect.html#Dialects>`_
for the full list. for the full list.
@ -1457,7 +1458,7 @@ def _sqlglot_lineage_nocache(
schema_resolver: The schema resolver to use for resolving table schemas. schema_resolver: The schema resolver to use for resolving table schemas.
default_db: The default database to use for unqualified table names. default_db: The default database to use for unqualified table names.
default_schema: The default schema to use for unqualified table names. default_schema: The default schema to use for unqualified table names.
default_dialect: A default dialect to override the dialect provided by 'schema_resolver'. override_dialect: Override the dialect provided by 'schema_resolver'.
Returns: Returns:
A SqlParsingResult object containing the parsed lineage information. A SqlParsingResult object containing the parsed lineage information.
@ -1482,7 +1483,7 @@ def _sqlglot_lineage_nocache(
schema_resolver=schema_resolver, schema_resolver=schema_resolver,
default_db=default_db, default_db=default_db,
default_schema=default_schema, default_schema=default_schema,
default_dialect=default_dialect, override_dialect=override_dialect,
) )
except Exception as e: except Exception as e:
return SqlParsingResult.make_from_error(e) return SqlParsingResult.make_from_error(e)
@ -1520,15 +1521,15 @@ def sqlglot_lineage(
schema_resolver: SchemaResolverInterface, schema_resolver: SchemaResolverInterface,
default_db: Optional[str] = None, default_db: Optional[str] = None,
default_schema: Optional[str] = None, default_schema: Optional[str] = None,
default_dialect: Optional[str] = None, override_dialect: Optional[DialectOrStr] = None,
) -> SqlParsingResult: ) -> SqlParsingResult:
if schema_resolver.includes_temp_tables(): if schema_resolver.includes_temp_tables():
return _sqlglot_lineage_nocache( return _sqlglot_lineage_nocache(
sql, schema_resolver, default_db, default_schema, default_dialect sql, schema_resolver, default_db, default_schema, override_dialect
) )
else: else:
return _sqlglot_lineage_cached( return _sqlglot_lineage_cached(
sql, schema_resolver, default_db, default_schema, default_dialect sql, schema_resolver, default_db, default_schema, override_dialect
) )
@ -1580,7 +1581,7 @@ def create_lineage_sql_parsed_result(
default_schema: Optional[str] = None, default_schema: Optional[str] = None,
graph: Optional[DataHubGraph] = None, graph: Optional[DataHubGraph] = None,
schema_aware: bool = True, schema_aware: bool = True,
default_dialect: Optional[str] = None, override_dialect: Optional[DialectOrStr] = None,
) -> SqlParsingResult: ) -> SqlParsingResult:
schema_resolver = create_schema_resolver( schema_resolver = create_schema_resolver(
platform=platform, platform=platform,
@ -1600,7 +1601,7 @@ def create_lineage_sql_parsed_result(
schema_resolver=schema_resolver, schema_resolver=schema_resolver,
default_db=default_db, default_db=default_db,
default_schema=default_schema, default_schema=default_schema,
default_dialect=default_dialect, override_dialect=override_dialect,
) )
except Exception as e: except Exception as e:
return SqlParsingResult.make_from_error(e) return SqlParsingResult.make_from_error(e)