refactor(sql-parsing): rename default_dialect to override_dialect parameter (#14015)

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Harshal Sheth 2025-07-10 12:26:58 -04:00 committed by GitHub
parent dd87bff90e
commit 02da726cf5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 29 additions and 21 deletions

View File

@ -89,6 +89,11 @@ DataHub is a **schema-first, event-driven metadata platform** with three core la
- Frontend: Tests in `__tests__/` or `.test.tsx` files
- Smoke tests go in the `smoke-test/` directory
### Commits
- Follow Conventional Commits format for commit messages
- Breaking Changes: Always update `docs/how/updating-datahub.md` for breaking changes. Write entries for non-technical audiences, reference the PR number, and focus on what users need to change rather than internal implementation details
## Key Documentation
**Essential reading:**
@ -107,4 +112,3 @@ DataHub is a **schema-first, event-driven metadata platform** with three core la
- Entity Registry is defined in YAML, not code (`entity-registry.yml`)
- All metadata changes flow through the event streaming system
- GraphQL schema is generated from backend GMS APIs
- Follow Conventional Commits format for commit messages

View File

@ -37,6 +37,9 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
- `acryl-datahub-gx-plugin`
- `acryl-datahub-dagster-plugin` (already required Python 3.9+)
- #13619: The `acryl-datahub-airflow-plugin` has dropped support for Airflow versions less than 2.7.
- #14015: In the sql-queries source, the `default_dialect` configuration parameter has been renamed to `override_dialect`. This also affects the Python SDK methods:
- `DataHubGraph.parse_sql_lineage(default_dialect=...)` → `DataHubGraph.parse_sql_lineage(override_dialect=...)`
- `LineageClient.add_lineage_via_sql(default_dialect=...)` → `LineageClient.add_lineage_via_sql(override_dialect=...)`
### Known Issues

View File

@ -1576,7 +1576,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
env: str = DEFAULT_ENV,
default_db: Optional[str] = None,
default_schema: Optional[str] = None,
default_dialect: Optional[str] = None,
override_dialect: Optional[str] = None,
) -> "SqlParsingResult":
from datahub.sql_parsing.sqlglot_lineage import sqlglot_lineage
@ -1590,7 +1590,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
schema_resolver=schema_resolver,
default_db=default_db,
default_schema=default_schema,
default_dialect=default_dialect,
override_dialect=override_dialect,
)
def create_tag(self, tag_name: str) -> str:

View File

@ -66,7 +66,7 @@ class SqlQueriesSourceConfig(PlatformInstanceConfigMixin, EnvConfigMixin):
description="The default schema to use for unqualified table names",
default=None,
)
default_dialect: Optional[str] = Field(
override_dialect: Optional[str] = Field(
description="The SQL dialect to use when parsing queries. Overrides automatic dialect detection.",
default=None,
)
@ -181,7 +181,7 @@ class SqlQueriesSource(Source):
schema_resolver=self.schema_resolver,
default_db=self.config.default_db,
default_schema=self.config.default_schema,
default_dialect=self.config.default_dialect,
override_dialect=self.config.override_dialect,
)
if result.debug_info.table_error:
logger.info(f"Error parsing table lineage, {result.debug_info.table_error}")

View File

@ -478,7 +478,7 @@ class LineageClient:
env: str = "PROD",
default_db: Optional[str] = None,
default_schema: Optional[str] = None,
default_dialect: Optional[str] = None,
override_dialect: Optional[str] = None,
) -> None:
"""Add lineage by parsing a SQL query."""
from datahub.sql_parsing.sqlglot_lineage import (
@ -494,7 +494,7 @@ class LineageClient:
platform_instance=platform_instance,
env=env,
graph=self._client._graph,
default_dialect=default_dialect,
override_dialect=override_dialect,
)
if parsed_result.debug_info.table_error:

View File

@ -56,6 +56,7 @@ from datahub.sql_parsing.sql_parsing_common import (
QueryTypeProps,
)
from datahub.sql_parsing.sqlglot_utils import (
DialectOrStr,
get_dialect,
get_query_fingerprint_debug,
is_dialect_instance,
@ -1231,12 +1232,12 @@ def _sqlglot_lineage_inner(
schema_resolver: SchemaResolverInterface,
default_db: Optional[str] = None,
default_schema: Optional[str] = None,
default_dialect: Optional[str] = None,
override_dialect: Optional[DialectOrStr] = None,
) -> SqlParsingResult:
if not default_dialect:
dialect = get_dialect(schema_resolver.platform)
if override_dialect:
dialect = get_dialect(override_dialect)
else:
dialect = get_dialect(default_dialect)
dialect = get_dialect(schema_resolver.platform)
default_db = _normalize_db_or_schema(default_db, dialect)
default_schema = _normalize_db_or_schema(default_schema, dialect)
@ -1423,7 +1424,7 @@ def _sqlglot_lineage_nocache(
schema_resolver: SchemaResolverInterface,
default_db: Optional[str] = None,
default_schema: Optional[str] = None,
default_dialect: Optional[str] = None,
override_dialect: Optional[DialectOrStr] = None,
) -> SqlParsingResult:
"""Parse a SQL statement and generate lineage information.
@ -1441,8 +1442,8 @@ def _sqlglot_lineage_nocache(
can be brittle with respect to missing schema information and complex
SQL logic like UNNESTs.
The SQL dialect can be given as an argument called default_dialect or it can
be inferred from the schema_resolver's platform.
The SQL dialect will be inferred from the schema_resolver's platform.
That inference can be overridden by passing an override_dialect argument.
The set of supported dialects is the same as sqlglot's. See their
`documentation <https://sqlglot.com/sqlglot/dialects/dialect.html#Dialects>`_
for the full list.
@ -1457,7 +1458,7 @@ def _sqlglot_lineage_nocache(
schema_resolver: The schema resolver to use for resolving table schemas.
default_db: The default database to use for unqualified table names.
default_schema: The default schema to use for unqualified table names.
default_dialect: A default dialect to override the dialect provided by 'schema_resolver'.
override_dialect: Override the dialect provided by 'schema_resolver'.
Returns:
A SqlParsingResult object containing the parsed lineage information.
@ -1482,7 +1483,7 @@ def _sqlglot_lineage_nocache(
schema_resolver=schema_resolver,
default_db=default_db,
default_schema=default_schema,
default_dialect=default_dialect,
override_dialect=override_dialect,
)
except Exception as e:
return SqlParsingResult.make_from_error(e)
@ -1520,15 +1521,15 @@ def sqlglot_lineage(
schema_resolver: SchemaResolverInterface,
default_db: Optional[str] = None,
default_schema: Optional[str] = None,
default_dialect: Optional[str] = None,
override_dialect: Optional[DialectOrStr] = None,
) -> SqlParsingResult:
if schema_resolver.includes_temp_tables():
return _sqlglot_lineage_nocache(
sql, schema_resolver, default_db, default_schema, default_dialect
sql, schema_resolver, default_db, default_schema, override_dialect
)
else:
return _sqlglot_lineage_cached(
sql, schema_resolver, default_db, default_schema, default_dialect
sql, schema_resolver, default_db, default_schema, override_dialect
)
@ -1580,7 +1581,7 @@ def create_lineage_sql_parsed_result(
default_schema: Optional[str] = None,
graph: Optional[DataHubGraph] = None,
schema_aware: bool = True,
default_dialect: Optional[str] = None,
override_dialect: Optional[DialectOrStr] = None,
) -> SqlParsingResult:
schema_resolver = create_schema_resolver(
platform=platform,
@ -1600,7 +1601,7 @@ def create_lineage_sql_parsed_result(
schema_resolver=schema_resolver,
default_db=default_db,
default_schema=default_schema,
default_dialect=default_dialect,
override_dialect=override_dialect,
)
except Exception as e:
return SqlParsingResult.make_from_error(e)