mirror of
https://github.com/datahub-project/datahub.git
synced 2025-09-02 13:53:06 +00:00
refactor(sql-parsing): rename default_dialect to override_dialect parameter (#14015)
Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
parent
dd87bff90e
commit
02da726cf5
@ -89,6 +89,11 @@ DataHub is a **schema-first, event-driven metadata platform** with three core la
|
||||
- Frontend: Tests in `__tests__/` or `.test.tsx` files
|
||||
- Smoke tests go in the `smoke-test/` directory
|
||||
|
||||
### Commits
|
||||
|
||||
- Follow Conventional Commits format for commit messages
|
||||
- Breaking changes: always add an entry to `docs/how/updating-datahub.md`. Write entries for non-technical audiences, reference the PR number, and focus on what users need to change rather than on internal implementation details.
|
||||
|
||||
## Key Documentation
|
||||
|
||||
**Essential reading:**
|
||||
@ -107,4 +112,3 @@ DataHub is a **schema-first, event-driven metadata platform** with three core la
|
||||
- Entity Registry is defined in YAML, not code (`entity-registry.yml`)
|
||||
- All metadata changes flow through the event streaming system
|
||||
- GraphQL schema is generated from backend GMS APIs
|
||||
- Follow Conventional Commits format for commit messages
|
||||
|
@ -37,6 +37,9 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
|
||||
- `acryl-datahub-gx-plugin`
|
||||
- `acryl-datahub-dagster-plugin` (already required Python 3.9+)
|
||||
- #13619: The `acryl-datahub-airflow-plugin` has dropped support for Airflow versions earlier than 2.7.
|
||||
- #14015: In the sql-queries source, the `default_dialect` configuration parameter has been renamed to `override_dialect`. The same rename applies to the following Python SDK method parameters:
|
||||
- `DataHubGraph.parse_sql_lineage(default_dialect=...)` → `DataHubGraph.parse_sql_lineage(override_dialect=...)`
|
||||
- `LineageClient.add_lineage_via_sql(default_dialect=...)` → `LineageClient.add_lineage_via_sql(override_dialect=...)`
|
||||
|
||||
### Known Issues
|
||||
|
||||
|
@ -1576,7 +1576,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
||||
env: str = DEFAULT_ENV,
|
||||
default_db: Optional[str] = None,
|
||||
default_schema: Optional[str] = None,
|
||||
default_dialect: Optional[str] = None,
|
||||
override_dialect: Optional[str] = None,
|
||||
) -> "SqlParsingResult":
|
||||
from datahub.sql_parsing.sqlglot_lineage import sqlglot_lineage
|
||||
|
||||
@ -1590,7 +1590,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
||||
schema_resolver=schema_resolver,
|
||||
default_db=default_db,
|
||||
default_schema=default_schema,
|
||||
default_dialect=default_dialect,
|
||||
override_dialect=override_dialect,
|
||||
)
|
||||
|
||||
def create_tag(self, tag_name: str) -> str:
|
||||
|
@ -66,7 +66,7 @@ class SqlQueriesSourceConfig(PlatformInstanceConfigMixin, EnvConfigMixin):
|
||||
description="The default schema to use for unqualified table names",
|
||||
default=None,
|
||||
)
|
||||
default_dialect: Optional[str] = Field(
|
||||
override_dialect: Optional[str] = Field(
|
||||
description="The SQL dialect to use when parsing queries. Overrides automatic dialect detection.",
|
||||
default=None,
|
||||
)
|
||||
@ -181,7 +181,7 @@ class SqlQueriesSource(Source):
|
||||
schema_resolver=self.schema_resolver,
|
||||
default_db=self.config.default_db,
|
||||
default_schema=self.config.default_schema,
|
||||
default_dialect=self.config.default_dialect,
|
||||
override_dialect=self.config.override_dialect,
|
||||
)
|
||||
if result.debug_info.table_error:
|
||||
logger.info(f"Error parsing table lineage, {result.debug_info.table_error}")
|
||||
|
@ -478,7 +478,7 @@ class LineageClient:
|
||||
env: str = "PROD",
|
||||
default_db: Optional[str] = None,
|
||||
default_schema: Optional[str] = None,
|
||||
default_dialect: Optional[str] = None,
|
||||
override_dialect: Optional[str] = None,
|
||||
) -> None:
|
||||
"""Add lineage by parsing a SQL query."""
|
||||
from datahub.sql_parsing.sqlglot_lineage import (
|
||||
@ -494,7 +494,7 @@ class LineageClient:
|
||||
platform_instance=platform_instance,
|
||||
env=env,
|
||||
graph=self._client._graph,
|
||||
default_dialect=default_dialect,
|
||||
override_dialect=override_dialect,
|
||||
)
|
||||
|
||||
if parsed_result.debug_info.table_error:
|
||||
|
@ -56,6 +56,7 @@ from datahub.sql_parsing.sql_parsing_common import (
|
||||
QueryTypeProps,
|
||||
)
|
||||
from datahub.sql_parsing.sqlglot_utils import (
|
||||
DialectOrStr,
|
||||
get_dialect,
|
||||
get_query_fingerprint_debug,
|
||||
is_dialect_instance,
|
||||
@ -1231,12 +1232,12 @@ def _sqlglot_lineage_inner(
|
||||
schema_resolver: SchemaResolverInterface,
|
||||
default_db: Optional[str] = None,
|
||||
default_schema: Optional[str] = None,
|
||||
default_dialect: Optional[str] = None,
|
||||
override_dialect: Optional[DialectOrStr] = None,
|
||||
) -> SqlParsingResult:
|
||||
if not default_dialect:
|
||||
dialect = get_dialect(schema_resolver.platform)
|
||||
if override_dialect:
|
||||
dialect = get_dialect(override_dialect)
|
||||
else:
|
||||
dialect = get_dialect(default_dialect)
|
||||
dialect = get_dialect(schema_resolver.platform)
|
||||
|
||||
default_db = _normalize_db_or_schema(default_db, dialect)
|
||||
default_schema = _normalize_db_or_schema(default_schema, dialect)
|
||||
@ -1423,7 +1424,7 @@ def _sqlglot_lineage_nocache(
|
||||
schema_resolver: SchemaResolverInterface,
|
||||
default_db: Optional[str] = None,
|
||||
default_schema: Optional[str] = None,
|
||||
default_dialect: Optional[str] = None,
|
||||
override_dialect: Optional[DialectOrStr] = None,
|
||||
) -> SqlParsingResult:
|
||||
"""Parse a SQL statement and generate lineage information.
|
||||
|
||||
@ -1441,8 +1442,8 @@ def _sqlglot_lineage_nocache(
|
||||
can be brittle with respect to missing schema information and complex
|
||||
SQL logic like UNNESTs.
|
||||
|
||||
The SQL dialect can be given as an argument called default_dialect or it can
|
||||
be inferred from the schema_resolver's platform.
|
||||
The SQL dialect will be inferred from the schema_resolver's platform.
|
||||
That inference can be overridden by passing an override_dialect argument.
|
||||
The set of supported dialects is the same as sqlglot's. See their
|
||||
`documentation <https://sqlglot.com/sqlglot/dialects/dialect.html#Dialects>`_
|
||||
for the full list.
|
||||
@ -1457,7 +1458,7 @@ def _sqlglot_lineage_nocache(
|
||||
schema_resolver: The schema resolver to use for resolving table schemas.
|
||||
default_db: The default database to use for unqualified table names.
|
||||
default_schema: The default schema to use for unqualified table names.
|
||||
default_dialect: A default dialect to override the dialect provided by 'schema_resolver'.
|
||||
override_dialect: Override the dialect provided by 'schema_resolver'.
|
||||
|
||||
Returns:
|
||||
A SqlParsingResult object containing the parsed lineage information.
|
||||
@ -1482,7 +1483,7 @@ def _sqlglot_lineage_nocache(
|
||||
schema_resolver=schema_resolver,
|
||||
default_db=default_db,
|
||||
default_schema=default_schema,
|
||||
default_dialect=default_dialect,
|
||||
override_dialect=override_dialect,
|
||||
)
|
||||
except Exception as e:
|
||||
return SqlParsingResult.make_from_error(e)
|
||||
@ -1520,15 +1521,15 @@ def sqlglot_lineage(
|
||||
schema_resolver: SchemaResolverInterface,
|
||||
default_db: Optional[str] = None,
|
||||
default_schema: Optional[str] = None,
|
||||
default_dialect: Optional[str] = None,
|
||||
override_dialect: Optional[DialectOrStr] = None,
|
||||
) -> SqlParsingResult:
|
||||
if schema_resolver.includes_temp_tables():
|
||||
return _sqlglot_lineage_nocache(
|
||||
sql, schema_resolver, default_db, default_schema, default_dialect
|
||||
sql, schema_resolver, default_db, default_schema, override_dialect
|
||||
)
|
||||
else:
|
||||
return _sqlglot_lineage_cached(
|
||||
sql, schema_resolver, default_db, default_schema, default_dialect
|
||||
sql, schema_resolver, default_db, default_schema, override_dialect
|
||||
)
|
||||
|
||||
|
||||
@ -1580,7 +1581,7 @@ def create_lineage_sql_parsed_result(
|
||||
default_schema: Optional[str] = None,
|
||||
graph: Optional[DataHubGraph] = None,
|
||||
schema_aware: bool = True,
|
||||
default_dialect: Optional[str] = None,
|
||||
override_dialect: Optional[DialectOrStr] = None,
|
||||
) -> SqlParsingResult:
|
||||
schema_resolver = create_schema_resolver(
|
||||
platform=platform,
|
||||
@ -1600,7 +1601,7 @@ def create_lineage_sql_parsed_result(
|
||||
schema_resolver=schema_resolver,
|
||||
default_db=default_db,
|
||||
default_schema=default_schema,
|
||||
default_dialect=default_dialect,
|
||||
override_dialect=override_dialect,
|
||||
)
|
||||
except Exception as e:
|
||||
return SqlParsingResult.make_from_error(e)
|
||||
|
Loading…
x
Reference in New Issue
Block a user