mirror of
https://github.com/datahub-project/datahub.git
synced 2025-09-04 06:43:16 +00:00
refactor(sql-parsing): rename default_dialect to override_dialect parameter (#14015)
Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
parent
dd87bff90e
commit
02da726cf5
@ -89,6 +89,11 @@ DataHub is a **schema-first, event-driven metadata platform** with three core la
|
|||||||
- Frontend: Tests in `__tests__/` or `.test.tsx` files
|
- Frontend: Tests in `__tests__/` or `.test.tsx` files
|
||||||
- Smoke tests go in the `smoke-test/` directory
|
- Smoke tests go in the `smoke-test/` directory
|
||||||
|
|
||||||
|
### Commits
|
||||||
|
|
||||||
|
- Follow Conventional Commits format for commit messages
|
||||||
|
- Breaking Changes: Always update `docs/how/updating-datahub.md` for breaking changes. Write entries for non-technical audiences, reference the PR number, and focus on what users need to change rather than internal implementation details.
|
||||||
|
|
||||||
## Key Documentation
|
## Key Documentation
|
||||||
|
|
||||||
**Essential reading:**
|
**Essential reading:**
|
||||||
@ -107,4 +112,3 @@ DataHub is a **schema-first, event-driven metadata platform** with three core la
|
|||||||
- Entity Registry is defined in YAML, not code (`entity-registry.yml`)
|
- Entity Registry is defined in YAML, not code (`entity-registry.yml`)
|
||||||
- All metadata changes flow through the event streaming system
|
- All metadata changes flow through the event streaming system
|
||||||
- GraphQL schema is generated from backend GMS APIs
|
- GraphQL schema is generated from backend GMS APIs
|
||||||
- Follow Conventional Commits format for commit messages
|
|
||||||
|
@ -37,6 +37,9 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
|
|||||||
- `acryl-datahub-gx-plugin`
|
- `acryl-datahub-gx-plugin`
|
||||||
- `acryl-datahub-dagster-plugin` (already required Python 3.9+)
|
- `acryl-datahub-dagster-plugin` (already required Python 3.9+)
|
||||||
- #13619: The `acryl-datahub-airflow-plugin` has dropped support for Airflow versions less than 2.7.
|
- #13619: The `acryl-datahub-airflow-plugin` has dropped support for Airflow versions less than 2.7.
|
||||||
|
- #14015: In the sql-queries source, the `default_dialect` configuration parameter has been renamed to `override_dialect`; update any recipes that set the old name. This also affects the Python SDK methods:
|
||||||
|
- `DataHubGraph.parse_sql_lineage(default_dialect=...)` → `DataHubGraph.parse_sql_lineage(override_dialect=...)`
|
||||||
|
- `LineageClient.add_lineage_via_sql(default_dialect=...)` → `LineageClient.add_lineage_via_sql(override_dialect=...)`
|
||||||
|
|
||||||
### Known Issues
|
### Known Issues
|
||||||
|
|
||||||
|
@ -1576,7 +1576,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|||||||
env: str = DEFAULT_ENV,
|
env: str = DEFAULT_ENV,
|
||||||
default_db: Optional[str] = None,
|
default_db: Optional[str] = None,
|
||||||
default_schema: Optional[str] = None,
|
default_schema: Optional[str] = None,
|
||||||
default_dialect: Optional[str] = None,
|
override_dialect: Optional[str] = None,
|
||||||
) -> "SqlParsingResult":
|
) -> "SqlParsingResult":
|
||||||
from datahub.sql_parsing.sqlglot_lineage import sqlglot_lineage
|
from datahub.sql_parsing.sqlglot_lineage import sqlglot_lineage
|
||||||
|
|
||||||
@ -1590,7 +1590,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|||||||
schema_resolver=schema_resolver,
|
schema_resolver=schema_resolver,
|
||||||
default_db=default_db,
|
default_db=default_db,
|
||||||
default_schema=default_schema,
|
default_schema=default_schema,
|
||||||
default_dialect=default_dialect,
|
override_dialect=override_dialect,
|
||||||
)
|
)
|
||||||
|
|
||||||
def create_tag(self, tag_name: str) -> str:
|
def create_tag(self, tag_name: str) -> str:
|
||||||
|
@ -66,7 +66,7 @@ class SqlQueriesSourceConfig(PlatformInstanceConfigMixin, EnvConfigMixin):
|
|||||||
description="The default schema to use for unqualified table names",
|
description="The default schema to use for unqualified table names",
|
||||||
default=None,
|
default=None,
|
||||||
)
|
)
|
||||||
default_dialect: Optional[str] = Field(
|
override_dialect: Optional[str] = Field(
|
||||||
description="The SQL dialect to use when parsing queries. Overrides automatic dialect detection.",
|
description="The SQL dialect to use when parsing queries. Overrides automatic dialect detection.",
|
||||||
default=None,
|
default=None,
|
||||||
)
|
)
|
||||||
@ -181,7 +181,7 @@ class SqlQueriesSource(Source):
|
|||||||
schema_resolver=self.schema_resolver,
|
schema_resolver=self.schema_resolver,
|
||||||
default_db=self.config.default_db,
|
default_db=self.config.default_db,
|
||||||
default_schema=self.config.default_schema,
|
default_schema=self.config.default_schema,
|
||||||
default_dialect=self.config.default_dialect,
|
override_dialect=self.config.override_dialect,
|
||||||
)
|
)
|
||||||
if result.debug_info.table_error:
|
if result.debug_info.table_error:
|
||||||
logger.info(f"Error parsing table lineage, {result.debug_info.table_error}")
|
logger.info(f"Error parsing table lineage, {result.debug_info.table_error}")
|
||||||
|
@ -478,7 +478,7 @@ class LineageClient:
|
|||||||
env: str = "PROD",
|
env: str = "PROD",
|
||||||
default_db: Optional[str] = None,
|
default_db: Optional[str] = None,
|
||||||
default_schema: Optional[str] = None,
|
default_schema: Optional[str] = None,
|
||||||
default_dialect: Optional[str] = None,
|
override_dialect: Optional[str] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Add lineage by parsing a SQL query."""
|
"""Add lineage by parsing a SQL query."""
|
||||||
from datahub.sql_parsing.sqlglot_lineage import (
|
from datahub.sql_parsing.sqlglot_lineage import (
|
||||||
@ -494,7 +494,7 @@ class LineageClient:
|
|||||||
platform_instance=platform_instance,
|
platform_instance=platform_instance,
|
||||||
env=env,
|
env=env,
|
||||||
graph=self._client._graph,
|
graph=self._client._graph,
|
||||||
default_dialect=default_dialect,
|
override_dialect=override_dialect,
|
||||||
)
|
)
|
||||||
|
|
||||||
if parsed_result.debug_info.table_error:
|
if parsed_result.debug_info.table_error:
|
||||||
|
@ -56,6 +56,7 @@ from datahub.sql_parsing.sql_parsing_common import (
|
|||||||
QueryTypeProps,
|
QueryTypeProps,
|
||||||
)
|
)
|
||||||
from datahub.sql_parsing.sqlglot_utils import (
|
from datahub.sql_parsing.sqlglot_utils import (
|
||||||
|
DialectOrStr,
|
||||||
get_dialect,
|
get_dialect,
|
||||||
get_query_fingerprint_debug,
|
get_query_fingerprint_debug,
|
||||||
is_dialect_instance,
|
is_dialect_instance,
|
||||||
@ -1231,12 +1232,12 @@ def _sqlglot_lineage_inner(
|
|||||||
schema_resolver: SchemaResolverInterface,
|
schema_resolver: SchemaResolverInterface,
|
||||||
default_db: Optional[str] = None,
|
default_db: Optional[str] = None,
|
||||||
default_schema: Optional[str] = None,
|
default_schema: Optional[str] = None,
|
||||||
default_dialect: Optional[str] = None,
|
override_dialect: Optional[DialectOrStr] = None,
|
||||||
) -> SqlParsingResult:
|
) -> SqlParsingResult:
|
||||||
if not default_dialect:
|
if override_dialect:
|
||||||
dialect = get_dialect(schema_resolver.platform)
|
dialect = get_dialect(override_dialect)
|
||||||
else:
|
else:
|
||||||
dialect = get_dialect(default_dialect)
|
dialect = get_dialect(schema_resolver.platform)
|
||||||
|
|
||||||
default_db = _normalize_db_or_schema(default_db, dialect)
|
default_db = _normalize_db_or_schema(default_db, dialect)
|
||||||
default_schema = _normalize_db_or_schema(default_schema, dialect)
|
default_schema = _normalize_db_or_schema(default_schema, dialect)
|
||||||
@ -1423,7 +1424,7 @@ def _sqlglot_lineage_nocache(
|
|||||||
schema_resolver: SchemaResolverInterface,
|
schema_resolver: SchemaResolverInterface,
|
||||||
default_db: Optional[str] = None,
|
default_db: Optional[str] = None,
|
||||||
default_schema: Optional[str] = None,
|
default_schema: Optional[str] = None,
|
||||||
default_dialect: Optional[str] = None,
|
override_dialect: Optional[DialectOrStr] = None,
|
||||||
) -> SqlParsingResult:
|
) -> SqlParsingResult:
|
||||||
"""Parse a SQL statement and generate lineage information.
|
"""Parse a SQL statement and generate lineage information.
|
||||||
|
|
||||||
@ -1441,8 +1442,8 @@ def _sqlglot_lineage_nocache(
|
|||||||
can be brittle with respect to missing schema information and complex
|
can be brittle with respect to missing schema information and complex
|
||||||
SQL logic like UNNESTs.
|
SQL logic like UNNESTs.
|
||||||
|
|
||||||
The SQL dialect can be given as an argument called default_dialect or it can
|
The SQL dialect will be inferred from the schema_resolver's platform.
|
||||||
be inferred from the schema_resolver's platform.
|
That inference can be overridden by passing an override_dialect argument.
|
||||||
The set of supported dialects is the same as sqlglot's. See their
|
The set of supported dialects is the same as sqlglot's. See their
|
||||||
`documentation <https://sqlglot.com/sqlglot/dialects/dialect.html#Dialects>`_
|
`documentation <https://sqlglot.com/sqlglot/dialects/dialect.html#Dialects>`_
|
||||||
for the full list.
|
for the full list.
|
||||||
@ -1457,7 +1458,7 @@ def _sqlglot_lineage_nocache(
|
|||||||
schema_resolver: The schema resolver to use for resolving table schemas.
|
schema_resolver: The schema resolver to use for resolving table schemas.
|
||||||
default_db: The default database to use for unqualified table names.
|
default_db: The default database to use for unqualified table names.
|
||||||
default_schema: The default schema to use for unqualified table names.
|
default_schema: The default schema to use for unqualified table names.
|
||||||
default_dialect: A default dialect to override the dialect provided by 'schema_resolver'.
|
override_dialect: Override the dialect provided by 'schema_resolver'.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A SqlParsingResult object containing the parsed lineage information.
|
A SqlParsingResult object containing the parsed lineage information.
|
||||||
@ -1482,7 +1483,7 @@ def _sqlglot_lineage_nocache(
|
|||||||
schema_resolver=schema_resolver,
|
schema_resolver=schema_resolver,
|
||||||
default_db=default_db,
|
default_db=default_db,
|
||||||
default_schema=default_schema,
|
default_schema=default_schema,
|
||||||
default_dialect=default_dialect,
|
override_dialect=override_dialect,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return SqlParsingResult.make_from_error(e)
|
return SqlParsingResult.make_from_error(e)
|
||||||
@ -1520,15 +1521,15 @@ def sqlglot_lineage(
|
|||||||
schema_resolver: SchemaResolverInterface,
|
schema_resolver: SchemaResolverInterface,
|
||||||
default_db: Optional[str] = None,
|
default_db: Optional[str] = None,
|
||||||
default_schema: Optional[str] = None,
|
default_schema: Optional[str] = None,
|
||||||
default_dialect: Optional[str] = None,
|
override_dialect: Optional[DialectOrStr] = None,
|
||||||
) -> SqlParsingResult:
|
) -> SqlParsingResult:
|
||||||
if schema_resolver.includes_temp_tables():
|
if schema_resolver.includes_temp_tables():
|
||||||
return _sqlglot_lineage_nocache(
|
return _sqlglot_lineage_nocache(
|
||||||
sql, schema_resolver, default_db, default_schema, default_dialect
|
sql, schema_resolver, default_db, default_schema, override_dialect
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
return _sqlglot_lineage_cached(
|
return _sqlglot_lineage_cached(
|
||||||
sql, schema_resolver, default_db, default_schema, default_dialect
|
sql, schema_resolver, default_db, default_schema, override_dialect
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -1580,7 +1581,7 @@ def create_lineage_sql_parsed_result(
|
|||||||
default_schema: Optional[str] = None,
|
default_schema: Optional[str] = None,
|
||||||
graph: Optional[DataHubGraph] = None,
|
graph: Optional[DataHubGraph] = None,
|
||||||
schema_aware: bool = True,
|
schema_aware: bool = True,
|
||||||
default_dialect: Optional[str] = None,
|
override_dialect: Optional[DialectOrStr] = None,
|
||||||
) -> SqlParsingResult:
|
) -> SqlParsingResult:
|
||||||
schema_resolver = create_schema_resolver(
|
schema_resolver = create_schema_resolver(
|
||||||
platform=platform,
|
platform=platform,
|
||||||
@ -1600,7 +1601,7 @@ def create_lineage_sql_parsed_result(
|
|||||||
schema_resolver=schema_resolver,
|
schema_resolver=schema_resolver,
|
||||||
default_db=default_db,
|
default_db=default_db,
|
||||||
default_schema=default_schema,
|
default_schema=default_schema,
|
||||||
default_dialect=default_dialect,
|
override_dialect=override_dialect,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return SqlParsingResult.make_from_error(e)
|
return SqlParsingResult.make_from_error(e)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user