mirror of
https://github.com/datahub-project/datahub.git
synced 2025-09-25 17:15:09 +00:00
feat(ingest): enable use_queries_v2 by default for snowflake/bigquery (#13601)
Co-authored-by: Sergio Gómez Villamor <sgomezvillamor@gmail.com>
This commit is contained in:
parent
9a8673c96e
commit
29ba3673fd
@ -42,6 +42,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
|
||||
- `DataHubGraph.parse_sql_lineage(default_dialect=...)` → `DataHubGraph.parse_sql_lineage(override_dialect=...)`
|
||||
- `LineageClient.add_lineage_via_sql(default_dialect=...)` → `LineageClient.add_lineage_via_sql(override_dialect=...)`
|
||||
- #14059: The `acryl-datahub-gx-plugin` now requires pydantic v2, which means the effective minimum supported version of GX is 0.17.15 (from Sept 2023).
|
||||
- #13601: The `use_queries_v2` flag is now enabled by default for Snowflake and BigQuery ingestion. This improves lineage quality and increases the number of queries extracted. To keep the previous behavior, explicitly set `use_queries_v2: false` in the source config.
|
||||
|
||||
### Known Issues
|
||||
|
||||
|
@ -1,12 +1,9 @@
|
||||
source:
|
||||
type: snowflake
|
||||
config:
|
||||
# This option is recommended to be used to ingest all lineage
|
||||
# Recommended: enable this option to ingest all lineage on the first run.
|
||||
ignore_start_time_lineage: true
|
||||
|
||||
# This flag tells the snowflake ingestion to use the more advanced query parsing. This will become the default eventually.
|
||||
use_queries_v2: true
|
||||
|
||||
# Coordinates
|
||||
account_id: "abc48144"
|
||||
warehouse: "COMPUTE_WH"
|
||||
|
@ -342,7 +342,7 @@ class BigQueryV2Config(
|
||||
)
|
||||
|
||||
use_queries_v2: bool = Field(
|
||||
default=False,
|
||||
default=True,
|
||||
description="If enabled, uses the new queries extractor to extract queries from bigquery.",
|
||||
)
|
||||
include_queries: bool = Field(
|
||||
|
@ -236,7 +236,7 @@ class SnowflakeV2Config(
|
||||
)
|
||||
|
||||
use_queries_v2: bool = Field(
|
||||
default=False,
|
||||
default=True,
|
||||
description="If enabled, uses the new queries extractor to extract queries from snowflake.",
|
||||
)
|
||||
include_queries: bool = Field(
|
||||
|
@ -457,7 +457,7 @@ def test_bigquery_queries_v2_ingest(
|
||||
# if use_queries_v2 is set.
|
||||
pipeline_config_dict: Dict[str, Any] = recipe(
|
||||
mcp_output_path=mcp_output_path,
|
||||
source_config_override={"use_queries_v2": True, "include_table_lineage": False},
|
||||
source_config_override={"include_table_lineage": False},
|
||||
)
|
||||
|
||||
run_and_get_pipeline(pipeline_config_dict)
|
||||
@ -564,7 +564,6 @@ LIMIT 100
|
||||
pipeline_config_dict: Dict[str, Any] = recipe(
|
||||
mcp_output_path=mcp_output_path,
|
||||
source_config_override={
|
||||
"use_queries_v2": True,
|
||||
"include_schema_metadata": False,
|
||||
"include_table_lineage": True,
|
||||
"include_usage_statistics": True,
|
||||
|
@ -125,6 +125,7 @@ def test_snowflake_basic(pytestconfig, tmp_path, mock_time, mock_datahub_graph):
|
||||
validate_upstreams_against_patterns=False,
|
||||
include_operational_stats=True,
|
||||
incremental_lineage=False,
|
||||
use_queries_v2=False,
|
||||
start_time=datetime(2022, 6, 6, 0, 0, 0, 0).replace(
|
||||
tzinfo=timezone.utc
|
||||
),
|
||||
@ -220,6 +221,7 @@ def test_snowflake_tags_as_structured_properties(
|
||||
password="TST_PWD",
|
||||
match_fully_qualified_names=True,
|
||||
schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]),
|
||||
use_queries_v2=False,
|
||||
include_technical_schema=True,
|
||||
include_table_lineage=False,
|
||||
include_column_lineage=False,
|
||||
@ -286,6 +288,7 @@ def test_snowflake_private_link_and_incremental_mcps(
|
||||
include_views=True,
|
||||
include_usage_stats=False,
|
||||
format_sql_queries=True,
|
||||
use_queries_v2=False,
|
||||
incremental_lineage=False,
|
||||
incremental_properties=True,
|
||||
include_operational_stats=False,
|
||||
|
@ -59,6 +59,7 @@ def snowflake_pipeline_config(tmp_path):
|
||||
match_fully_qualified_names=True,
|
||||
schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]),
|
||||
include_usage_stats=False,
|
||||
use_queries_v2=False,
|
||||
start_time=datetime(2022, 6, 6, 0, 0, 0, 0).replace(
|
||||
tzinfo=timezone.utc,
|
||||
),
|
||||
|
@ -32,6 +32,7 @@ def stateful_pipeline_config(include_tables: bool) -> PipelineConfig:
|
||||
schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]),
|
||||
include_tables=include_tables,
|
||||
incremental_lineage=False,
|
||||
use_queries_v2=False,
|
||||
stateful_ingestion=StatefulStaleMetadataRemovalConfig.parse_obj(
|
||||
{
|
||||
"enabled": True,
|
||||
|
@ -33,6 +33,7 @@ def test_snowflake_tag_pattern():
|
||||
include_column_lineage=False,
|
||||
include_usage_stats=False,
|
||||
include_operational_stats=False,
|
||||
use_queries_v2=False,
|
||||
extract_tags=TagOption.without_lineage,
|
||||
)
|
||||
|
||||
@ -76,6 +77,7 @@ def test_snowflake_tag_pattern_deny():
|
||||
include_column_lineage=False,
|
||||
include_usage_stats=False,
|
||||
include_operational_stats=False,
|
||||
use_queries_v2=False,
|
||||
extract_tags=TagOption.without_lineage,
|
||||
)
|
||||
|
||||
@ -116,6 +118,7 @@ def test_snowflake_structured_property_pattern_deny():
|
||||
schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]),
|
||||
extract_tags_as_structured_properties=True,
|
||||
structured_properties_template_cache_invalidation_interval=0,
|
||||
use_queries_v2=False,
|
||||
tag_pattern=AllowDenyPattern(
|
||||
deny=["TEST_DB.TEST_SCHEMA.my_tag_2:my_value_2"]
|
||||
),
|
||||
|
Loading…
x
Reference in New Issue
Block a user