feat(ingest): enable use_queries_v2 by default for snowflake/bigquery (#13601)

Co-authored-by: Sergio Gómez Villamor <sgomezvillamor@gmail.com>
This commit is contained in:
Harshal Sheth 2025-07-17 00:03:45 -07:00 committed by GitHub
parent 9a8673c96e
commit 29ba3673fd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 13 additions and 8 deletions

View File

@ -42,6 +42,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
- `DataHubGraph.parse_sql_lineage(default_dialect=...)` -> `DataHubGraph.parse_sql_lineage(override_dialect=...)`
- `LineageClient.add_lineage_via_sql(default_dialect=...)` -> `LineageClient.add_lineage_via_sql(override_dialect=...)`
- #14059: The `acryl-datahub-gx-plugin` now requires pydantic v2, which means the effective minimum supported version of GX is 0.17.15 (from Sept 2023).
- #13601: The `use_queries_v2` flag is now enabled by default for Snowflake and BigQuery ingestion. This improves the quality of lineage and the quantity of queries extracted. To keep the previous behavior, explicitly set `use_queries_v2: false` in the source config.
### Known Issues

View File

@ -1,12 +1,9 @@
source:
type: snowflake
config:
# This option is recommended to be used to ingest all lineage
# This option is recommended to be used to ingest all lineage on the first run.
ignore_start_time_lineage: true
# This flag tells the snowflake ingestion to use the more advanced query parsing. This will become the default eventually.
use_queries_v2: true
# Coordinates
account_id: "abc48144"
warehouse: "COMPUTE_WH"

View File

@ -342,7 +342,7 @@ class BigQueryV2Config(
)
use_queries_v2: bool = Field(
default=False,
default=True,
description="If enabled, uses the new queries extractor to extract queries from bigquery.",
)
include_queries: bool = Field(

View File

@ -236,7 +236,7 @@ class SnowflakeV2Config(
)
use_queries_v2: bool = Field(
default=False,
default=True,
description="If enabled, uses the new queries extractor to extract queries from snowflake.",
)
include_queries: bool = Field(

View File

@ -457,7 +457,7 @@ def test_bigquery_queries_v2_ingest(
# if use_queries_v2 is set.
pipeline_config_dict: Dict[str, Any] = recipe(
mcp_output_path=mcp_output_path,
source_config_override={"use_queries_v2": True, "include_table_lineage": False},
source_config_override={"include_table_lineage": False},
)
run_and_get_pipeline(pipeline_config_dict)
@ -564,7 +564,6 @@ LIMIT 100
pipeline_config_dict: Dict[str, Any] = recipe(
mcp_output_path=mcp_output_path,
source_config_override={
"use_queries_v2": True,
"include_schema_metadata": False,
"include_table_lineage": True,
"include_usage_statistics": True,

View File

@ -125,6 +125,7 @@ def test_snowflake_basic(pytestconfig, tmp_path, mock_time, mock_datahub_graph):
validate_upstreams_against_patterns=False,
include_operational_stats=True,
incremental_lineage=False,
use_queries_v2=False,
start_time=datetime(2022, 6, 6, 0, 0, 0, 0).replace(
tzinfo=timezone.utc
),
@ -220,6 +221,7 @@ def test_snowflake_tags_as_structured_properties(
password="TST_PWD",
match_fully_qualified_names=True,
schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]),
use_queries_v2=False,
include_technical_schema=True,
include_table_lineage=False,
include_column_lineage=False,
@ -286,6 +288,7 @@ def test_snowflake_private_link_and_incremental_mcps(
include_views=True,
include_usage_stats=False,
format_sql_queries=True,
use_queries_v2=False,
incremental_lineage=False,
incremental_properties=True,
include_operational_stats=False,

View File

@ -59,6 +59,7 @@ def snowflake_pipeline_config(tmp_path):
match_fully_qualified_names=True,
schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]),
include_usage_stats=False,
use_queries_v2=False,
start_time=datetime(2022, 6, 6, 0, 0, 0, 0).replace(
tzinfo=timezone.utc,
),

View File

@ -32,6 +32,7 @@ def stateful_pipeline_config(include_tables: bool) -> PipelineConfig:
schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]),
include_tables=include_tables,
incremental_lineage=False,
use_queries_v2=False,
stateful_ingestion=StatefulStaleMetadataRemovalConfig.parse_obj(
{
"enabled": True,

View File

@ -33,6 +33,7 @@ def test_snowflake_tag_pattern():
include_column_lineage=False,
include_usage_stats=False,
include_operational_stats=False,
use_queries_v2=False,
extract_tags=TagOption.without_lineage,
)
@ -76,6 +77,7 @@ def test_snowflake_tag_pattern_deny():
include_column_lineage=False,
include_usage_stats=False,
include_operational_stats=False,
use_queries_v2=False,
extract_tags=TagOption.without_lineage,
)
@ -116,6 +118,7 @@ def test_snowflake_structured_property_pattern_deny():
schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]),
extract_tags_as_structured_properties=True,
structured_properties_template_cache_invalidation_interval=0,
use_queries_v2=False,
tag_pattern=AllowDenyPattern(
deny=["TEST_DB.TEST_SCHEMA.my_tag_2:my_value_2"]
),