mirror of
https://github.com/datahub-project/datahub.git
synced 2025-09-26 09:35:23 +00:00
feat(ingest): enable use_queries_v2 by default for snowflake/bigquery (#13601)
Co-authored-by: Sergio Gómez Villamor <sgomezvillamor@gmail.com>
This commit is contained in:
parent
9a8673c96e
commit
29ba3673fd
@ -42,6 +42,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
|
|||||||
- `DataHubGraph.parse_sql_lineage(default_dialect=...)` → `DataHubGraph.parse_sql_lineage(override_dialect=...)`
|
- `DataHubGraph.parse_sql_lineage(default_dialect=...)` → `DataHubGraph.parse_sql_lineage(override_dialect=...)`
|
||||||
- `LineageClient.add_lineage_via_sql(default_dialect=...)` → `LineageClient.add_lineage_via_sql(override_dialect=...)`
|
- `LineageClient.add_lineage_via_sql(default_dialect=...)` → `LineageClient.add_lineage_via_sql(override_dialect=...)`
|
||||||
- #14059: The `acryl-datahub-gx-plugin` now requires pydantic v2, which means the effective minimum supported version of GX is 0.17.15 (from Sept 2023).
|
- #14059: The `acryl-datahub-gx-plugin` now requires pydantic v2, which means the effective minimum supported version of GX is 0.17.15 (from Sept 2023).
|
||||||
|
- #13601: The `use_queries_v2` flag is now enabled by default for Snowflake and BigQuery ingestion. This improves lineage quality and increases the number of queries extracted. To keep the previous behavior, set `use_queries_v2: false` in the source config.
|
||||||
|
|
||||||
### Known Issues
|
### Known Issues
|
||||||
|
|
||||||
|
@ -1,12 +1,9 @@
|
|||||||
source:
|
source:
|
||||||
type: snowflake
|
type: snowflake
|
||||||
config:
|
config:
|
||||||
# This option is recommended to be used to ingest all lineage
|
# Recommended: enable this option to ingest all lineage on the first run.
|
||||||
ignore_start_time_lineage: true
|
ignore_start_time_lineage: true
|
||||||
|
|
||||||
# This flag tells the snowflake ingestion to use the more advanced query parsing. This will become the default eventually.
|
|
||||||
use_queries_v2: true
|
|
||||||
|
|
||||||
# Coordinates
|
# Coordinates
|
||||||
account_id: "abc48144"
|
account_id: "abc48144"
|
||||||
warehouse: "COMPUTE_WH"
|
warehouse: "COMPUTE_WH"
|
||||||
|
@ -342,7 +342,7 @@ class BigQueryV2Config(
|
|||||||
)
|
)
|
||||||
|
|
||||||
use_queries_v2: bool = Field(
|
use_queries_v2: bool = Field(
|
||||||
default=False,
|
default=True,
|
||||||
description="If enabled, uses the new queries extractor to extract queries from bigquery.",
|
description="If enabled, uses the new queries extractor to extract queries from bigquery.",
|
||||||
)
|
)
|
||||||
include_queries: bool = Field(
|
include_queries: bool = Field(
|
||||||
|
@ -236,7 +236,7 @@ class SnowflakeV2Config(
|
|||||||
)
|
)
|
||||||
|
|
||||||
use_queries_v2: bool = Field(
|
use_queries_v2: bool = Field(
|
||||||
default=False,
|
default=True,
|
||||||
description="If enabled, uses the new queries extractor to extract queries from snowflake.",
|
description="If enabled, uses the new queries extractor to extract queries from snowflake.",
|
||||||
)
|
)
|
||||||
include_queries: bool = Field(
|
include_queries: bool = Field(
|
||||||
|
@ -457,7 +457,7 @@ def test_bigquery_queries_v2_ingest(
|
|||||||
# if use_queries_v2 is set.
|
# if use_queries_v2 is set.
|
||||||
pipeline_config_dict: Dict[str, Any] = recipe(
|
pipeline_config_dict: Dict[str, Any] = recipe(
|
||||||
mcp_output_path=mcp_output_path,
|
mcp_output_path=mcp_output_path,
|
||||||
source_config_override={"use_queries_v2": True, "include_table_lineage": False},
|
source_config_override={"include_table_lineage": False},
|
||||||
)
|
)
|
||||||
|
|
||||||
run_and_get_pipeline(pipeline_config_dict)
|
run_and_get_pipeline(pipeline_config_dict)
|
||||||
@ -564,7 +564,6 @@ LIMIT 100
|
|||||||
pipeline_config_dict: Dict[str, Any] = recipe(
|
pipeline_config_dict: Dict[str, Any] = recipe(
|
||||||
mcp_output_path=mcp_output_path,
|
mcp_output_path=mcp_output_path,
|
||||||
source_config_override={
|
source_config_override={
|
||||||
"use_queries_v2": True,
|
|
||||||
"include_schema_metadata": False,
|
"include_schema_metadata": False,
|
||||||
"include_table_lineage": True,
|
"include_table_lineage": True,
|
||||||
"include_usage_statistics": True,
|
"include_usage_statistics": True,
|
||||||
|
@ -125,6 +125,7 @@ def test_snowflake_basic(pytestconfig, tmp_path, mock_time, mock_datahub_graph):
|
|||||||
validate_upstreams_against_patterns=False,
|
validate_upstreams_against_patterns=False,
|
||||||
include_operational_stats=True,
|
include_operational_stats=True,
|
||||||
incremental_lineage=False,
|
incremental_lineage=False,
|
||||||
|
use_queries_v2=False,
|
||||||
start_time=datetime(2022, 6, 6, 0, 0, 0, 0).replace(
|
start_time=datetime(2022, 6, 6, 0, 0, 0, 0).replace(
|
||||||
tzinfo=timezone.utc
|
tzinfo=timezone.utc
|
||||||
),
|
),
|
||||||
@ -220,6 +221,7 @@ def test_snowflake_tags_as_structured_properties(
|
|||||||
password="TST_PWD",
|
password="TST_PWD",
|
||||||
match_fully_qualified_names=True,
|
match_fully_qualified_names=True,
|
||||||
schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]),
|
schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]),
|
||||||
|
use_queries_v2=False,
|
||||||
include_technical_schema=True,
|
include_technical_schema=True,
|
||||||
include_table_lineage=False,
|
include_table_lineage=False,
|
||||||
include_column_lineage=False,
|
include_column_lineage=False,
|
||||||
@ -286,6 +288,7 @@ def test_snowflake_private_link_and_incremental_mcps(
|
|||||||
include_views=True,
|
include_views=True,
|
||||||
include_usage_stats=False,
|
include_usage_stats=False,
|
||||||
format_sql_queries=True,
|
format_sql_queries=True,
|
||||||
|
use_queries_v2=False,
|
||||||
incremental_lineage=False,
|
incremental_lineage=False,
|
||||||
incremental_properties=True,
|
incremental_properties=True,
|
||||||
include_operational_stats=False,
|
include_operational_stats=False,
|
||||||
|
@ -59,6 +59,7 @@ def snowflake_pipeline_config(tmp_path):
|
|||||||
match_fully_qualified_names=True,
|
match_fully_qualified_names=True,
|
||||||
schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]),
|
schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]),
|
||||||
include_usage_stats=False,
|
include_usage_stats=False,
|
||||||
|
use_queries_v2=False,
|
||||||
start_time=datetime(2022, 6, 6, 0, 0, 0, 0).replace(
|
start_time=datetime(2022, 6, 6, 0, 0, 0, 0).replace(
|
||||||
tzinfo=timezone.utc,
|
tzinfo=timezone.utc,
|
||||||
),
|
),
|
||||||
|
@ -32,6 +32,7 @@ def stateful_pipeline_config(include_tables: bool) -> PipelineConfig:
|
|||||||
schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]),
|
schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]),
|
||||||
include_tables=include_tables,
|
include_tables=include_tables,
|
||||||
incremental_lineage=False,
|
incremental_lineage=False,
|
||||||
|
use_queries_v2=False,
|
||||||
stateful_ingestion=StatefulStaleMetadataRemovalConfig.parse_obj(
|
stateful_ingestion=StatefulStaleMetadataRemovalConfig.parse_obj(
|
||||||
{
|
{
|
||||||
"enabled": True,
|
"enabled": True,
|
||||||
|
@ -33,6 +33,7 @@ def test_snowflake_tag_pattern():
|
|||||||
include_column_lineage=False,
|
include_column_lineage=False,
|
||||||
include_usage_stats=False,
|
include_usage_stats=False,
|
||||||
include_operational_stats=False,
|
include_operational_stats=False,
|
||||||
|
use_queries_v2=False,
|
||||||
extract_tags=TagOption.without_lineage,
|
extract_tags=TagOption.without_lineage,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -76,6 +77,7 @@ def test_snowflake_tag_pattern_deny():
|
|||||||
include_column_lineage=False,
|
include_column_lineage=False,
|
||||||
include_usage_stats=False,
|
include_usage_stats=False,
|
||||||
include_operational_stats=False,
|
include_operational_stats=False,
|
||||||
|
use_queries_v2=False,
|
||||||
extract_tags=TagOption.without_lineage,
|
extract_tags=TagOption.without_lineage,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -116,6 +118,7 @@ def test_snowflake_structured_property_pattern_deny():
|
|||||||
schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]),
|
schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]),
|
||||||
extract_tags_as_structured_properties=True,
|
extract_tags_as_structured_properties=True,
|
||||||
structured_properties_template_cache_invalidation_interval=0,
|
structured_properties_template_cache_invalidation_interval=0,
|
||||||
|
use_queries_v2=False,
|
||||||
tag_pattern=AllowDenyPattern(
|
tag_pattern=AllowDenyPattern(
|
||||||
deny=["TEST_DB.TEST_SCHEMA.my_tag_2:my_value_2"]
|
deny=["TEST_DB.TEST_SCHEMA.my_tag_2:my_value_2"]
|
||||||
),
|
),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user