# Mirror of https://github.com/datahub-project/datahub.git
# Synced 2025-07-21 00:21:06 +00:00
# 306 lines, 7.8 KiB, Python
from datahub.utilities.bigquery_sql_parser import BigQuerySQLParser
|
|
|
|
|
|
def test_bigquery_sql_parser_comments_are_removed():
    """Block, ``#`` and ``--`` comments must not appear in the parsed query.

    The input mixes a ``/* ... */`` block with ``#`` and ``--`` line
    comments; the parsed query is expected to contain only the statement,
    and the single source table must still be discovered.

    NOTE(review): the equality check is whitespace-sensitive -- confirm the
    indentation inside the SQL literals matches what the parser emits.
    """
    commented_sql = """
/*
HERE IS A STANDARD COMMENT BLOCK
THIS WILL NOT BREAK sqllineage
*/
CREATE OR REPLACE TABLE `project.dataset.test_view` AS
#This, comment will not break sqllineage
SELECT foo
-- this comment will not break sqllineage either
# this comment will not break sqllineage either
FROM `project.dataset.src_table`
"""
    expected_sql = (
        "CREATE OR REPLACE TABLE `project.dataset.test_view` AS SELECT foo\n"
        "FROM `project.dataset.src_table`"
    )

    bq_parser = BigQuerySQLParser(sql_query=commented_sql)

    assert bq_parser._parsed_sql_query == expected_sql

    assert bq_parser.get_tables() == ["project.dataset.src_table"]
|
|
|
|
|
|
def test_bigquery_sql_parser_formats_input_sql():
    """The parser re-flows the statement so each clause starts its own line.

    A two-table ``INNER JOIN`` is supplied on three lines; the parsed query
    is expected on four lines (``FROM``, ``INNER JOIN`` and ``ON`` split
    apart), and both joined tables must be reported.

    NOTE(review): the equality check is whitespace-sensitive -- confirm the
    indentation inside the SQL literals matches what the parser emits.
    """
    raw_sql = """
CREATE OR REPLACE TABLE `project.dataset.test_view` AS
SELECT foo FROM `project.dataset.src_table_a` AS a
INNER JOIN `project.dataset.src_table_b` AS b ON a.key_field = b.key_field
"""
    expected_sql = "\n".join(
        [
            "CREATE OR REPLACE TABLE `project.dataset.test_view` AS SELECT foo",
            "FROM `project.dataset.src_table_a` AS a",
            "INNER JOIN `project.dataset.src_table_b` AS b",
            "ON a.key_field = b.key_field",
        ]
    )

    bq_parser = BigQuerySQLParser(sql_query=raw_sql)

    assert bq_parser._parsed_sql_query == expected_sql

    expected_tables = [
        "project.dataset.src_table_a",
        "project.dataset.src_table_b",
    ]
    assert bq_parser.get_tables() == expected_tables
|
|
|
|
|
|
def test_remove_comma_before_from():
    """A trailing comma directly before ``from`` is replaced by a space.

    Three cases are checked, in order: comma glued to ``from`` on one line,
    a query that has no trailing comma (must come back unchanged), and a
    comma at the end of the line preceding ``from``.

    NOTE(review): the comparisons are whitespace-sensitive -- confirm the
    indentation of the SQL literals against the helper's real output.
    """
    cases = [
        # (input, expected)
        (
            "\nselect a, b,from `project.dataset.table_name_1`\n",
            "\nselect a, b from `project.dataset.table_name_1`\n",
        ),
        (
            "\nselect a, b from `project.dataset.table_name_1`\n",
            "\nselect a, b from `project.dataset.table_name_1`\n",
        ),
        (
            "\nselect\na,\nb,\nfrom `project.dataset.table_name_1`\n",
            "\nselect\na,\nb from `project.dataset.table_name_1`\n",
        ),
    ]

    for raw_sql, expected_sql in cases:
        assert BigQuerySQLParser._remove_comma_before_from(raw_sql) == expected_sql
|
|
|
|
|
|
def test_bigquery_sql_parser_subquery():
    """Both tables in a parenthesised ``as (select ...)`` body are reported.

    NOTE(review): the opening parenthesis of the sub-select is never closed
    in the input SQL -- confirm that is intentional before "fixing" it.
    """
    subquery_sql = """
create or replace table smoke_test_db.table_from_view_and_table
as (select b.date_utc, v.revenue from smoke_test_db.base_table b, smoke_test_db.view_from_table v
"""
    expected_tables = [
        "smoke_test_db.base_table",
        "smoke_test_db.view_from_table",
    ]

    bq_parser = BigQuerySQLParser(sql_query=subquery_sql)
    assert bq_parser.get_tables() == expected_tables
|
|
|
|
|
|
def test_bigquery_sql_parser_comment_sign_switched_correctly():
    """``#`` line comments are rewritten to the ``-- `` form.

    Covers a ``#`` immediately followed by text and a ``#`` followed by a
    space; both are expected to come out as ``-- <text>``.

    NOTE(review): the comparison is whitespace-sensitive -- confirm the
    indentation of the SQL literals against the helper's real output.
    """
    hash_commented = "\n#upper comment\nSELECT * FROM hello\n# lower comment\n"
    dash_commented = "\n-- upper comment\nSELECT * FROM hello\n-- lower comment\n"

    converted = BigQuerySQLParser._parse_bigquery_comment_sign(hash_commented)

    assert converted == dash_commented
|
|
|
|
|
|
def test_bigquery_sql_parser_keyword_from_is_escaped_if_used_as_fieldname():
    """A ``<name>.from`` field reference is wrapped in backticks.

    The real ``FROM`` clause later in the statement must stay untouched.

    NOTE(review): the comparison is whitespace-sensitive -- confirm the
    indentation of the SQL literals against the helper's real output.
    """
    raw_sql = "\nSELECT hello.from AS col FROM hello\n"
    expected_sql = "\nSELECT `hello.from` AS col FROM hello\n"

    escaped = BigQuerySQLParser._escape_keyword_from_as_field_name(raw_sql)

    assert escaped == expected_sql
|
|
|
|
|
|
def test_bigquery_sql_parser_first_cte_name_is_escaped():
    """Only the CTE name directly after ``WITH`` gets backtick-quoted.

    The second CTE (``cte_2``) and the later ``FROM cte_1`` / ``FROM cte_2``
    references are expected to be left exactly as written.

    NOTE(review): the comparison is whitespace-sensitive -- confirm the
    indentation of the SQL literals against the helper's real output.
    """
    raw_sql = """
CREATE OR REPLACE VIEW `test_view` AS
WITH cte_1 AS (
SELECT * FROM foo
),
cte_2 AS (
SELECT * FROM bar
)
SELECT * FROM cte_1 UNION ALL
SELECT * FROM cte_2
"""
    expected_sql = """
CREATE OR REPLACE VIEW `test_view` AS
WITH `cte_1` AS (
SELECT * FROM foo
),
cte_2 AS (
SELECT * FROM bar
)
SELECT * FROM cte_1 UNION ALL
SELECT * FROM cte_2
"""

    escaped = BigQuerySQLParser._escape_cte_name_after_keyword_with(raw_sql)

    assert escaped == expected_sql
|
|
|
|
|
|
def test_bigquery_sql_parser_table_name_is_escaped_at_create_statement():
    """The table name in a ``CREATE [OR REPLACE] TABLE`` gets backticks.

    Both the plain ``CREATE TABLE`` and the ``CREATE OR REPLACE TABLE``
    spellings are exercised; the rest of the statement must be unchanged.

    NOTE(review): the comparisons are whitespace-sensitive -- confirm the
    indentation of the SQL literals against the helper's real output.
    """
    escape = BigQuerySQLParser._escape_table_or_view_name_at_create_statement

    create_input = """
CREATE TABLE project.dataset.test_table AS
col_1 STRING,
col_2 STRING
"""
    create_or_replace_input = """
CREATE OR REPLACE TABLE project.dataset.test_table AS
col_1 STRING,
col_2 STRING
"""

    # Run both conversions before asserting, as the checks are independent.
    create_result = escape(create_input)
    create_or_replace_result = escape(create_or_replace_input)

    expected_create = """
CREATE TABLE `project.dataset.test_table` AS
col_1 STRING,
col_2 STRING
"""
    expected_create_or_replace = """
CREATE OR REPLACE TABLE `project.dataset.test_table` AS
col_1 STRING,
col_2 STRING
"""

    assert create_result == expected_create
    assert create_or_replace_result == expected_create_or_replace
|
|
|
|
|
|
def test_bigquery_sql_parser_view_name_is_escaped_at_create_statement():
    """The view name in a ``CREATE [OR REPLACE] VIEW`` gets backticks.

    The table named in the ``FROM`` clause is expected to stay unquoted;
    only the object being created is escaped by this helper.

    NOTE(review): the comparisons are whitespace-sensitive -- confirm the
    indentation of the SQL literals against the helper's real output.
    """
    escape = BigQuerySQLParser._escape_table_or_view_name_at_create_statement

    create_input = """
CREATE VIEW project.dataset.test_view AS
SELECT * FROM project.dataset.src_table
"""
    create_or_replace_input = """
CREATE OR REPLACE VIEW project.dataset.test_view AS
SELECT * FROM project.dataset.src_table
"""

    # Run both conversions before asserting, as the checks are independent.
    create_result = escape(create_input)
    create_or_replace_result = escape(create_or_replace_input)

    expected_create = """
CREATE VIEW `project.dataset.test_view` AS
SELECT * FROM project.dataset.src_table
"""
    expected_create_or_replace = """
CREATE OR REPLACE VIEW `project.dataset.test_view` AS
SELECT * FROM project.dataset.src_table
"""

    assert create_result == expected_create
    assert create_or_replace_result == expected_create_or_replace
|
|
|
|
|
|
def test_bigquery_sql_parser_object_name_is_escaped_after_keyword_from():
    """Unquoted object names following ``FROM`` are wrapped in backticks.

    Covers a hyphenated project id (``src-project``) and a plain one, each
    in its own branch of a ``UNION ALL``.

    NOTE(review): the comparison is whitespace-sensitive -- confirm the
    indentation of the SQL literals against the helper's real output.
    """
    raw_sql = """
CREATE OR REPLACE VIEW `project.dataset.test_view` AS
SELECT * FROM src-project.dataset.src_table_a UNION ALL
SELECT * FROM project.dataset.src_table_b
"""
    expected_sql = """
CREATE OR REPLACE VIEW `project.dataset.test_view` AS
SELECT * FROM `src-project.dataset.src_table_a` UNION ALL
SELECT * FROM `project.dataset.src_table_b`
"""

    escaped = BigQuerySQLParser._escape_object_name_after_keyword_from(raw_sql)

    assert escaped == expected_sql
|
|
|
|
|
|
def test_bigquery_sql_parser_field_name_is_not_escaped_after_keyword_from_in_datetime_functions():
    """``EXTRACT(<part> FROM field)`` must not have its field backtick-quoted.

    Seventeen ``EXTRACT`` columns are generated, one per date/time part
    (``WEEK`` appears twice, as ``col_9`` and ``col_10``).  After escaping,
    only the table in the statement's real ``FROM`` clause is expected to
    gain backticks; every ``EXTRACT`` expression must be unchanged.

    NOTE(review): the comparison is whitespace-sensitive -- confirm the
    indentation of the SQL text against the helper's real output.
    """
    date_parts = (
        "MICROSECOND",
        "MILLISECOND",
        "SECOND",
        "MINUTE",
        "HOUR",
        "DAYOFWEEK",
        "DAY",
        "DAYOFYEAR",
        "WEEK",
        "WEEK",
        "ISOWEEK",
        "MONTH",
        "QUARTER",
        "YEAR",
        "ISOYEAR",
        "DATE",
        "TIME",
    )
    # One "EXTRACT(...) AS col_N" per line, comma-separated, no trailing comma.
    select_list = ",\n".join(
        f"EXTRACT({part} FROM time_field) AS col_{position}"
        for position, part in enumerate(date_parts, start=1)
    )
    # Everything up to (not including) the final FROM line is shared between
    # the input and the expected output.
    shared_head = (
        "\nCREATE OR REPLACE VIEW `project.dataset.test_view` AS\n"
        "SELECT\n" + select_list + "\n"
    )
    raw_sql = shared_head + "FROM src-project.dataset.src_table_a\n"
    expected_sql = shared_head + "FROM `src-project.dataset.src_table_a`\n"

    escaped = BigQuerySQLParser._escape_object_name_after_keyword_from(raw_sql)

    assert escaped == expected_sql
|