2025-04-03 10:39:47 +05:30
|
|
|
# Copyright 2025 Collate
|
|
|
|
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
2022-02-04 21:50:20 +05:30
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
2025-04-03 10:39:47 +05:30
|
|
|
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
2022-02-04 21:50:20 +05:30
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
|
|
|
|
"""
|
2022-07-18 18:50:27 +02:00
|
|
|
Validate query parser logic
|
2022-02-04 21:50:20 +05:30
|
|
|
"""
|
2022-07-18 18:50:27 +02:00
|
|
|
|
2022-02-04 21:50:20 +05:30
|
|
|
from unittest import TestCase
|
|
|
|
|
2024-02-05 19:44:08 +05:30
|
|
|
from collate_sqllineage.core.models import Column
|
2023-01-03 09:27:45 +01:00
|
|
|
|
2022-07-18 18:50:27 +02:00
|
|
|
from metadata.generated.schema.type.tableUsageCount import TableColumn, TableColumnJoin
|
2023-03-08 20:49:02 +01:00
|
|
|
from metadata.ingestion.lineage.models import Dialect
|
2022-11-30 16:02:21 +01:00
|
|
|
from metadata.ingestion.lineage.parser import LineageParser
|
2022-07-18 18:50:27 +02:00
|
|
|
|
|
|
|
|
|
|
|
class QueryParserTests(TestCase):
|
|
|
|
"""
|
|
|
|
Check methods from query_parser.py
|
|
|
|
"""
|
2022-02-04 21:50:20 +05:30
|
|
|
|
2023-03-08 20:49:02 +01:00
|
|
|
col_lineage = """
|
2022-07-18 18:50:27 +02:00
|
|
|
SELECT
|
|
|
|
a.col1,
|
|
|
|
a.col2 + b.col2 AS col2,
|
|
|
|
case
|
|
|
|
when col1 = 3 then 'hello'
|
|
|
|
else 'bye'
|
|
|
|
end as new_col
|
|
|
|
FROM foo a
|
|
|
|
JOIN db.grault b
|
|
|
|
ON a.col1 = b.col1
|
|
|
|
JOIN db.holis c
|
|
|
|
ON a.col1 = c.abc
|
|
|
|
JOIN db.random d
|
|
|
|
ON a.col2 = d.col2
|
|
|
|
WHERE a.col3 = 'abc'
|
|
|
|
"""
|
2022-02-04 21:50:20 +05:30
|
|
|
|
2022-11-30 16:02:21 +01:00
|
|
|
parser = LineageParser(col_lineage)
|
2023-03-08 20:49:02 +01:00
|
|
|
parser_with_dialect = LineageParser(col_lineage, dialect=Dialect.TSQL)
|
2022-07-18 18:50:27 +02:00
|
|
|
|
|
|
|
def test_involved_tables(self):
|
2023-03-08 20:49:02 +01:00
|
|
|
expected_tables = {"db.grault", "db.holis", "<default>.foo", "db.random"}
|
2022-11-30 16:02:21 +01:00
|
|
|
tables = {str(table) for table in self.parser.involved_tables}
|
2023-03-08 20:49:02 +01:00
|
|
|
self.assertEqual(tables, expected_tables)
|
|
|
|
tables = {str(table) for table in self.parser_with_dialect.involved_tables}
|
|
|
|
self.assertEqual(tables, expected_tables)
|
2022-07-18 18:50:27 +02:00
|
|
|
|
|
|
|
def test_clean_parser_table_list(self):
|
2023-03-08 20:49:02 +01:00
|
|
|
expected_tables = {"db.grault", "db.holis", "foo", "db.random"}
|
2022-11-30 16:02:21 +01:00
|
|
|
clean_tables = set(self.parser.clean_table_list)
|
2023-03-08 20:49:02 +01:00
|
|
|
self.assertEqual(clean_tables, expected_tables)
|
|
|
|
clean_tables = set(self.parser_with_dialect.clean_table_list)
|
|
|
|
self.assertEqual(clean_tables, expected_tables)
|
2022-07-18 18:50:27 +02:00
|
|
|
|
2022-12-13 06:40:37 +01:00
|
|
|
def test_bracketed_parser_table_list(self):
|
2023-03-08 20:49:02 +01:00
|
|
|
expected_tables = {"test_schema.test_view", "test_table"}
|
2022-12-13 06:40:37 +01:00
|
|
|
parser = LineageParser(
|
|
|
|
"create view [test_schema].[test_view] as select * from [test_table];"
|
|
|
|
)
|
|
|
|
clean_tables = set(parser.clean_table_list)
|
2023-03-08 20:49:02 +01:00
|
|
|
self.assertEqual(clean_tables, expected_tables)
|
|
|
|
parser = LineageParser(
|
|
|
|
"create view [test_schema].[test_view] as select * from [test_table];",
|
|
|
|
dialect=Dialect.TSQL,
|
|
|
|
)
|
|
|
|
clean_tables = set(parser.clean_table_list)
|
|
|
|
self.assertEqual(clean_tables, expected_tables)
|
2022-12-13 06:40:37 +01:00
|
|
|
|
2022-07-18 18:50:27 +02:00
|
|
|
def test_parser_table_aliases(self):
|
2023-03-08 20:49:02 +01:00
|
|
|
expected_tables = {
|
|
|
|
"b": "db.grault",
|
|
|
|
"c": "db.holis",
|
|
|
|
"a": "foo",
|
|
|
|
"d": "db.random",
|
|
|
|
}
|
2022-11-30 16:02:21 +01:00
|
|
|
aliases = self.parser.table_aliases
|
2023-03-08 20:49:02 +01:00
|
|
|
self.assertEqual(aliases, expected_tables)
|
|
|
|
aliases = self.parser_with_dialect.table_aliases
|
|
|
|
self.assertEqual(aliases, expected_tables)
|
2022-07-18 18:50:27 +02:00
|
|
|
|
|
|
|
def test_get_table_joins(self):
|
2022-02-04 21:50:20 +05:30
|
|
|
"""
|
2022-07-18 18:50:27 +02:00
|
|
|
main logic point
|
2022-02-04 21:50:20 +05:30
|
|
|
"""
|
2023-03-08 20:49:02 +01:00
|
|
|
expected_joins = [
|
|
|
|
TableColumnJoin(
|
|
|
|
tableColumn=TableColumn(table="foo", column="col1"),
|
|
|
|
joinedWith=[
|
|
|
|
TableColumn(table="db.grault", column="col1"),
|
|
|
|
TableColumn(table="db.holis", column="abc"),
|
|
|
|
],
|
|
|
|
),
|
|
|
|
TableColumnJoin(
|
|
|
|
tableColumn=TableColumn(table="foo", column="col2"),
|
|
|
|
joinedWith=[
|
|
|
|
TableColumn(table="db.random", column="col2"),
|
|
|
|
],
|
|
|
|
),
|
|
|
|
]
|
|
|
|
|
2022-11-30 16:02:21 +01:00
|
|
|
joins = self.parser.table_joins
|
2022-07-18 18:50:27 +02:00
|
|
|
|
|
|
|
self.assertEqual(
|
|
|
|
joins["foo"],
|
2023-03-08 20:49:02 +01:00
|
|
|
expected_joins,
|
|
|
|
)
|
|
|
|
|
|
|
|
joins = self.parser_with_dialect.table_joins
|
|
|
|
|
|
|
|
self.assertEqual(
|
|
|
|
joins["foo"],
|
|
|
|
expected_joins,
|
2022-07-18 18:50:27 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
def test_capitals(self):
|
|
|
|
"""
|
|
|
|
Example on how LineageRunner keeps capitals
|
|
|
|
for column names
|
|
|
|
"""
|
|
|
|
|
|
|
|
query = """
|
|
|
|
SELECT
|
|
|
|
USERS.ID,
|
|
|
|
li.id
|
|
|
|
FROM TESTDB.PUBLIC.USERS
|
|
|
|
JOIN testdb.PUBLIC."lowercase_users" li
|
|
|
|
ON USERS.id = li.ID
|
|
|
|
;
|
|
|
|
"""
|
|
|
|
|
2023-03-08 20:49:02 +01:00
|
|
|
expected_joins = [
|
|
|
|
TableColumnJoin(
|
|
|
|
tableColumn=TableColumn(
|
|
|
|
table="testdb.public.users", column="id"
|
|
|
|
), # lowercase col
|
|
|
|
joinedWith=[
|
|
|
|
TableColumn(
|
|
|
|
table="testdb.public.lowercase_users", column="ID"
|
|
|
|
), # uppercase col
|
|
|
|
],
|
|
|
|
),
|
|
|
|
]
|
|
|
|
|
2022-11-30 16:02:21 +01:00
|
|
|
parser = LineageParser(query)
|
2022-07-18 18:50:27 +02:00
|
|
|
|
2022-11-30 16:02:21 +01:00
|
|
|
joins = parser.table_joins
|
2022-07-18 18:50:27 +02:00
|
|
|
|
|
|
|
self.assertEqual(
|
|
|
|
joins["testdb.public.users"],
|
2023-03-08 20:49:02 +01:00
|
|
|
expected_joins,
|
|
|
|
)
|
|
|
|
|
|
|
|
parser = LineageParser(query, dialect=Dialect.MYSQL)
|
|
|
|
|
|
|
|
joins = parser.table_joins
|
|
|
|
|
|
|
|
self.assertEqual(
|
|
|
|
joins["testdb.public.users"],
|
|
|
|
expected_joins,
|
2022-07-18 18:50:27 +02:00
|
|
|
)
|
2022-10-24 17:22:22 +02:00
|
|
|
|
|
|
|
def test_clean_raw_query_copy_grants(self):
|
|
|
|
"""
|
2022-11-30 16:02:21 +01:00
|
|
|
Validate COPY GRANT query cleaning logic
|
2022-10-24 17:22:22 +02:00
|
|
|
"""
|
|
|
|
query = "create or replace view my_view copy grants as select * from my_table"
|
|
|
|
self.assertEqual(
|
2022-11-30 16:02:21 +01:00
|
|
|
LineageParser.clean_raw_query(query),
|
2022-10-24 17:22:22 +02:00
|
|
|
"create or replace view my_view as select * from my_table",
|
|
|
|
)
|
|
|
|
|
|
|
|
def test_clean_raw_query_merge_into(self):
|
|
|
|
"""
|
2022-11-30 16:02:21 +01:00
|
|
|
Validate MERGE INTO query cleaning logic
|
2022-10-24 17:22:22 +02:00
|
|
|
"""
|
|
|
|
query = """
|
2022-10-25 19:29:59 +02:00
|
|
|
/* comment */ merge into table_1 using (select a, b from table_2) when matched update set t.a = 'value'
|
2022-10-24 17:22:22 +02:00
|
|
|
when not matched then insert (table_1.a, table_2.b) values ('value1', 'value2')
|
|
|
|
"""
|
|
|
|
self.assertEqual(
|
2022-11-30 16:02:21 +01:00
|
|
|
LineageParser.clean_raw_query(query),
|
2022-10-25 19:29:59 +02:00
|
|
|
"/* comment */ merge into table_1 using (select a, b from table_2)",
|
2022-10-24 17:22:22 +02:00
|
|
|
)
|
2022-11-30 16:02:21 +01:00
|
|
|
|
|
|
|
def test_clean_raw_query_copy_from(self):
|
|
|
|
"""
|
|
|
|
Validate COPY FROM query cleaning logic
|
|
|
|
"""
|
|
|
|
query = "COPY my_schema.my_table FROM 's3://bucket/path/object.csv';"
|
|
|
|
self.assertEqual(
|
|
|
|
LineageParser.clean_raw_query(query),
|
|
|
|
None,
|
|
|
|
)
|
2023-01-03 09:27:45 +01:00
|
|
|
|
|
|
|
def test_ctes_column_lineage(self):
|
|
|
|
"""
|
|
|
|
Validate we obtain information from Comon Table Expressions
|
|
|
|
"""
|
2024-01-25 10:15:32 +05:30
|
|
|
query = """CREATE TABLE TESTDB.PUBLIC.TARGET AS
|
2023-01-03 09:27:45 +01:00
|
|
|
WITH cte_table AS (
|
|
|
|
SELECT
|
|
|
|
USERS.ID,
|
|
|
|
USERS.NAME
|
|
|
|
FROM TESTDB.PUBLIC.USERS
|
|
|
|
),
|
|
|
|
cte_table2 AS (
|
|
|
|
SELECT
|
|
|
|
ID,
|
|
|
|
NAME
|
|
|
|
FROM cte_table
|
|
|
|
)
|
|
|
|
SELECT
|
|
|
|
ID,
|
|
|
|
NAME
|
|
|
|
FROM cte_table2
|
|
|
|
;
|
|
|
|
"""
|
|
|
|
|
2023-03-08 20:49:02 +01:00
|
|
|
expected_lineage = [
|
|
|
|
(
|
|
|
|
Column("testdb.public.users.id"),
|
|
|
|
Column("testdb.public.target.id"),
|
|
|
|
),
|
|
|
|
(
|
|
|
|
Column("testdb.public.users.name"),
|
|
|
|
Column("testdb.public.target.name"),
|
|
|
|
),
|
|
|
|
]
|
|
|
|
|
2023-01-03 09:27:45 +01:00
|
|
|
parser = LineageParser(query)
|
2023-03-08 20:49:02 +01:00
|
|
|
tables = {str(table) for table in parser.source_tables}
|
|
|
|
self.assertEqual(tables, {"testdb.public.users"})
|
|
|
|
self.assertEqual(
|
|
|
|
parser.column_lineage,
|
|
|
|
expected_lineage,
|
|
|
|
)
|
2023-01-03 09:27:45 +01:00
|
|
|
|
2023-03-08 20:49:02 +01:00
|
|
|
parser = LineageParser(query, dialect=Dialect.MYSQL)
|
2023-01-03 09:27:45 +01:00
|
|
|
tables = {str(table) for table in parser.source_tables}
|
|
|
|
self.assertEqual(tables, {"testdb.public.users"})
|
|
|
|
self.assertEqual(
|
|
|
|
parser.column_lineage,
|
2023-03-08 20:49:02 +01:00
|
|
|
expected_lineage,
|
2023-01-03 09:27:45 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
def test_table_with_single_comment(self):
|
|
|
|
"""
|
|
|
|
Validate we obtain information from Comon Table Expressions
|
|
|
|
"""
|
2024-01-25 10:15:32 +05:30
|
|
|
query = """CREATE TABLE TESTDB.PUBLIC.TARGET AS
|
2023-01-03 09:27:45 +01:00
|
|
|
SELECT
|
|
|
|
ID,
|
|
|
|
-- A comment here
|
|
|
|
NAME
|
|
|
|
FROM TESTDB.PUBLIC.USERS
|
|
|
|
;
|
|
|
|
"""
|
2023-03-08 20:49:02 +01:00
|
|
|
expected_tables = {"testdb.public.users", "testdb.public.target"}
|
|
|
|
expected_lineage = [
|
|
|
|
(Column("testdb.public.users.id"), Column("testdb.public.target.id")),
|
|
|
|
(
|
|
|
|
Column("testdb.public.users.name"),
|
|
|
|
Column("testdb.public.target.name"),
|
|
|
|
),
|
|
|
|
]
|
2023-01-03 09:27:45 +01:00
|
|
|
|
|
|
|
parser = LineageParser(query)
|
2023-03-08 20:49:02 +01:00
|
|
|
tables = {str(table) for table in parser.involved_tables}
|
|
|
|
self.assertEqual(tables, expected_tables)
|
|
|
|
self.assertEqual(
|
|
|
|
parser.column_lineage,
|
|
|
|
expected_lineage,
|
|
|
|
)
|
2023-01-03 09:27:45 +01:00
|
|
|
|
2023-03-08 20:49:02 +01:00
|
|
|
parser = LineageParser(query, Dialect.MYSQL)
|
2023-01-03 09:27:45 +01:00
|
|
|
tables = {str(table) for table in parser.involved_tables}
|
2023-03-08 20:49:02 +01:00
|
|
|
self.assertEqual(tables, expected_tables)
|
2023-01-03 09:27:45 +01:00
|
|
|
self.assertEqual(
|
|
|
|
parser.column_lineage,
|
2023-03-08 20:49:02 +01:00
|
|
|
expected_lineage,
|
2023-01-03 09:27:45 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
def test_table_with_aliases(self):
|
|
|
|
"""
|
|
|
|
Validate we obtain information from Comon Table Expressions
|
|
|
|
"""
|
2024-01-25 10:15:32 +05:30
|
|
|
query = """CREATE TABLE TESTDB.PUBLIC.TARGET AS
|
2023-01-03 09:27:45 +01:00
|
|
|
SELECT
|
|
|
|
ID AS new_identifier,
|
|
|
|
NAME new_name
|
|
|
|
FROM TESTDB.PUBLIC.USERS
|
|
|
|
;
|
|
|
|
"""
|
2023-03-08 20:49:02 +01:00
|
|
|
expected_lineage = [
|
|
|
|
(
|
|
|
|
Column("testdb.public.users.id"),
|
|
|
|
Column("testdb.public.target.new_identifier"),
|
|
|
|
),
|
|
|
|
(
|
|
|
|
Column("testdb.public.users.name"),
|
|
|
|
Column("testdb.public.target.new_name"),
|
|
|
|
),
|
|
|
|
]
|
|
|
|
expected_tables = {"testdb.public.users", "testdb.public.target"}
|
2023-01-03 09:27:45 +01:00
|
|
|
|
|
|
|
parser = LineageParser(query)
|
2023-03-08 20:49:02 +01:00
|
|
|
tables = {str(table) for table in parser.involved_tables}
|
|
|
|
self.assertEqual(tables, expected_tables)
|
|
|
|
self.assertEqual(
|
|
|
|
parser.column_lineage,
|
|
|
|
expected_lineage,
|
|
|
|
)
|
2023-01-03 09:27:45 +01:00
|
|
|
|
2023-03-08 20:49:02 +01:00
|
|
|
parser = LineageParser(query, Dialect.MYSQL)
|
2023-01-03 09:27:45 +01:00
|
|
|
tables = {str(table) for table in parser.involved_tables}
|
2023-03-08 20:49:02 +01:00
|
|
|
self.assertEqual(tables, expected_tables)
|
2023-01-03 09:27:45 +01:00
|
|
|
self.assertEqual(
|
|
|
|
parser.column_lineage,
|
2023-03-08 20:49:02 +01:00
|
|
|
expected_lineage,
|
2023-01-03 09:27:45 +01:00
|
|
|
)
|
2024-01-25 10:15:32 +05:30
|
|
|
|
|
|
|
def test_copy_query(self):
|
|
|
|
"""
|
|
|
|
Validate Copy query is skipped appropriately without any errors
|
|
|
|
"""
|
|
|
|
query = """COPY MY_TABLE col1,col2,col3
|
|
|
|
FROM 's3://bucket/schema/table.csv'
|
|
|
|
WITH CREDENTIALS ''
|
|
|
|
REGION 'US-east-2'
|
|
|
|
"""
|
|
|
|
expected_lineage = []
|
|
|
|
expected_tables = set()
|
|
|
|
|
|
|
|
parser = LineageParser(query)
|
|
|
|
tables = {str(table) for table in parser.involved_tables}
|
|
|
|
self.assertEqual(tables, expected_tables)
|
|
|
|
self.assertEqual(
|
|
|
|
parser.column_lineage,
|
|
|
|
expected_lineage,
|
|
|
|
)
|
|
|
|
|
|
|
|
parser = LineageParser(query, Dialect.MYSQL)
|
|
|
|
tables = {str(table) for table in parser.involved_tables}
|
|
|
|
self.assertEqual(tables, expected_tables)
|
|
|
|
self.assertEqual(
|
|
|
|
parser.column_lineage,
|
|
|
|
expected_lineage,
|
|
|
|
)
|