| 
									
										
										
										
											2022-02-04 21:50:20 +05:30
										 |  |  | #  Copyright 2021 Collate | 
					
						
							|  |  |  | #  Licensed under the Apache License, Version 2.0 (the "License"); | 
					
						
							|  |  |  | #  you may not use this file except in compliance with the License. | 
					
						
							|  |  |  | #  You may obtain a copy of the License at | 
					
						
							|  |  |  | #  http://www.apache.org/licenses/LICENSE-2.0 | 
					
						
							|  |  |  | #  Unless required by applicable law or agreed to in writing, software | 
					
						
							|  |  |  | #  distributed under the License is distributed on an "AS IS" BASIS, | 
					
						
							|  |  |  | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
					
						
							|  |  |  | #  See the License for the specific language governing permissions and | 
					
						
							|  |  |  | #  limitations under the License. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | """
 | 
					
						
							| 
									
										
										
										
											2022-07-18 18:50:27 +02:00
										 |  |  | Validate query parser logic | 
					
						
							| 
									
										
										
										
											2022-02-04 21:50:20 +05:30
										 |  |  | """
 | 
					
						
							| 
									
										
										
										
											2022-07-18 18:50:27 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-02-04 21:50:20 +05:30
										 |  |  | from unittest import TestCase | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-07-18 18:50:27 +02:00
										 |  |  | from metadata.generated.schema.type.tableUsageCount import TableColumn, TableColumnJoin | 
					
						
							| 
									
										
										
										
											2022-11-30 16:02:21 +01:00
										 |  |  | from metadata.ingestion.lineage.parser import LineageParser | 
					
						
							| 
									
										
										
										
											2022-07-18 18:50:27 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class QueryParserTests(TestCase): | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     Check methods from query_parser.py | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2022-02-04 21:50:20 +05:30
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-07-18 18:50:27 +02:00
										 |  |  |     query = col_lineage = """
 | 
					
						
							|  |  |  |         SELECT | 
					
						
							|  |  |  |           a.col1, | 
					
						
							|  |  |  |           a.col2 + b.col2 AS col2, | 
					
						
							|  |  |  |           case  | 
					
						
							|  |  |  |             when col1 = 3 then 'hello' | 
					
						
							|  |  |  |             else 'bye' | 
					
						
							|  |  |  |           end as new_col | 
					
						
							|  |  |  |         FROM foo a | 
					
						
							|  |  |  |         JOIN db.grault b | 
					
						
							|  |  |  |           ON a.col1 = b.col1 | 
					
						
							|  |  |  |         JOIN db.holis c | 
					
						
							|  |  |  |           ON a.col1 = c.abc | 
					
						
							|  |  |  |         JOIN db.random d | 
					
						
							|  |  |  |           ON a.col2 = d.col2 | 
					
						
							|  |  |  |         WHERE a.col3 = 'abc' | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2022-02-04 21:50:20 +05:30
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-11-30 16:02:21 +01:00
										 |  |  |     parser = LineageParser(col_lineage) | 
					
						
							| 
									
										
										
										
											2022-07-18 18:50:27 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_involved_tables(self): | 
					
						
							| 
									
										
										
										
											2022-11-30 16:02:21 +01:00
										 |  |  |         tables = {str(table) for table in self.parser.involved_tables} | 
					
						
							| 
									
										
										
										
											2022-07-18 18:50:27 +02:00
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             tables, {"db.grault", "db.holis", "<default>.foo", "db.random"} | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_clean_parser_table_list(self): | 
					
						
							| 
									
										
										
										
											2022-11-30 16:02:21 +01:00
										 |  |  |         clean_tables = set(self.parser.clean_table_list) | 
					
						
							| 
									
										
										
										
											2022-07-18 18:50:27 +02:00
										 |  |  |         self.assertEqual(clean_tables, {"db.grault", "db.holis", "foo", "db.random"}) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-12-13 06:40:37 +01:00
										 |  |  |     def test_bracketed_parser_table_list(self): | 
					
						
							|  |  |  |         parser = LineageParser( | 
					
						
							|  |  |  |             "create view [test_schema].[test_view] as select * from [test_table];" | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         clean_tables = set(parser.clean_table_list) | 
					
						
							|  |  |  |         self.assertEqual(clean_tables, {"test_schema.test_view", "test_table"}) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-07-18 18:50:27 +02:00
										 |  |  |     def test_parser_table_aliases(self): | 
					
						
							| 
									
										
										
										
											2022-11-30 16:02:21 +01:00
										 |  |  |         aliases = self.parser.table_aliases | 
					
						
							| 
									
										
										
										
											2022-07-18 18:50:27 +02:00
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             aliases, {"b": "db.grault", "c": "db.holis", "a": "foo", "d": "db.random"} | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_get_table_joins(self): | 
					
						
							| 
									
										
										
										
											2022-02-04 21:50:20 +05:30
										 |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2022-07-18 18:50:27 +02:00
										 |  |  |         main logic point | 
					
						
							| 
									
										
										
										
											2022-02-04 21:50:20 +05:30
										 |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2022-11-30 16:02:21 +01:00
										 |  |  |         joins = self.parser.table_joins | 
					
						
							| 
									
										
										
										
											2022-07-18 18:50:27 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             joins["foo"], | 
					
						
							|  |  |  |             [ | 
					
						
							|  |  |  |                 TableColumnJoin( | 
					
						
							|  |  |  |                     tableColumn=TableColumn(table="foo", column="col1"), | 
					
						
							|  |  |  |                     joinedWith=[ | 
					
						
							|  |  |  |                         TableColumn(table="db.grault", column="col1"), | 
					
						
							|  |  |  |                         TableColumn(table="db.holis", column="abc"), | 
					
						
							|  |  |  |                     ], | 
					
						
							|  |  |  |                 ), | 
					
						
							|  |  |  |                 TableColumnJoin( | 
					
						
							|  |  |  |                     tableColumn=TableColumn(table="foo", column="col2"), | 
					
						
							|  |  |  |                     joinedWith=[ | 
					
						
							|  |  |  |                         TableColumn(table="db.random", column="col2"), | 
					
						
							|  |  |  |                     ], | 
					
						
							|  |  |  |                 ), | 
					
						
							|  |  |  |             ], | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_capitals(self): | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         Example on how LineageRunner keeps capitals | 
					
						
							|  |  |  |         for column names | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         query = """
 | 
					
						
							|  |  |  |          SELECT | 
					
						
							|  |  |  |            USERS.ID, | 
					
						
							|  |  |  |            li.id | 
					
						
							|  |  |  |         FROM TESTDB.PUBLIC.USERS | 
					
						
							|  |  |  |         JOIN testdb.PUBLIC."lowercase_users" li | 
					
						
							|  |  |  |           ON USERS.id = li.ID | 
					
						
							|  |  |  |         ; | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-11-30 16:02:21 +01:00
										 |  |  |         parser = LineageParser(query) | 
					
						
							| 
									
										
										
										
											2022-07-18 18:50:27 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-11-30 16:02:21 +01:00
										 |  |  |         joins = parser.table_joins | 
					
						
							| 
									
										
										
										
											2022-07-18 18:50:27 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             joins["testdb.public.users"], | 
					
						
							|  |  |  |             [ | 
					
						
							|  |  |  |                 TableColumnJoin( | 
					
						
							|  |  |  |                     tableColumn=TableColumn( | 
					
						
							|  |  |  |                         table="testdb.public.users", column="id" | 
					
						
							|  |  |  |                     ),  # lowercase col | 
					
						
							|  |  |  |                     joinedWith=[ | 
					
						
							|  |  |  |                         TableColumn( | 
					
						
							|  |  |  |                             table="testdb.public.lowercase_users", column="ID" | 
					
						
							|  |  |  |                         ),  # uppercase col | 
					
						
							|  |  |  |                     ], | 
					
						
							|  |  |  |                 ), | 
					
						
							|  |  |  |             ], | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2022-10-24 17:22:22 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_clean_raw_query_copy_grants(self): | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2022-11-30 16:02:21 +01:00
										 |  |  |         Validate COPY GRANT query cleaning logic | 
					
						
							| 
									
										
										
										
											2022-10-24 17:22:22 +02:00
										 |  |  |         """
 | 
					
						
							|  |  |  |         query = "create or replace view my_view copy grants as select * from my_table" | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2022-11-30 16:02:21 +01:00
										 |  |  |             LineageParser.clean_raw_query(query), | 
					
						
							| 
									
										
										
										
											2022-10-24 17:22:22 +02:00
										 |  |  |             "create or replace view my_view as select * from my_table", | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_clean_raw_query_merge_into(self): | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2022-11-30 16:02:21 +01:00
										 |  |  |         Validate MERGE INTO query cleaning logic | 
					
						
							| 
									
										
										
										
											2022-10-24 17:22:22 +02:00
										 |  |  |         """
 | 
					
						
							|  |  |  |         query = """
 | 
					
						
							| 
									
										
										
										
											2022-10-25 19:29:59 +02:00
										 |  |  |             /* comment */ merge into table_1 using (select a, b from table_2) when matched update set t.a = 'value'  | 
					
						
							| 
									
										
										
										
											2022-10-24 17:22:22 +02:00
										 |  |  |             when not matched then insert (table_1.a, table_2.b) values ('value1', 'value2') | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2022-11-30 16:02:21 +01:00
										 |  |  |             LineageParser.clean_raw_query(query), | 
					
						
							| 
									
										
										
										
											2022-10-25 19:29:59 +02:00
										 |  |  |             "/* comment */ merge into table_1 using (select a, b from table_2)", | 
					
						
							| 
									
										
										
										
											2022-10-24 17:22:22 +02:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2022-11-30 16:02:21 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_clean_raw_query_copy_from(self): | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         Validate COPY FROM query cleaning logic | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         query = "COPY my_schema.my_table FROM 's3://bucket/path/object.csv';" | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             LineageParser.clean_raw_query(query), | 
					
						
							|  |  |  |             None, | 
					
						
							|  |  |  |         ) |