#  Copyright 2022 Collate
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#  http://www.apache.org/licenses/LICENSE-2.0
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

"""
Hive E2E tests
"""

from typing import List, Optional

from .common.test_cli_db import CliCommonDB
from .common_e2e_sqa_mixins import SQACommonMethods


							|  |  |  | class HiveCliTest(CliCommonDB.TestSuite, SQACommonMethods): | 
					
						
							| 
									
										
										
										
											2023-05-30 17:15:55 +02:00
										 |  |  |     prepare_e2e: List[str] = [ | 
					
						
							|  |  |  |         "DROP DATABASE IF EXISTS e2e_cli_tests CASCADE", | 
					
						
							|  |  |  |         "CREATE DATABASE e2e_cli_tests", | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         CREATE TABLE IF NOT EXISTS e2e_cli_tests.persons_profile ( | 
					
						
							|  |  |  |             person_id int, | 
					
						
							|  |  |  |             full_name varchar(255), | 
					
						
							|  |  |  |             birthdate date | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         """,
 | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         INSERT INTO e2e_cli_tests.persons_profile (person_id, full_name, birthdate) VALUES | 
					
						
							|  |  |  |             (1,'Peter Parker', '2004-08-10'), | 
					
						
							|  |  |  |             (2,'Bruce Banner', '1988-12-18'), | 
					
						
							|  |  |  |             (3,'Steve Rogers', '1988-07-04'), | 
					
						
							|  |  |  |             (4,'Natasha Romanoff', '1997-12-03'), | 
					
						
							|  |  |  |             (5,'Wanda Maximoff', '1998-02-10'), | 
					
						
							|  |  |  |             (6,'Diana Prince', '1976-03-17') | 
					
						
							|  |  |  |         """,
 | 
					
						
							|  |  |  |     ] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-04-21 17:45:12 +02:00
										 |  |  |     create_table_query: str = """
 | 
					
						
							| 
									
										
										
										
											2023-04-25 16:05:49 +02:00
										 |  |  |         CREATE TABLE IF NOT EXISTS e2e_cli_tests.persons ( | 
					
						
							| 
									
										
										
										
											2023-04-21 17:45:12 +02:00
										 |  |  |             person_id int, | 
					
						
							|  |  |  |             full_name varchar(255), | 
					
						
							|  |  |  |             birthdate date | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     create_view_query: str = """
 | 
					
						
							| 
									
										
										
										
											2023-04-25 16:05:49 +02:00
										 |  |  |         CREATE OR REPLACE VIEW e2e_cli_tests.view_persons AS | 
					
						
							| 
									
										
										
										
											2023-04-21 17:45:12 +02:00
										 |  |  |             SELECT * | 
					
						
							|  |  |  |             FROM e2e_cli_tests.persons | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     insert_data_queries: List[str] = [ | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |     INSERT INTO e2e_cli_tests.persons (person_id, full_name, birthdate) VALUES | 
					
						
							|  |  |  |         (1,'Peter Parker', '2004-08-10'), | 
					
						
							|  |  |  |         (2,'Bruce Banner', '1988-12-18'), | 
					
						
							|  |  |  |         (3,'Steve Rogers', '1988-07-04'), | 
					
						
							|  |  |  |         (4,'Natasha Romanoff', '1997-12-03'), | 
					
						
							|  |  |  |         (5,'Wanda Maximoff', '1998-02-10'), | 
					
						
							|  |  |  |         (6,'Diana Prince', '1976-03-17') | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     ] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     drop_table_query: str = """
 | 
					
						
							|  |  |  |         DROP TABLE IF EXISTS e2e_cli_tests.persons | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     drop_view_query: str = """
 | 
					
						
							|  |  |  |         DROP VIEW  IF EXISTS e2e_cli_tests.view_persons | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def setUp(self) -> None: | 
					
						
							| 
									
										
										
										
											2023-05-30 17:15:55 +02:00
										 |  |  |         with self.engine.connect() as connection: | 
					
						
							|  |  |  |             for sql_statements in self.prepare_e2e: | 
					
						
							|  |  |  |                 connection.execute(sql_statements) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-04-21 17:45:12 +02:00
										 |  |  |         self.create_table_and_view() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def tearDown(self) -> None: | 
					
						
							|  |  |  |         self.delete_table_and_view() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def create_table_and_view(self) -> None: | 
					
						
							|  |  |  |         SQACommonMethods.create_table_and_view(self) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def delete_table_and_view(self) -> None: | 
					
						
							|  |  |  |         SQACommonMethods.delete_table_and_view(self) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @staticmethod | 
					
						
							|  |  |  |     def get_connector_name() -> str: | 
					
						
							|  |  |  |         return "hive" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @staticmethod | 
					
						
							|  |  |  |     def expected_tables() -> int: | 
					
						
							| 
									
										
										
										
											2023-05-30 17:15:55 +02:00
										 |  |  |         return 3 | 
					
						
							| 
									
										
										
										
											2023-04-21 17:45:12 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def inserted_rows_count(self) -> int: | 
					
						
							| 
									
										
										
										
											2023-05-30 17:15:55 +02:00
										 |  |  |         # For the persons table | 
					
						
							|  |  |  |         return 6 | 
					
						
							| 
									
										
										
										
											2023-04-21 17:45:12 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def view_column_lineage_count(self) -> int: | 
					
						
							|  |  |  |         """view was created from `CREATE VIEW xyz AS (SELECT * FROM abc)`
 | 
					
						
							|  |  |  |         which does not propagate column lineage | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         return None | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @staticmethod | 
					
						
							|  |  |  |     def fqn_created_table() -> str: | 
					
						
							| 
									
										
										
										
											2023-05-30 17:15:55 +02:00
										 |  |  |         return "e2e_hive.default.e2e_cli_tests.persons_profile" | 
					
						
							| 
									
										
										
										
											2023-04-21 17:45:12 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     @staticmethod | 
					
						
							|  |  |  |     def _fqn_deleted_table() -> str: | 
					
						
							| 
									
										
										
										
											2023-05-30 17:15:55 +02:00
										 |  |  |         return "e2e_hive.default.e2e_cli_tests.view_persons" | 
					
						
							| 
									
										
										
										
											2023-04-21 17:45:12 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     @staticmethod | 
					
						
							|  |  |  |     def get_profiler_time_partition() -> dict: | 
					
						
							|  |  |  |         return { | 
					
						
							| 
									
										
										
										
											2023-05-30 17:15:55 +02:00
										 |  |  |             "fullyQualifiedName": "e2e_hive.default.e2e_cli_tests.persons_profile", | 
					
						
							| 
									
										
										
										
											2023-04-21 17:45:12 +02:00
										 |  |  |             "partitionConfig": { | 
					
						
							|  |  |  |                 "enablePartitioning": True, | 
					
						
							| 
									
										
										
										
											2023-05-30 17:15:55 +02:00
										 |  |  |                 "partitionColumnName": "birthdate", | 
					
						
							| 
									
										
										
										
											2023-04-21 17:45:12 +02:00
										 |  |  |                 "partitionIntervalType": "TIME-UNIT", | 
					
						
							|  |  |  |                 "partitionInterval": 50, | 
					
						
							|  |  |  |                 "partitionIntervalUnit": "YEAR", | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @staticmethod | 
					
						
							|  |  |  |     def get_includes_schemas() -> List[str]: | 
					
						
							|  |  |  |         return ["e2e_cli_tests"] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @staticmethod | 
					
						
							|  |  |  |     def get_includes_tables() -> List[str]: | 
					
						
							| 
									
										
										
										
											2023-05-30 17:15:55 +02:00
										 |  |  |         return ["persons"] | 
					
						
							| 
									
										
										
										
											2023-04-21 17:45:12 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     @staticmethod | 
					
						
							|  |  |  |     def get_excludes_tables() -> List[str]: | 
					
						
							| 
									
										
										
										
											2023-05-30 17:15:55 +02:00
										 |  |  |         return ["my_table"] | 
					
						
							| 
									
										
										
										
											2023-04-21 17:45:12 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     @staticmethod | 
					
						
							|  |  |  |     def expected_filtered_schema_includes() -> int: | 
					
						
							|  |  |  |         return 1 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @staticmethod | 
					
						
							|  |  |  |     def expected_filtered_schema_excludes() -> int: | 
					
						
							|  |  |  |         return 1 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @staticmethod | 
					
						
							|  |  |  |     def expected_filtered_table_includes() -> int: | 
					
						
							| 
									
										
										
										
											2023-10-19 17:49:02 +05:30
										 |  |  |         return 1 | 
					
						
							| 
									
										
										
										
											2023-04-21 17:45:12 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     @staticmethod | 
					
						
							|  |  |  |     def expected_filtered_table_excludes() -> int: | 
					
						
							| 
									
										
										
										
											2023-05-30 17:15:55 +02:00
										 |  |  |         return 2 | 
					
						
							| 
									
										
										
										
											2023-04-21 17:45:12 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     @staticmethod | 
					
						
							|  |  |  |     def expected_filtered_mix() -> int: | 
					
						
							| 
									
										
										
										
											2023-05-30 17:15:55 +02:00
										 |  |  |         return 2 | 
					
						
							| 
									
										
										
										
											2023-04-21 17:45:12 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     @staticmethod | 
					
						
							|  |  |  |     def get_profiler_time_partition_results() -> dict: | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |             "table_profile": { | 
					
						
							| 
									
										
										
										
											2023-05-30 17:15:55 +02:00
										 |  |  |                 "columnCount": 3.0, | 
					
						
							|  |  |  |                 "rowCount": 6.0, | 
					
						
							| 
									
										
										
										
											2023-04-21 17:45:12 +02:00
										 |  |  |             }, | 
					
						
							|  |  |  |             "column_profile": [ | 
					
						
							|  |  |  |                 { | 
					
						
							| 
									
										
										
										
											2023-05-30 17:15:55 +02:00
										 |  |  |                     "person_id": { | 
					
						
							|  |  |  |                         "valuesCount": 6, | 
					
						
							|  |  |  |                         "valuesPercentage": None, | 
					
						
							|  |  |  |                         "validCount": None, | 
					
						
							| 
									
										
										
										
											2023-04-21 17:45:12 +02:00
										 |  |  |                         "duplicateCount": None, | 
					
						
							| 
									
										
										
										
											2023-05-30 17:15:55 +02:00
										 |  |  |                         "nullCount": 0, | 
					
						
							|  |  |  |                         "nullProportion": 0, | 
					
						
							| 
									
										
										
										
											2023-04-21 17:45:12 +02:00
										 |  |  |                         "missingPercentage": None, | 
					
						
							| 
									
										
										
										
											2023-05-30 17:15:55 +02:00
										 |  |  |                         "missingCount": None, | 
					
						
							|  |  |  |                         "uniqueCount": 6, | 
					
						
							|  |  |  |                         "uniqueProportion": 1, | 
					
						
							|  |  |  |                         "distinctCount": 6, | 
					
						
							|  |  |  |                         "distinctProportion": 1, | 
					
						
							|  |  |  |                         "min": 1, | 
					
						
							|  |  |  |                         "max": 6, | 
					
						
							|  |  |  |                         "minLength": None, | 
					
						
							|  |  |  |                         "maxLength": None, | 
					
						
							|  |  |  |                         "mean": 3.5, | 
					
						
							|  |  |  |                         "sum": 21, | 
					
						
							|  |  |  |                         "stddev": 1.707825127659933, | 
					
						
							| 
									
										
										
										
											2023-04-21 17:45:12 +02:00
										 |  |  |                         "variance": None, | 
					
						
							| 
									
										
										
										
											2023-05-30 17:15:55 +02:00
										 |  |  |                         "median": 3.5, | 
					
						
							|  |  |  |                         "firstQuartile": 2.25, | 
					
						
							|  |  |  |                         "thirdQuartile": 4.75, | 
					
						
							|  |  |  |                         "interQuartileRange": 2.5, | 
					
						
							|  |  |  |                         "nonParametricSkew": 0, | 
					
						
							|  |  |  |                         "histogram": { | 
					
						
							|  |  |  |                             "boundaries": ["1.00 to 3.75", "3.75 and up"], | 
					
						
							|  |  |  |                             "frequencies": [3, 3], | 
					
						
							|  |  |  |                         }, | 
					
						
							| 
									
										
										
										
											2023-04-21 17:45:12 +02:00
										 |  |  |                     } | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             ], | 
					
						
							|  |  |  |         } |