mirror of
				https://github.com/open-metadata/OpenMetadata.git
				synced 2025-11-03 20:19:31 +00:00 
			
		
		
		
	* linting: fix python linting * fix: get column types from parquet schema for parquet files * style: python linting * fix: remove displayType check in test as variation depending on OS
		
			
				
	
	
		
			208 lines
		
	
	
		
			6.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			208 lines
		
	
	
		
			6.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
#  Copyright 2022 Collate
 | 
						|
#  Licensed under the Apache License, Version 2.0 (the "License");
 | 
						|
#  you may not use this file except in compliance with the License.
 | 
						|
#  You may obtain a copy of the License at
 | 
						|
#  http://www.apache.org/licenses/LICENSE-2.0
 | 
						|
#  Unless required by applicable law or agreed to in writing, software
 | 
						|
#  distributed under the License is distributed on an "AS IS" BASIS,
 | 
						|
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
						|
#  See the License for the specific language governing permissions and
 | 
						|
#  limitations under the License.
 | 
						|
 | 
						|
"""
 | 
						|
Hive E2E tests
 | 
						|
"""
 | 
						|
 | 
						|
from typing import List
 | 
						|
 | 
						|
from .common.test_cli_db import CliCommonDB
 | 
						|
from .common_e2e_sqa_mixins import SQACommonMethods
 | 
						|
 | 
						|
 | 
						|
class HiveCliTest(CliCommonDB.TestSuite, SQACommonMethods):
 | 
						|
    prepare_e2e: List[str] = [
 | 
						|
        "DROP DATABASE IF EXISTS e2e_cli_tests CASCADE",
 | 
						|
        "CREATE DATABASE e2e_cli_tests",
 | 
						|
        """
 | 
						|
        CREATE TABLE IF NOT EXISTS e2e_cli_tests.persons_profile (
 | 
						|
            person_id int,
 | 
						|
            full_name varchar(255),
 | 
						|
            birthdate date
 | 
						|
        )
 | 
						|
        """,
 | 
						|
        """
 | 
						|
        INSERT INTO e2e_cli_tests.persons_profile (person_id, full_name, birthdate) VALUES
 | 
						|
            (1,'Peter Parker', '2004-08-10'),
 | 
						|
            (2,'Bruce Banner', '1988-12-18'),
 | 
						|
            (3,'Steve Rogers', '1988-07-04'),
 | 
						|
            (4,'Natasha Romanoff', '1997-12-03'),
 | 
						|
            (5,'Wanda Maximoff', '1998-02-10'),
 | 
						|
            (6,'Diana Prince', '1976-03-17')
 | 
						|
        """,
 | 
						|
    ]
 | 
						|
 | 
						|
    create_table_query: str = """
 | 
						|
        CREATE TABLE IF NOT EXISTS e2e_cli_tests.persons (
 | 
						|
            person_id int,
 | 
						|
            full_name varchar(255),
 | 
						|
            birthdate date
 | 
						|
        )
 | 
						|
    """
 | 
						|
 | 
						|
    create_view_query: str = """
 | 
						|
        CREATE OR REPLACE VIEW e2e_cli_tests.view_persons AS
 | 
						|
            SELECT *
 | 
						|
            FROM e2e_cli_tests.persons
 | 
						|
    """
 | 
						|
 | 
						|
    insert_data_queries: List[str] = [
 | 
						|
        """
 | 
						|
    INSERT INTO e2e_cli_tests.persons (person_id, full_name, birthdate) VALUES
 | 
						|
        (1,'Peter Parker', '2004-08-10'),
 | 
						|
        (2,'Bruce Banner', '1988-12-18'),
 | 
						|
        (3,'Steve Rogers', '1988-07-04'),
 | 
						|
        (4,'Natasha Romanoff', '1997-12-03'),
 | 
						|
        (5,'Wanda Maximoff', '1998-02-10'),
 | 
						|
        (6,'Diana Prince', '1976-03-17')
 | 
						|
    """
 | 
						|
    ]
 | 
						|
 | 
						|
    drop_table_query: str = """
 | 
						|
        DROP TABLE IF EXISTS e2e_cli_tests.persons
 | 
						|
    """
 | 
						|
 | 
						|
    drop_view_query: str = """
 | 
						|
        DROP VIEW  IF EXISTS e2e_cli_tests.view_persons
 | 
						|
    """
 | 
						|
 | 
						|
    def setUp(self) -> None:
 | 
						|
        with self.engine.connect() as connection:
 | 
						|
            for sql_statements in self.prepare_e2e:
 | 
						|
                connection.execute(sql_statements)
 | 
						|
 | 
						|
        self.create_table_and_view()
 | 
						|
 | 
						|
    def tearDown(self) -> None:
 | 
						|
        self.delete_table_and_view()
 | 
						|
 | 
						|
    def create_table_and_view(self) -> None:
 | 
						|
        SQACommonMethods.create_table_and_view(self)
 | 
						|
 | 
						|
    def delete_table_and_view(self) -> None:
 | 
						|
        SQACommonMethods.delete_table_and_view(self)
 | 
						|
 | 
						|
    @staticmethod
 | 
						|
    def get_connector_name() -> str:
 | 
						|
        return "hive"
 | 
						|
 | 
						|
    @staticmethod
 | 
						|
    def expected_tables() -> int:
 | 
						|
        return 3
 | 
						|
 | 
						|
    def inserted_rows_count(self) -> int:
 | 
						|
        # For the persons table
 | 
						|
        return 6
 | 
						|
 | 
						|
    def view_column_lineage_count(self) -> int:
 | 
						|
        """view was created from `CREATE VIEW xyz AS (SELECT * FROM abc)`
 | 
						|
        which does not propagate column lineage
 | 
						|
        """
 | 
						|
        return None
 | 
						|
 | 
						|
    @staticmethod
 | 
						|
    def fqn_created_table() -> str:
 | 
						|
        return "e2e_hive.default.e2e_cli_tests.persons_profile"
 | 
						|
 | 
						|
    @staticmethod
 | 
						|
    def _fqn_deleted_table() -> str:
 | 
						|
        return "e2e_hive.default.e2e_cli_tests.view_persons"
 | 
						|
 | 
						|
    @staticmethod
 | 
						|
    def get_profiler_time_partition() -> dict:
 | 
						|
        return {
 | 
						|
            "fullyQualifiedName": "e2e_hive.default.e2e_cli_tests.persons_profile",
 | 
						|
            "partitionConfig": {
 | 
						|
                "enablePartitioning": True,
 | 
						|
                "partitionColumnName": "birthdate",
 | 
						|
                "partitionIntervalType": "TIME-UNIT",
 | 
						|
                "partitionInterval": 50,
 | 
						|
                "partitionIntervalUnit": "YEAR",
 | 
						|
            },
 | 
						|
        }
 | 
						|
 | 
						|
    @staticmethod
 | 
						|
    def get_includes_schemas() -> List[str]:
 | 
						|
        return ["e2e_cli_tests"]
 | 
						|
 | 
						|
    @staticmethod
 | 
						|
    def get_includes_tables() -> List[str]:
 | 
						|
        return ["persons"]
 | 
						|
 | 
						|
    @staticmethod
 | 
						|
    def get_excludes_tables() -> List[str]:
 | 
						|
        return ["my_table"]
 | 
						|
 | 
						|
    @staticmethod
 | 
						|
    def expected_filtered_schema_includes() -> int:
 | 
						|
        return 1
 | 
						|
 | 
						|
    @staticmethod
 | 
						|
    def expected_filtered_schema_excludes() -> int:
 | 
						|
        return 1
 | 
						|
 | 
						|
    @staticmethod
 | 
						|
    def expected_filtered_table_includes() -> int:
 | 
						|
        return 1
 | 
						|
 | 
						|
    @staticmethod
 | 
						|
    def expected_filtered_table_excludes() -> int:
 | 
						|
        return 2
 | 
						|
 | 
						|
    @staticmethod
 | 
						|
    def expected_filtered_mix() -> int:
 | 
						|
        return 2
 | 
						|
 | 
						|
    @staticmethod
 | 
						|
    def get_profiler_time_partition_results() -> dict:
 | 
						|
        return {
 | 
						|
            "table_profile": {
 | 
						|
                "columnCount": 3.0,
 | 
						|
                "rowCount": 6.0,
 | 
						|
            },
 | 
						|
            "column_profile": [
 | 
						|
                {
 | 
						|
                    "person_id": {
 | 
						|
                        "valuesCount": 6,
 | 
						|
                        "valuesPercentage": None,
 | 
						|
                        "validCount": None,
 | 
						|
                        "duplicateCount": None,
 | 
						|
                        "nullCount": 0,
 | 
						|
                        "nullProportion": 0,
 | 
						|
                        "missingPercentage": None,
 | 
						|
                        "missingCount": None,
 | 
						|
                        "uniqueCount": 6,
 | 
						|
                        "uniqueProportion": 1,
 | 
						|
                        "distinctCount": 6,
 | 
						|
                        "distinctProportion": 1,
 | 
						|
                        "min": 1,
 | 
						|
                        "max": 6,
 | 
						|
                        "minLength": None,
 | 
						|
                        "maxLength": None,
 | 
						|
                        "mean": 3.5,
 | 
						|
                        "sum": 21,
 | 
						|
                        "stddev": 1.707825127659933,
 | 
						|
                        "variance": None,
 | 
						|
                        "median": 3.5,
 | 
						|
                        "firstQuartile": 2.25,
 | 
						|
                        "thirdQuartile": 4.75,
 | 
						|
                        "interQuartileRange": 2.5,
 | 
						|
                        "nonParametricSkew": 0,
 | 
						|
                        "histogram": {
 | 
						|
                            "boundaries": ["1.00 to 3.75", "3.75 and up"],
 | 
						|
                            "frequencies": [3, 3],
 | 
						|
                        },
 | 
						|
                    }
 | 
						|
                }
 | 
						|
            ],
 | 
						|
        }
 |