# Copyright 2022 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Test database connectors with CLI
"""

from abc import abstractmethod
from typing import List, Optional
from unittest import TestCase

import pytest
from pydantic import TypeAdapter

from _openmetadata_testutils.pydantic.test_utils import assert_equal_pydantic_objects
from metadata.data_quality.api.models import TestCaseDefinition
from metadata.generated.schema.entity.data.table import Table
from metadata.generated.schema.tests.basic import TestCaseResult
from metadata.generated.schema.tests.testCase import TestCase as OMTestCase
from metadata.ingestion.api.status import Status

from .e2e_types import E2EType
from .test_cli import CliBase


class CliDBBase(TestCase):
"""
CLI DB Base class
"""
class TestSuite(TestCase, CliBase): # pylint: disable=too-many-public-methods
"""
TestSuite class to define test structure
"""
@pytest.mark.order(1)
def test_vanilla_ingestion(self) -> None:
"""1. Deploy vanilla ingestion"""
# build config file for ingest
self.build_config_file(E2EType.INGEST)
# run ingest with new tables
result = self.run_command()
sink_status, source_status = self.retrieve_statuses(result)
self.assert_for_vanilla_ingestion(source_status, sink_status)
@pytest.mark.order(2)
def test_create_table_with_profiler(self) -> None:
"""2. create a new table + deploy ingestion with views, sample data, and profiler.
We will perform the following steps:
1. delete table in case it exists
2. create a table and a view
3. build config file for ingest
                4. run ingest with the new tables (`self.run_command()` defaults to `ingestion`)
5. build config file for profiler
6. run profiler with new tables
"""
self.delete_table_and_view()
self.create_table_and_view()
self.build_config_file()
self.run_command()
self.build_config_file(
E2EType.PROFILER, {"includes": self.get_includes_schemas()}
)
result = self.run_command("profile")
sink_status, source_status = self.retrieve_statuses(result)
self.assert_for_table_with_profiler(source_status, sink_status)
@pytest.mark.order(3)
def test_delete_table_is_marked_as_deleted(self) -> None:
"""3. delete the new table + deploy marking tables as deleted
We will perform the following steps:
1. delete table created in previous test
2. build config file for ingest
                3. run ingest (`self.run_command()` defaults to `ingestion`)
"""
self.delete_table_and_view()
self.build_config_file()
result = self.run_command()
sink_status, source_status = self.retrieve_statuses(result)
self.assert_for_delete_table_is_marked_as_deleted(
source_status, sink_status
)
@pytest.mark.order(4)
def test_schema_filter_includes(self) -> None:
"""4. vanilla ingestion + include schema filter pattern
We will perform the following steps:
1. build config file for ingest with filters
                2. run ingest (`self.run_command()` defaults to `ingestion`)
"""
self.build_config_file(
E2EType.INGEST_DB_FILTER_SCHEMA,
{"includes": self.get_includes_schemas()},
)
result = self.run_command()
sink_status, source_status = self.retrieve_statuses(result)
self.assert_filtered_schemas_includes(source_status, sink_status)
@pytest.mark.order(5)
def test_schema_filter_excludes(self) -> None:
"""5. vanilla ingestion + exclude schema filter pattern
We will perform the following steps:
1. build config file for ingest with filters
                2. run ingest (`self.run_command()` defaults to `ingestion`)
"""
self.build_config_file(
E2EType.INGEST_DB_FILTER_SCHEMA,
{"excludes": self.get_excludes_schemas()},
)
result = self.run_command()
sink_status, source_status = self.retrieve_statuses(result)
self.assert_filtered_schemas_excludes(source_status, sink_status)
@pytest.mark.order(6)
def test_table_filter_includes(self) -> None:
"""6. Vanilla ingestion + include table filter pattern
We will perform the following steps:
1. build config file for ingest with filters
                2. run ingest (`self.run_command()` defaults to `ingestion`)
"""
self.build_config_file(
E2EType.INGEST_DB_FILTER_TABLE, {"includes": self.get_includes_tables()}
)
result = self.run_command()
sink_status, source_status = self.retrieve_statuses(result)
self.assert_filtered_tables_includes(source_status, sink_status)
@pytest.mark.order(7)
def test_table_filter_excludes(self) -> None:
"""7. Vanilla ingestion + exclude table filter pattern
We will perform the following steps:
1. build config file for ingest with filters
                2. run ingest (`self.run_command()` defaults to `ingestion`)
"""
self.build_config_file(
E2EType.INGEST_DB_FILTER_TABLE, {"excludes": self.get_includes_tables()}
)
result = self.run_command()
sink_status, source_status = self.retrieve_statuses(result)
self.assert_filtered_tables_excludes(source_status, sink_status)
@pytest.mark.order(8)
def test_table_filter_mix(self) -> None:
"""8. Vanilla ingestion + include schema filter pattern + exclude table filter pattern
We will perform the following steps:
1. build config file for ingest with filters
                2. run ingest (`self.run_command()` defaults to `ingestion`)
"""
self.build_config_file(
E2EType.INGEST_DB_FILTER_MIX,
{
"schema": {"includes": self.get_includes_schemas()},
"table": {
"includes": self.get_includes_tables(),
"excludes": self.get_excludes_tables(),
},
},
)
result = self.run_command()
sink_status, source_status = self.retrieve_statuses(result)
self.assert_filtered_mix(source_status, sink_status)
@pytest.mark.order(9)
def test_usage(self) -> None:
"""9. Run queries in the source (creates, inserts, views) and ingest metadata & Lineage
This test will need to be implemented on the database specific test classes
"""
@pytest.mark.order(10)
def test_lineage(self) -> None:
"""10. Run queries in the source (creates, inserts, views) and ingest metadata & Lineage
            This test needs to be implemented in the database-specific test classes
"""
@pytest.mark.order(11)
def test_profiler_with_time_partition(self) -> None:
"""11. Test time partitioning for the profiler"""
time_partition = self.get_profiler_time_partition()
if not time_partition:
pytest.skip("Profiler time partition not configured. Skipping test.")
if time_partition:
processor_config = self.get_profiler_processor_config(time_partition)
self.build_config_file(
E2EType.PROFILER_PROCESSOR,
{
"processor": processor_config,
"includes": self.get_includes_schemas(),
},
)
result = self.run_command("profile")
sink_status, source_status = self.retrieve_statuses(result)
self.assert_for_table_with_profiler_time_partition(
source_status,
sink_status,
)
@pytest.mark.order(12)
def test_data_quality(self) -> None:
"""12. Test data quality for the connector"""
if self.get_data_quality_table() is None:
return
self.delete_table_and_view()
self.create_table_and_view()
self.build_config_file()
self.run_command()
table: Table = self.openmetadata.get_by_name(
Table, self.get_data_quality_table(), nullable=False
)
test_case_definitions = self.get_test_case_definitions()
self.build_config_file(
E2EType.DATA_QUALITY,
{
"entity_fqn": table.fullyQualifiedName.root,
"test_case_definitions": TypeAdapter(
List[TestCaseDefinition]
).dump_python(test_case_definitions),
},
)
result = self.run_command("test")
try:
sink_status, source_status = self.retrieve_statuses(result)
self.assert_status_for_data_quality(source_status, sink_status)
test_case_entities = [
self.openmetadata.get_by_name(
OMTestCase,
".".join([table.fullyQualifiedName.root, tcd.name]),
fields=["*"],
nullable=False,
)
for tcd in test_case_definitions
]
                expected_results = self.get_expected_test_case_results()
                try:
                    for test_case, expected in zip(
                        test_case_entities, expected_results
                    ):
assert_equal_pydantic_objects(
expected.model_copy(
update={"timestamp": test_case.testCaseResult.timestamp}
),
test_case.testCaseResult,
)
finally:
for tc in test_case_entities:
self.openmetadata.delete(
OMTestCase, tc.id, recursive=True, hard_delete=True
)
except AssertionError:
print(result)
raise
def retrieve_table(self, table_name_fqn: str) -> Table:
return self.openmetadata.get_by_name(entity=Table, fqn=table_name_fqn)
def retrieve_sample_data(self, table_name_fqn: str) -> Table:
table: Table = self.openmetadata.get_by_name(
entity=Table, fqn=table_name_fqn
)
return self.openmetadata.get_sample_data(table=table)
def retrieve_profile(self, table_fqn: str) -> Table:
table: Table = self.openmetadata.get_latest_table_profile(fqn=table_fqn)
return table
def retrieve_lineage(self, entity_fqn: str) -> dict:
return self.openmetadata.client.get(
f"/lineage/table/name/{entity_fqn}?upstreamDepth=3&downstreamDepth=3"
)
@staticmethod
@abstractmethod
def get_connector_name() -> str:
raise NotImplementedError()
@abstractmethod
def create_table_and_view(self) -> None:
raise NotImplementedError()
@abstractmethod
def delete_table_and_view(self) -> None:
raise NotImplementedError()
@abstractmethod
def assert_for_vanilla_ingestion(
self, source_status: Status, sink_status: Status
) -> None:
raise NotImplementedError()
@abstractmethod
def assert_for_table_with_profiler(
self, source_status: Status, sink_status: Status
):
raise NotImplementedError()
@abstractmethod
def assert_for_table_with_profiler_time_partition(
self, source_status: Status, sink_status: Status
):
raise NotImplementedError()
@abstractmethod
def assert_for_delete_table_is_marked_as_deleted(
self, source_status: Status, sink_status: Status
):
raise NotImplementedError()
@abstractmethod
def assert_filtered_schemas_includes(
self, source_status: Status, sink_status: Status
):
raise NotImplementedError()
@abstractmethod
def assert_filtered_schemas_excludes(
self, source_status: Status, sink_status: Status
):
raise NotImplementedError()
@abstractmethod
def assert_filtered_tables_includes(
self, source_status: Status, sink_status: Status
):
raise NotImplementedError()
@abstractmethod
def assert_filtered_tables_excludes(
self, source_status: Status, sink_status: Status
):
raise NotImplementedError()
@abstractmethod
def assert_filtered_mix(self, source_status: Status, sink_status: Status):
raise NotImplementedError()
@staticmethod
@abstractmethod
def get_includes_schemas() -> List[str]:
raise NotImplementedError()
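        # By default the schema exclude-filter test reuses the include patterns, so the
        # schemas ingested by the include test are the ones expected to be filtered out;
        # override ``get_excludes_schemas`` if a connector needs distinct patterns.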
@classmethod
def get_excludes_schemas(cls) -> List[str]:
return cls.get_includes_schemas()
@staticmethod
@abstractmethod
def get_includes_tables() -> List[str]:
raise NotImplementedError()
@staticmethod
@abstractmethod
def get_excludes_tables() -> List[str]:
raise NotImplementedError()
@staticmethod
def get_profiler_time_partition() -> Optional[dict]:
return None
@staticmethod
def get_profiler_time_partition_results() -> Optional[dict]:
return None
@staticmethod
def delete_queries() -> Optional[List[str]]:
return None
@staticmethod
def update_queries() -> Optional[List[str]]:
return None
@staticmethod
def delete_table_rows() -> None:
return None
@staticmethod
def update_table_row() -> None:
return None
@staticmethod
def get_test_type() -> str:
return "database"
def get_profiler_processor_config(self, config: dict) -> dict:
return {
"processor": {
"type": "orm-profiler",
"config": {"tableConfig": [config]},
}
}
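        # Data-quality hooks: a connector opting into ``test_data_quality`` overrides
        # ``get_data_quality_table`` (FQN of the table the test cases attach to),
        # ``get_test_case_definitions`` and ``get_expected_test_case_results``;
        # leaving ``get_data_quality_table`` as ``None`` makes that test a no-op.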
def get_data_quality_table(self):
return None
def get_test_case_definitions(self) -> List[TestCaseDefinition]:
pass
def get_expected_test_case_results(self) -> List[TestCaseResult]:
pass
def assert_status_for_data_quality(self, source_status, sink_status):
pass
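

# Illustrative sketch (kept as a comment so pytest does not collect it): a hypothetical
# connector-specific suite would subclass ``CliDBBase.TestSuite`` and implement the
# abstract hooks roughly as below. The class name, connector name, and filter patterns
# are assumptions for illustration only, not an actual connector implementation.
#
#   class MyConnectorCliTest(CliDBBase.TestSuite):
#       @staticmethod
#       def get_connector_name() -> str:
#           return "my_connector"
#
#       @staticmethod
#       def get_includes_schemas() -> List[str]:
#           return ["e2e_cli_schema.*"]
#
#       @staticmethod
#       def get_includes_tables() -> List[str]:
#           return ["e2e_table.*"]
#
#       @staticmethod
#       def get_excludes_tables() -> List[str]:
#           return ["e2e_excluded_table.*"]
#
#       def create_table_and_view(self) -> None:
#           ...  # issue CREATE TABLE / CREATE VIEW against the source database
#
#       def delete_table_and_view(self) -> None:
#           ...  # issue DROP TABLE / DROP VIEW against the source database
#
#       def assert_for_vanilla_ingestion(self, source_status, sink_status) -> None:
#           self.assertEqual(len(source_status.failures), 0)
#           self.assertEqual(len(sink_status.failures), 0)
#
#       # ...remaining assert_* hooks follow the same pattern, checking the source and
#       # sink Status objects returned by ``retrieve_statuses``.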