mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-07-14 20:47:41 +00:00
391 lines
14 KiB
Python
391 lines
14 KiB
Python
# Copyright 2022 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
|
|
|
|
"""
|
|
Test database connectors with CLI
|
|
"""
|
|
import os
|
|
import re
|
|
from abc import abstractmethod
|
|
from contextlib import redirect_stdout
|
|
from enum import Enum
|
|
from io import StringIO
|
|
from pathlib import Path
|
|
from typing import List
|
|
from unittest import TestCase
|
|
|
|
import pytest
|
|
import yaml
|
|
|
|
from metadata.cmd import metadata
|
|
from metadata.config.common import load_config_file
|
|
from metadata.generated.schema.entity.data.table import Table
|
|
from metadata.ingestion.api.sink import SinkStatus
|
|
from metadata.ingestion.api.source import SourceStatus
|
|
from metadata.ingestion.api.workflow import Workflow
|
|
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
|
from metadata.utils.constants import UTF_8
|
|
|
|
# Directory that contains this file, with symlinks resolved. get_workflow()
# uses it as the root for "/database/<connector>/<connector>.yaml".
PATH_TO_RESOURCES = os.path.dirname(os.path.realpath(__file__))
|
|
|
|
|
|
class E2EType(Enum):
    """
    E2E Type Enum Class

    Selects how ``build_yaml`` adapts the base workflow configuration before
    the CLI is executed.
    """

    # Configuration is used as loaded, no changes applied
    INGEST = "ingest"
    # sourceConfig is replaced by a Profiler config and a processor is added
    PROFILER = "profiler"
    # Adds a "schemaFilterPattern" to the source config
    INGEST_FILTER_SCHEMA = "ingest-filter-schema"
    # Adds a "tableFilterPattern" to the source config
    INGEST_FILTER_TABLE = "ingest-filter-table"
    # Adds both "schemaFilterPattern" and "tableFilterPattern"
    INGEST_FILTER_MIX = "ingest-filter-mix"
|
|
|
|
|
|
class CliDBBase(TestCase):
    """
    CLI DB Base class

    Acts as a namespace for ``TestSuite``; it defines no tests of its own.
    NOTE(review): the nesting is presumably there so that test runners do not
    collect the abstract suite directly at module level - confirm.
    """

    class TestSuite(TestCase):  # pylint: disable=too-many-public-methods
        """
        TestSuite class to define test structure

        The ordered ``test_*`` methods drive the ``metadata`` CLI and hand the
        parsed source/sink statuses to the ``assert_*`` hooks. Every method
        that raises ``NotImplementedError`` here has to be overridden by the
        connector-specific subclass.

        ``abstractmethod`` is only enforced for classes built with ``ABCMeta``.
        ``TestCase`` is not one, so in this class the decorator documents
        intent and the ``NotImplementedError`` is the actual guard.
        """

        # Buffer receiving the CLI stdout. It is a class attribute, so it is
        # shared by every instance and subclass that does not rebind it.
        catcher = StringIO()
        # Only declared here: these must be assigned elsewhere (e.g. by the
        # subclass) before any test runs.
        openmetadata: OpenMetadata
        # Path where the generated workflow YAML is written for each run
        test_file_path: str
        # Path of the base workflow YAML the generated one is derived from
        config_file_path: str

        # 1. deploy vanilla ingestion
        @pytest.mark.order(1)
        def test_vanilla_ingestion(self) -> None:
            """
            Run a plain ingestion and validate the reported statuses.
            """
            # build config file for ingest
            self.build_config_file(E2EType.INGEST)
            # run ingest with new tables
            sink_status, source_status = self._run_and_retrieve_statuses()
            self.assert_for_vanilla_ingestion(source_status, sink_status)

        # 2. create a new table + deploy ingestion with views, sample data, and profiler
        @pytest.mark.order(2)
        def test_create_table_with_profiler(self) -> None:
            """
            Create a table and a view, ingest them and then run the profiler.
            """
            # delete table in case it exists
            self.delete_table_and_view()
            # create a table and a view
            self.create_table_and_view()
            # build config file for ingest
            self.build_config_file()
            # run ingest with new tables; its output is discarded
            self.run_command()
            self._reset_catcher()
            # build config file for profiler
            self.build_config_file(E2EType.PROFILER)
            # run profiler with new tables
            sink_status, source_status = self._run_and_retrieve_statuses("profile")
            self.assert_for_table_with_profiler(source_status, sink_status)

        # 3. delete the new table + deploy marking tables as deleted
        @pytest.mark.order(3)
        def test_delete_table_is_marked_as_deleted(self) -> None:
            """
            Drop the table and view, then ingest again and validate statuses.
            """
            # delete table created in previous test
            self.delete_table_and_view()
            # build config file for ingest
            self.build_config_file()
            # run ingest
            sink_status, source_status = self._run_and_retrieve_statuses()
            self.assert_for_delete_table_is_marked_as_deleted(
                source_status, sink_status
            )

        # 4. vanilla ingestion + include schema filter pattern
        @pytest.mark.order(4)
        def test_schema_filter_includes(self) -> None:
            """
            Ingest with ``get_includes_schemas()`` as schema "includes".
            """
            # build config file for ingest with filters
            self.build_config_file(
                E2EType.INGEST_FILTER_SCHEMA, {"includes": self.get_includes_schemas()}
            )
            # run ingest
            sink_status, source_status = self._run_and_retrieve_statuses()
            self.assert_filtered_schemas_includes(source_status, sink_status)

        # 5. vanilla ingestion + exclude schema filter pattern
        @pytest.mark.order(5)
        def test_schema_filter_excludes(self) -> None:
            """
            Ingest with ``get_includes_schemas()`` as schema "excludes".
            """
            # build config file for ingest with filters
            self.build_config_file(
                E2EType.INGEST_FILTER_SCHEMA, {"excludes": self.get_includes_schemas()}
            )
            # run ingest
            sink_status, source_status = self._run_and_retrieve_statuses()
            self.assert_filtered_schemas_excludes(source_status, sink_status)

        # 6. Vanilla ingestion + include table filter pattern
        @pytest.mark.order(6)
        def test_table_filter_includes(self) -> None:
            """
            Ingest with ``get_includes_tables()`` as table "includes".
            """
            # build config file for ingest with filters
            self.build_config_file(
                E2EType.INGEST_FILTER_TABLE, {"includes": self.get_includes_tables()}
            )
            # run ingest
            sink_status, source_status = self._run_and_retrieve_statuses()
            self.assert_filtered_tables_includes(source_status, sink_status)

        # 7. Vanilla ingestion + exclude table filter pattern
        @pytest.mark.order(7)
        def test_table_filter_excludes(self) -> None:
            """
            Ingest with ``get_includes_tables()`` as table "excludes".
            """
            # build config file for ingest with filters
            self.build_config_file(
                E2EType.INGEST_FILTER_TABLE, {"excludes": self.get_includes_tables()}
            )
            # run ingest
            sink_status, source_status = self._run_and_retrieve_statuses()
            self.assert_filtered_tables_excludes(source_status, sink_status)

        # 8. Vanilla ingestion mixing filters
        @pytest.mark.order(8)
        def test_table_filter_mix(self) -> None:
            """
            Ingest combining a schema include with table includes and excludes.
            """
            # build config file for ingest with filters
            self.build_config_file(
                E2EType.INGEST_FILTER_MIX,
                {
                    "schema": {"includes": self.get_includes_schemas()},
                    "table": {
                        "includes": self.get_includes_tables(),
                        "excludes": self.get_excludes_tables(),
                    },
                },
            )
            # run ingest
            sink_status, source_status = self._run_and_retrieve_statuses()
            self.assert_filtered_mix(source_status, sink_status)

        # 9. Run usage
        @pytest.mark.order(9)
        def test_usage(self) -> None:
            """
            Placeholder: to be implemented.
            """

        # 10. Run queries in the source (creates, inserts, views) and ingest metadata & Lineage
        @pytest.mark.order(10)
        def test_lineage(self) -> None:
            """
            Placeholder: to be implemented.
            """

        def run_command(self, command: str = "ingest"):
            """
            Invoke the ``metadata`` CLI as ``<command> -c <test_file_path>``.

            Stdout is redirected into ``self.catcher``. The call is required
            to end with ``SystemExit``; otherwise the test fails.
            """
            args = [
                command,
                "-c",
                self.test_file_path,
            ]
            with redirect_stdout(self.catcher):
                with self.assertRaises(SystemExit):
                    metadata(args)

        def _reset_catcher(self) -> None:
            """
            Empty the capture buffer so the next run starts from scratch.
            """
            # truncate() alone keeps the stream position, so the next write
            # would land after a gap padded with NUL characters. Rewind first.
            self.catcher.seek(0)
            self.catcher.truncate(0)

        def _run_and_retrieve_statuses(self, command: str = "ingest"):
            """
            Run the CLI command and parse the statuses out of its output.

            The capture buffer is emptied before parsing, so a parsing error
            does not leak this run's output into the next test.

            Returns the tuple ``(sink_status, source_status)``, in the same
            order as ``retrieve_statuses``.
            """
            self.run_command(command)
            result = self.catcher.getvalue()
            self._reset_catcher()
            return self.retrieve_statuses(result)

        def build_config_file(
            self, test_type: E2EType = E2EType.INGEST, extra_args: dict = None
        ) -> None:
            """
            Write the workflow YAML used by the next CLI run.

            Loads ``config_file_path``, adapts it through ``build_yaml`` and
            dumps the result to ``test_file_path``.

            Args:
                test_type: kind of run the configuration is built for
                extra_args: filter patterns; only read by the filter types
            """
            with open(self.config_file_path, encoding=UTF_8) as config_file:
                config_yaml = yaml.safe_load(config_file)
                config_yaml = self.build_yaml(config_yaml, test_type, extra_args)
                with open(self.test_file_path, "w", encoding=UTF_8) as test_file:
                    yaml.dump(config_yaml, test_file)

        def retrieve_statuses(self, result):
            """
            Parse both statuses from the captured CLI output.

            Returns the tuple ``(sink_status, source_status)``. Note the order:
            the ``assert_*`` hooks take them the other way around.
            """
            source_status: SourceStatus = self.extract_source_status(result)
            sink_status: SinkStatus = self.extract_sink_status(result)
            return sink_status, source_status

        def retrieve_table(self, table_name_fqn: str) -> Table:
            """
            Fetch a Table by fully qualified name through ``self.openmetadata``.
            """
            return self.openmetadata.get_by_name(entity=Table, fqn=table_name_fqn)

        def retrieve_sample_data(self, table_name_fqn: str) -> Table:
            """
            Fetch a Table by fully qualified name and return the result of
            ``get_sample_data`` for it.
            """
            table: Table = self.openmetadata.get_by_name(
                entity=Table, fqn=table_name_fqn
            )
            return self.openmetadata.get_sample_data(table=table)

        def retrieve_lineage(self, table_name_fqn: str) -> dict:
            """
            Request the lineage of a table by name, asking for an upstream and
            downstream depth of 3.
            """
            return self.openmetadata.client.get(
                f"/lineage/table/name/{table_name_fqn}?upstreamDepth=3&downstreamDepth=3"
            )

        @staticmethod
        def get_workflow(connector: str) -> Workflow:
            """
            Create a Workflow from the connector resource file
            ``<PATH_TO_RESOURCES>/database/<connector>/<connector>.yaml``.
            """
            config_file = (
                Path(PATH_TO_RESOURCES) / "database" / connector / f"{connector}.yaml"
            )
            config_dict = load_config_file(config_file)
            return Workflow.create(config_dict)

        @staticmethod
        def extract_source_status(output) -> SourceStatus:
            """
            Parse the "Source Status" section of the CLI output.

            The section ends at "Processor Status" when the output contains
            one, otherwise at "Sink Status".

            Raises:
                IndexError: if the section cannot be found in the output
            """
            # Flatten to a single line: newlines to spaces, collapse runs of
            # spaces, then replace ANSI colour sequences with a space.
            output_clean = output.replace("\n", " ")
            output_clean = re.sub(" +", " ", output_clean)
            output_clean_ansi = re.compile(r"\x1b[^m]*m")
            output_clean = output_clean_ansi.sub(" ", output_clean)
            if re.match(".* Processor Status: .*", output_clean):
                next_section = "Processor Status"
            else:
                next_section = "Sink Status"
            matches = re.findall(
                f"Source Status: (.*?) {next_section}: .*", output_clean.strip()
            )
            # NOTE(review): eval() executes whatever text the CLI printed. Only
            # use this with output produced by the tests themselves.
            return SourceStatus.parse_obj(
                eval(matches[0].strip())  # pylint: disable=eval-used
            )

        @staticmethod
        def extract_sink_status(output) -> SinkStatus:
            """
            Parse the "Sink Status" section of the CLI output, i.e. the text
            between "Sink Status:" and "Workflow finished".

            Raises:
                IndexError: if the section cannot be found in the output
            """
            # Same flattening as for the source status, except that ANSI
            # sequences are removed rather than replaced with a space.
            output_clean = output.replace("\n", " ")
            output_clean = re.sub(" +", " ", output_clean)
            output_clean_ansi = re.compile(r"\x1b[^m]*m")
            output_clean = output_clean_ansi.sub("", output_clean)
            output_clean = re.findall(
                ".* Sink Status: (.*?) Workflow finished.*", output_clean.strip()
            )[0].strip()
            # NOTE(review): eval() executes whatever text the CLI printed. Only
            # use this with output produced by the tests themselves.
            return SinkStatus.parse_obj(eval(output_clean))  # pylint: disable=eval-used

        @staticmethod
        @abstractmethod
        def get_connector_name() -> str:
            """
            Name of the connector under test. Must be overridden.
            """
            raise NotImplementedError()

        @abstractmethod
        def create_table_and_view(self) -> None:
            """
            Create the table and view used by the profiler test. Must be
            overridden.
            """
            raise NotImplementedError()

        @abstractmethod
        def delete_table_and_view(self) -> None:
            """
            Remove the table and view used by the profiler and delete tests.
            Must be overridden.
            """
            raise NotImplementedError()

        @abstractmethod
        def assert_for_vanilla_ingestion(
            self, source_status: SourceStatus, sink_status: SinkStatus
        ) -> None:
            """
            Checks for ``test_vanilla_ingestion``. Must be overridden.
            """
            raise NotImplementedError()

        @abstractmethod
        def assert_for_table_with_profiler(
            self, source_status: SourceStatus, sink_status: SinkStatus
        ):
            """
            Checks for ``test_create_table_with_profiler``. Must be overridden.
            """
            raise NotImplementedError()

        @abstractmethod
        def assert_for_delete_table_is_marked_as_deleted(
            self, source_status: SourceStatus, sink_status: SinkStatus
        ):
            """
            Checks for ``test_delete_table_is_marked_as_deleted``. Must be
            overridden.
            """
            raise NotImplementedError()

        @abstractmethod
        def assert_filtered_schemas_includes(
            self, source_status: SourceStatus, sink_status: SinkStatus
        ):
            """
            Checks for ``test_schema_filter_includes``. Must be overridden.
            """
            raise NotImplementedError()

        @abstractmethod
        def assert_filtered_schemas_excludes(
            self, source_status: SourceStatus, sink_status: SinkStatus
        ):
            """
            Checks for ``test_schema_filter_excludes``. Must be overridden.
            """
            raise NotImplementedError()

        @abstractmethod
        def assert_filtered_tables_includes(
            self, source_status: SourceStatus, sink_status: SinkStatus
        ):
            """
            Checks for ``test_table_filter_includes``. Must be overridden.
            """
            raise NotImplementedError()

        @abstractmethod
        def assert_filtered_tables_excludes(
            self, source_status: SourceStatus, sink_status: SinkStatus
        ):
            """
            Checks for ``test_table_filter_excludes``. Must be overridden.
            """
            raise NotImplementedError()

        @abstractmethod
        def assert_filtered_mix(
            self, source_status: SourceStatus, sink_status: SinkStatus
        ):
            """
            Checks for ``test_table_filter_mix``. Must be overridden.
            """
            raise NotImplementedError()

        @staticmethod
        @abstractmethod
        def get_includes_schemas() -> List[str]:
            """
            Schema patterns used by the schema filter tests, both as
            "includes" and as "excludes". Must be overridden.
            """
            raise NotImplementedError()

        @staticmethod
        @abstractmethod
        def get_includes_tables() -> List[str]:
            """
            Table patterns used by the table filter tests, both as "includes"
            and as "excludes". Must be overridden.
            """
            raise NotImplementedError()

        @staticmethod
        @abstractmethod
        def get_excludes_tables() -> List[str]:
            """
            Table patterns used as "excludes" by the mixed filter test. Must
            be overridden.
            """
            raise NotImplementedError()

        @staticmethod
        def build_yaml(config_yaml: dict, test_type: E2EType, extra_args: dict):
            """
            Build yaml as per E2EType

            ``config_yaml`` is modified in place and also returned.

            Args:
                config_yaml: workflow configuration loaded from the base YAML
                test_type: selects which changes are applied; ``INGEST``
                    leaves the configuration untouched
                extra_args: for the single filter types, the pattern itself;
                    for ``INGEST_FILTER_MIX``, a dict with a "schema" and a
                    "table" pattern; unused otherwise
            """
            if test_type == E2EType.PROFILER:
                # The whole sourceConfig is replaced, so there is no need to
                # delete its previous "config" entry first.
                config_yaml["source"]["sourceConfig"] = {
                    "config": {
                        "type": "Profiler",
                        "generateSampleData": True,
                        "profileSample": 1,
                    }
                }
                config_yaml["processor"] = {"type": "orm-profiler", "config": {}}
            elif test_type == E2EType.INGEST_FILTER_SCHEMA:
                source_config = config_yaml["source"]["sourceConfig"]["config"]
                source_config["schemaFilterPattern"] = extra_args
            elif test_type == E2EType.INGEST_FILTER_TABLE:
                source_config = config_yaml["source"]["sourceConfig"]["config"]
                source_config["tableFilterPattern"] = extra_args
            elif test_type == E2EType.INGEST_FILTER_MIX:
                source_config = config_yaml["source"]["sourceConfig"]["config"]
                source_config["schemaFilterPattern"] = extra_args["schema"]
                source_config["tableFilterPattern"] = extra_args["table"]
            return config_yaml
|