#  Copyright 2022 Collate
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#  http://www.apache.org/licenses/LICENSE-2.0
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
"""
|
|
|
|
Test Datalake connector with CLI
|
|
|
|
"""
import urllib.parse
from pathlib import Path
from typing import List

import pytest

from metadata.workflow.metadata import MetadataWorkflow

from .base.e2e_types import E2EType
from .base.test_cli import PATH_TO_RESOURCES
from .common.test_cli_db import CliCommonDB


class DatalakeCliTest(CliCommonDB.TestSuite):
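    """
    CLI end-to-end test suite for the Datalake (S3) connector: runs the
    metadata ingestion and profiler workflows against the resource YAMLs
    and checks entity counts, schema/table filters, and profiler results.
    """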

    @classmethod
    def setUpClass(cls) -> None:
        connector = cls.get_connector_name()
        workflow: MetadataWorkflow = cls.get_workflow(
            test_type=cls.get_test_type(), connector=connector
        )
        cls.openmetadata = workflow.source.metadata
        cls.config_file_path = str(
            Path(PATH_TO_RESOURCES + f"/database/{connector}/{connector}.yaml")
        )
        cls.test_file_path = str(
            Path(PATH_TO_RESOURCES + f"/database/{connector}/test.yaml")
        )
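
    # The fixture hooks below are no-ops for this connector: the e2e "tables"
    # are files assumed to already live in the S3 bucket, so there is no SQL
    # engine in which to create or drop anything.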

    def tearDown(self) -> None:
        pass

    def create_table_and_view(self):
        pass

    def delete_table_and_view(self):
        pass
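
    # The methods below parameterize the shared CliCommonDB assertions: which
    # connector/resource YAMLs to load and the entity counts the ingestion
    # run is expected to produce.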

    @staticmethod
    def get_connector_name() -> str:
        return "datalake_s3"

    @staticmethod
    def expected_tables() -> int:
        return 7

    def inserted_rows_count(self) -> int:
        return 50

    def view_column_lineage_count(self) -> int:
        pass
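
    # File-backed tables keep their object key ("sales/sales.csv") as the
    # table name, so the FQN quotes it to preserve the slash and dot.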

    @staticmethod
    def fqn_created_table() -> str:
        return 'aws_datalake.default.aws-datalake-e2e."sales/sales.csv"'

    @staticmethod
    def fqn_deleted_table() -> None:
        return None

    @staticmethod
    def get_includes_schemas() -> List[str]:
        return ["aws-datalake-e2e"]

    @staticmethod
    def get_includes_tables() -> List[str]:
        return [".*example.*"]

    @staticmethod
    def get_excludes_tables() -> List[str]:
        return [".*test.*"]

    @staticmethod
    def expected_filtered_schema_includes() -> int:
        return 0

    @staticmethod
    def expected_filtered_schema_excludes() -> int:
        return 3

    @staticmethod
    def expected_filtered_table_includes() -> int:
        return 7

    @staticmethod
    def expected_filtered_table_excludes() -> int:
        return 1

    @staticmethod
    def expected_filtered_mix() -> int:
        return 7
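
    # Fetches lineage straight from the REST API; quote_plus is needed because
    # the quoted file-path FQN contains characters ("/", '"') that are not
    # URL-safe.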

    def retrieve_lineage(self, entity_fqn: str) -> dict:
        return self.openmetadata.client.get(
            f"/lineage/table/name/{urllib.parse.quote_plus(entity_fqn)}?upstreamDepth=3&downstreamDepth=3"
        )
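
    # End-to-end profiler flow: (re)create fixtures, ingest metadata, then run
    # the profiler with a small profileSample so sampling the bucket files
    # stays fast.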

    @pytest.mark.order(2)
    def test_create_table_with_profiler(self) -> None:
        # delete table in case it exists
        self.delete_table_and_view()
        # create a table and a view
        self.create_table_and_view()
        # build config file for ingest
        self.build_config_file()
        # run ingest with new tables
        self.run_command()
        # build config file for profiler
        self.build_config_file(
            E2EType.PROFILER,
            # Otherwise the sampling here takes too long
            extra_args={"profileSample": 5, "includes": self.get_includes_schemas()},
        )
        # run profiler with new tables
        result = self.run_command("profile")
        sink_status, source_status = self.retrieve_statuses(result)
        self.assert_for_table_with_profiler(source_status, sink_status)