From 4cad5762adab5da05eb51f09e4878e79a58c163b Mon Sep 17 00:00:00 2001 From: Akash Verma <138790903+akashverma0786@users.noreply.github.com> Date: Thu, 9 Jan 2025 14:59:49 +0530 Subject: [PATCH] Added lineage in cli e2e (#19216) --- .../cli_e2e/base/config_builders/builders.py | 26 +++++++++++++++++++ ingestion/tests/cli_e2e/base/e2e_types.py | 1 + ingestion/tests/cli_e2e/base/test_cli_db.py | 20 ++++++++++++++ ingestion/tests/cli_e2e/common/test_cli_db.py | 21 +++++++++++++++ ingestion/tests/cli_e2e/test_cli_athena.py | 7 +++++ ingestion/tests/cli_e2e/test_cli_bigquery.py | 3 +++ .../test_cli_bigquery_multiple_project.py | 3 +++ .../tests/cli_e2e/test_cli_datalake_s3.py | 7 +++++ .../tests/cli_e2e/test_cli_dbt_redshift.py | 5 ++++ ingestion/tests/cli_e2e/test_cli_hive.py | 5 +++- ingestion/tests/cli_e2e/test_cli_metabase.py | 6 +++++ ingestion/tests/cli_e2e/test_cli_mssql.py | 3 +++ ingestion/tests/cli_e2e/test_cli_mysql.py | 3 +++ ingestion/tests/cli_e2e/test_cli_oracle.py | 3 +++ ingestion/tests/cli_e2e/test_cli_postgres.py | 5 +++- ingestion/tests/cli_e2e/test_cli_powerbi.py | 6 +++++ .../tests/cli_e2e/test_cli_quicksight.py | 6 +++++ ingestion/tests/cli_e2e/test_cli_redash.py | 6 +++++ ingestion/tests/cli_e2e/test_cli_redshift.py | 3 +++ ingestion/tests/cli_e2e/test_cli_snowflake.py | 3 +++ ingestion/tests/cli_e2e/test_cli_tableau.py | 6 +++++ ingestion/tests/cli_e2e/test_cli_vertica.py | 3 +++ 22 files changed, 149 insertions(+), 2 deletions(-) diff --git a/ingestion/tests/cli_e2e/base/config_builders/builders.py b/ingestion/tests/cli_e2e/base/config_builders/builders.py index f9b1a9d0ccc..3368fcb6ed0 100644 --- a/ingestion/tests/cli_e2e/base/config_builders/builders.py +++ b/ingestion/tests/cli_e2e/base/config_builders/builders.py @@ -72,6 +72,31 @@ class ProfilerConfigBuilder(BaseBuilder): return self.config +class LineageConfigBuilder(BaseBuilder): + """Builder class for the Lineage config""" + + # pylint: disable=invalid-name + def __init__(self, config: dict, config_args: dict) -> None: + super().__init__(config, config_args) + self.resultLimit = self.config_args.get("resultLimit", 1000) + self.queryLogDuration = self.config_args.get("queryLogDuration", 1) + + # pylint: enable=invalid-name + def build(self) -> dict: + """build lineage config""" + self.config["source"]["type"] = self.config_args["source"] + self.config["source"]["sourceConfig"] = { + "config": { + "type": "DatabaseLineage", + "queryLogDuration": 1, + "resultLimit": 10000, + "processQueryLineage": True, + "processStoredProcedureLineage": True, + } + } + return self.config + + class AutoClassificationConfigBuilder(BaseBuilder): """Builder class for the AutoClassification config""" @@ -206,6 +231,7 @@ def builder_factory(builder, config: dict, config_args: dict): """Factory method to return the builder class""" builder_classes = { E2EType.PROFILER.value: ProfilerConfigBuilder, + E2EType.LINEAGE.value: LineageConfigBuilder, E2EType.DATA_QUALITY.value: DataQualityConfigBuilder, E2EType.INGEST_DB_FILTER_SCHEMA.value: SchemaConfigBuilder, E2EType.INGEST_DB_FILTER_TABLE.value: TableConfigBuilder, diff --git a/ingestion/tests/cli_e2e/base/e2e_types.py b/ingestion/tests/cli_e2e/base/e2e_types.py index 00d4b9352f5..14eb0a1b855 100644 --- a/ingestion/tests/cli_e2e/base/e2e_types.py +++ b/ingestion/tests/cli_e2e/base/e2e_types.py @@ -23,6 +23,7 @@ class E2EType(Enum): INGEST = "ingest" PROFILER = "profiler" + LINEAGE = "lineage" PROFILER_PROCESSOR = "profiler-processor" AUTO_CLASSIFICATION = "auto-classification" DATA_QUALITY = "test" diff --git a/ingestion/tests/cli_e2e/base/test_cli_db.py b/ingestion/tests/cli_e2e/base/test_cli_db.py index b2e29e7b8f8..1e8ef1e1636 100644 --- a/ingestion/tests/cli_e2e/base/test_cli_db.py +++ b/ingestion/tests/cli_e2e/base/test_cli_db.py @@ -211,6 +211,20 @@ class CliDBBase(TestCase): This test will need to be implemented on the database specific test classes """ + self.delete_table_and_view() + self.create_table_and_view() + self.build_config_file( + E2EType.INGEST_DB_FILTER_SCHEMA, + {"includes": self.get_includes_schemas()}, + ) + self.run_command() + self.build_config_file( + E2EType.LINEAGE, + {"source": f"{self.get_connector_name()}-lineage"}, + ) + result = self.run_command() + sink_status, source_status = self.retrieve_statuses(result) + self.assert_for_test_lineage(source_status, sink_status) @pytest.mark.order(12) def test_profiler_with_time_partition(self) -> None: @@ -326,6 +340,12 @@ class CliDBBase(TestCase): ) -> None: raise NotImplementedError() + @abstractmethod + def assert_for_test_lineage( + self, source_status: Status, sink_status: Status + ) -> None: + raise NotImplementedError() + @abstractmethod def assert_for_table_with_profiler( self, source_status: Status, sink_status: Status diff --git a/ingestion/tests/cli_e2e/common/test_cli_db.py b/ingestion/tests/cli_e2e/common/test_cli_db.py index 55fe0bf25c4..b0171d5b90a 100644 --- a/ingestion/tests/cli_e2e/common/test_cli_db.py +++ b/ingestion/tests/cli_e2e/common/test_cli_db.py @@ -100,6 +100,23 @@ class CliCommonDB: # of https://github.com/open-metadata/OpenMetadata/pull/18558 # we need to introduce Lineage E2E base and add view lineage check there. + def assert_for_test_lineage(self, source_status: Status, sink_status: Status): + self.assertEqual(len(source_status.failures), 0) + self.assertEqual(len(source_status.warnings), 0) + self.assertEqual(len(sink_status.failures), 0) + self.assertEqual(len(sink_status.warnings), 0) + self.assertGreaterEqual(len(sink_status.records), 1) + lineage_data = self.retrieve_lineage(self.fqn_created_table()) + retrieved_view_column_lineage_count = len( + lineage_data["downstreamEdges"][0]["lineageDetails"]["columnsLineage"] + ) + self.assertEqual( + retrieved_view_column_lineage_count, self.view_column_lineage_count() + ) + + retrieved_lineage_node = lineage_data["nodes"][0]["fullyQualifiedName"] + self.assertEqual(retrieved_lineage_node, self.expected_lineage_node()) + def assert_auto_classification_sample_data( self, source_status: Status, sink_status: Status ): @@ -206,6 +223,10 @@ class CliCommonDB: def view_column_lineage_count(self) -> int: raise NotImplementedError() + @abstractmethod + def expected_lineage_node(self) -> str: + raise NotImplementedError() + @staticmethod @abstractmethod def fqn_created_table() -> str: diff --git a/ingestion/tests/cli_e2e/test_cli_athena.py b/ingestion/tests/cli_e2e/test_cli_athena.py index c4a495a7504..c9608799e2e 100644 --- a/ingestion/tests/cli_e2e/test_cli_athena.py +++ b/ingestion/tests/cli_e2e/test_cli_athena.py @@ -66,6 +66,9 @@ class AthenaCliTest(CliCommonDB.TestSuite): def view_column_lineage_count(self) -> int: pass + def expected_lineage_node(self) -> str: + pass + @staticmethod def fqn_created_table() -> str: return "e2e_athena.database_name.e2e_db.customers" @@ -130,6 +133,10 @@ class AthenaCliTest(CliCommonDB.TestSuite): sink_status, source_status = self.retrieve_statuses(result) self.assert_for_table_with_profiler(source_status, sink_status) + @pytest.mark.order(11) + def test_lineage(self) -> None: + pytest.skip("Lineage not configured. Skipping Test") + def assert_for_vanilla_ingestion( self, source_status: Status, sink_status: Status ) -> None: diff --git a/ingestion/tests/cli_e2e/test_cli_bigquery.py b/ingestion/tests/cli_e2e/test_cli_bigquery.py index 23bad416a5a..59832af872f 100644 --- a/ingestion/tests/cli_e2e/test_cli_bigquery.py +++ b/ingestion/tests/cli_e2e/test_cli_bigquery.py @@ -98,6 +98,9 @@ class BigqueryCliTest(CliCommonDB.TestSuite, SQACommonMethods): def view_column_lineage_count(self) -> int: return 2 + def expected_lineage_node(self) -> str: + return "local_bigquery.open-metadata-beta.exclude_me.view_orders" + @staticmethod def _expected_profiled_tables() -> int: return 2 diff --git a/ingestion/tests/cli_e2e/test_cli_bigquery_multiple_project.py b/ingestion/tests/cli_e2e/test_cli_bigquery_multiple_project.py index c0946df01c4..0ad52d395f3 100644 --- a/ingestion/tests/cli_e2e/test_cli_bigquery_multiple_project.py +++ b/ingestion/tests/cli_e2e/test_cli_bigquery_multiple_project.py @@ -74,6 +74,9 @@ class BigqueryCliTest(CliCommonDB.TestSuite, SQACommonMethods): def view_column_lineage_count(self) -> int: return 2 + def expected_lineage_node(self) -> str: + return "local_bigquery_multiple.modified-leaf-330420.do_not_touch.view_orders" + @staticmethod def _expected_profiled_tables() -> int: return 2 diff --git a/ingestion/tests/cli_e2e/test_cli_datalake_s3.py b/ingestion/tests/cli_e2e/test_cli_datalake_s3.py index 12bb495ac20..b79c14c80fd 100644 --- a/ingestion/tests/cli_e2e/test_cli_datalake_s3.py +++ b/ingestion/tests/cli_e2e/test_cli_datalake_s3.py @@ -63,6 +63,9 @@ class DatalakeCliTest(CliCommonDB.TestSuite): def view_column_lineage_count(self) -> int: pass + def expected_lineage_node(self) -> str: + pass + @staticmethod def fqn_created_table() -> str: return 'aws_datalake.default.aws-datalake-e2e."sales/sales.csv"' @@ -128,3 +131,7 @@ class DatalakeCliTest(CliCommonDB.TestSuite): result = self.run_command("profile") sink_status, source_status = self.retrieve_statuses(result) self.assert_for_table_with_profiler(source_status, sink_status) + + @pytest.mark.order(11) + def test_lineage(self) -> None: + pytest.skip("Lineage not configured. Skipping Test") diff --git a/ingestion/tests/cli_e2e/test_cli_dbt_redshift.py b/ingestion/tests/cli_e2e/test_cli_dbt_redshift.py index d3772eb0c33..09ac816b7bb 100644 --- a/ingestion/tests/cli_e2e/test_cli_dbt_redshift.py +++ b/ingestion/tests/cli_e2e/test_cli_dbt_redshift.py @@ -15,6 +15,7 @@ Test Redshift connector with CLI from pathlib import Path from typing import List +import pytest from sqlalchemy.engine import Engine from metadata.ingestion.api.status import Status @@ -62,6 +63,10 @@ class DbtCliTest(CliDBTBase.TestSuite): "local_redshift.dev.dbt_cli_e2e.orders", ] + @pytest.mark.order(11) + def test_lineage(self) -> None: + pytest.skip("Lineage not configured. Skipping Test") + def assert_for_vanilla_ingestion( self, source_status: Status, sink_status: Status ) -> None: diff --git a/ingestion/tests/cli_e2e/test_cli_hive.py b/ingestion/tests/cli_e2e/test_cli_hive.py index 6c28e10229b..a0f0518b0dc 100644 --- a/ingestion/tests/cli_e2e/test_cli_hive.py +++ b/ingestion/tests/cli_e2e/test_cli_hive.py @@ -107,7 +107,10 @@ class HiveCliTest(CliCommonDB.TestSuite, SQACommonMethods): """view was created from `CREATE VIEW xyz AS (SELECT * FROM abc)` which does not propagate column lineage """ - return None + return 3 + + def expected_lineage_node(self) -> str: + return "e2e_hive.default.e2e_cli_tests.view_persons" @staticmethod def fqn_created_table() -> str: diff --git a/ingestion/tests/cli_e2e/test_cli_metabase.py b/ingestion/tests/cli_e2e/test_cli_metabase.py index 1da93bae5bf..b9dbdb7fd8b 100644 --- a/ingestion/tests/cli_e2e/test_cli_metabase.py +++ b/ingestion/tests/cli_e2e/test_cli_metabase.py @@ -15,6 +15,8 @@ Test Metabase connector with CLI from pathlib import Path from typing import List +import pytest + from .base.test_cli import PATH_TO_RESOURCES from .common.test_cli_dashboard import CliCommonDashboard @@ -77,3 +79,7 @@ class MetabaseCliTest(CliCommonDashboard.TestSuite): def expected_dashboards_and_charts_after_patch(self) -> int: return 0 + + @pytest.mark.order(11) + def test_lineage(self) -> None: + pytest.skip("Lineage not configured. Skipping Test") diff --git a/ingestion/tests/cli_e2e/test_cli_mssql.py b/ingestion/tests/cli_e2e/test_cli_mssql.py index 8450ba21506..1a3bdab09dd 100644 --- a/ingestion/tests/cli_e2e/test_cli_mssql.py +++ b/ingestion/tests/cli_e2e/test_cli_mssql.py @@ -84,6 +84,9 @@ class MSSQLCliTest(CliCommonDB.TestSuite, SQACommonMethods): def view_column_lineage_count(self) -> int: return 4 + def expected_lineage_node(self) -> str: + return "mssql.e2e_cli_tests.dbo.view_persons" + @staticmethod def fqn_created_table() -> str: return "mssql.e2e_cli_tests.dbo.persons" diff --git a/ingestion/tests/cli_e2e/test_cli_mysql.py b/ingestion/tests/cli_e2e/test_cli_mysql.py index bc1325f8701..57689f532d5 100644 --- a/ingestion/tests/cli_e2e/test_cli_mysql.py +++ b/ingestion/tests/cli_e2e/test_cli_mysql.py @@ -91,6 +91,9 @@ class MysqlCliTest(CliCommonDB.TestSuite, SQACommonMethods): def view_column_lineage_count(self) -> int: return 22 + def expected_lineage_node(self) -> str: + return "local_mysql.default.openmetadata_db.view_persons" + @staticmethod def fqn_created_table() -> str: return "local_mysql.default.openmetadata_db.persons" diff --git a/ingestion/tests/cli_e2e/test_cli_oracle.py b/ingestion/tests/cli_e2e/test_cli_oracle.py index 7eafd8e0a3f..6edbb45cb13 100644 --- a/ingestion/tests/cli_e2e/test_cli_oracle.py +++ b/ingestion/tests/cli_e2e/test_cli_oracle.py @@ -101,6 +101,9 @@ SELECT * from names """ return 12 + def expected_lineage_node(self) -> str: + return "e2e_oracle.default.admin.admin_emp_view" + @staticmethod def fqn_created_table() -> str: return "e2e_oracle.default.admin.ADMIN_EMP" diff --git a/ingestion/tests/cli_e2e/test_cli_postgres.py b/ingestion/tests/cli_e2e/test_cli_postgres.py index 5698e547320..6ade13ca046 100644 --- a/ingestion/tests/cli_e2e/test_cli_postgres.py +++ b/ingestion/tests/cli_e2e/test_cli_postgres.py @@ -106,7 +106,10 @@ class PostgresCliTest(CliCommonDB.TestSuite, SQACommonMethods): return len(self.insert_data_queries) def view_column_lineage_count(self) -> int: - return None + return 22 + + def expected_lineage_node(self) -> str: + return "local_postgres.E2EDB.public.view_all_datatypes" @staticmethod def fqn_created_table() -> str: diff --git a/ingestion/tests/cli_e2e/test_cli_powerbi.py b/ingestion/tests/cli_e2e/test_cli_powerbi.py index 9b1e1476994..2409c1adf1f 100644 --- a/ingestion/tests/cli_e2e/test_cli_powerbi.py +++ b/ingestion/tests/cli_e2e/test_cli_powerbi.py @@ -15,6 +15,8 @@ Test PowerBI connector with CLI from pathlib import Path from typing import List +import pytest + from .base.test_cli import PATH_TO_RESOURCES from .common.test_cli_dashboard import CliCommonDashboard @@ -75,3 +77,7 @@ class PowerBICliTest(CliCommonDashboard.TestSuite): def expected_dashboards_and_charts_after_patch(self) -> int: return 0 + + @pytest.mark.order(11) + def test_lineage(self) -> None: + pytest.skip("Lineage not configured. Skipping Test") diff --git a/ingestion/tests/cli_e2e/test_cli_quicksight.py b/ingestion/tests/cli_e2e/test_cli_quicksight.py index ef1495e6f38..1d09b7da4a6 100644 --- a/ingestion/tests/cli_e2e/test_cli_quicksight.py +++ b/ingestion/tests/cli_e2e/test_cli_quicksight.py @@ -14,6 +14,8 @@ Test Quicksight connector with CLI """ from typing import List +import pytest + from metadata.ingestion.api.status import Status from .common.test_cli_dashboard import CliCommonDashboard @@ -75,6 +77,10 @@ class QuicksightCliTest(CliCommonDashboard.TestSuite): def expected_dashboards_and_charts_after_patch(self) -> int: return 7 + @pytest.mark.order(11) + def test_lineage(self) -> None: + pytest.skip("Lineage not configured. Skipping Test") + def assert_for_vanilla_ingestion( self, source_status: Status, sink_status: Status ) -> None: diff --git a/ingestion/tests/cli_e2e/test_cli_redash.py b/ingestion/tests/cli_e2e/test_cli_redash.py index 43177271a24..df481cef500 100644 --- a/ingestion/tests/cli_e2e/test_cli_redash.py +++ b/ingestion/tests/cli_e2e/test_cli_redash.py @@ -14,6 +14,8 @@ Test Redash connector with CLI """ from typing import List +import pytest + from .common.test_cli_dashboard import CliCommonDashboard @@ -65,3 +67,7 @@ class RedashCliTest(CliCommonDashboard.TestSuite): def expected_dashboards_and_charts_after_patch(self) -> int: return 1 + + @pytest.mark.order(11) + def test_lineage(self) -> None: + pytest.skip("Lineage not configured. Skipping Test") diff --git a/ingestion/tests/cli_e2e/test_cli_redshift.py b/ingestion/tests/cli_e2e/test_cli_redshift.py index aa11e10f310..60ff1384bf0 100644 --- a/ingestion/tests/cli_e2e/test_cli_redshift.py +++ b/ingestion/tests/cli_e2e/test_cli_redshift.py @@ -110,6 +110,9 @@ class RedshiftCliTest(CliCommonDB.TestSuite, SQACommonMethods): """ return 9 + def expected_lineage_node(self) -> str: + return "e2e_redshift.e2e_cli_tests.dbt_jaffle.view_listing" + @staticmethod def fqn_created_table() -> str: return "e2e_redshift.e2e_cli_tests.dbt_jaffle.listing" diff --git a/ingestion/tests/cli_e2e/test_cli_snowflake.py b/ingestion/tests/cli_e2e/test_cli_snowflake.py index a6eb1896e23..e366cb3eb31 100644 --- a/ingestion/tests/cli_e2e/test_cli_snowflake.py +++ b/ingestion/tests/cli_e2e/test_cli_snowflake.py @@ -180,6 +180,9 @@ class SnowflakeCliTest(CliCommonDB.TestSuite, SQACommonMethods): def view_column_lineage_count(self) -> int: return 2 + def expected_lineage_node(self) -> str: + return "e2e_snowflake.E2E_DB.E2E_TEST.view_persons" + @staticmethod def fqn_created_table() -> str: return "e2e_snowflake.E2E_DB.E2E_TEST.PERSONS" diff --git a/ingestion/tests/cli_e2e/test_cli_tableau.py b/ingestion/tests/cli_e2e/test_cli_tableau.py index ead16fde853..dd0b5c8c713 100644 --- a/ingestion/tests/cli_e2e/test_cli_tableau.py +++ b/ingestion/tests/cli_e2e/test_cli_tableau.py @@ -15,6 +15,8 @@ Test Tableau connector with CLI from pathlib import Path from typing import List +import pytest + from metadata.ingestion.api.status import Status from .base.test_cli import PATH_TO_RESOURCES @@ -78,6 +80,10 @@ class TableauCliTest(CliCommonDashboard.TestSuite): def expected_dashboards_and_charts_after_patch(self) -> int: return 4 + @pytest.mark.order(11) + def test_lineage(self) -> None: + pytest.skip("Lineage not configured. Skipping Test") + # Overriding the method since for Tableau we don't expect lineage to be shown on this assert. # This differs from the base case def assert_not_including(self, source_status: Status, sink_status: Status): diff --git a/ingestion/tests/cli_e2e/test_cli_vertica.py b/ingestion/tests/cli_e2e/test_cli_vertica.py index 6abe0a1d2e9..7931de6218f 100644 --- a/ingestion/tests/cli_e2e/test_cli_vertica.py +++ b/ingestion/tests/cli_e2e/test_cli_vertica.py @@ -73,6 +73,9 @@ class VerticaCliTest(CliCommonDB.TestSuite, SQACommonMethods): def view_column_lineage_count(self) -> int: return 2 + def expected_lineage_node(self) -> str: + return "e2e_vertica.VMart.public.vendor_dimension_v" + @staticmethod def fqn_created_table() -> str: return "e2e_vertica.VMart.public.vendor_dimension_new"