| 
									
										
										
										
											2024-10-11 08:37:58 +02:00
										 |  |  | import glob | 
					
						
							|  |  |  | import json | 
					
						
							|  |  |  | import os.path | 
					
						
							| 
									
										
										
										
											2024-05-28 09:30:30 +02:00
										 |  |  | import sys | 
					
						
							| 
									
										
										
										
											2024-08-19 14:28:42 +02:00
										 |  |  | from dataclasses import dataclass | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  | from typing import List | 
					
						
							| 
									
										
										
										
											2024-05-28 09:30:30 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | import pytest | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-19 14:28:42 +02:00
										 |  |  | from _openmetadata_testutils.pydantic.test_utils import assert_equal_pydantic_objects | 
					
						
							|  |  |  | from metadata.data_quality.api.models import TestCaseDefinition | 
					
						
							| 
									
										
										
										
											2024-10-11 08:37:58 +02:00
										 |  |  | from metadata.generated.schema.entity.data.table import Table | 
					
						
							| 
									
										
										
										
											2024-05-28 09:30:30 +02:00
										 |  |  | from metadata.generated.schema.entity.services.databaseService import DatabaseService | 
					
						
							|  |  |  | from metadata.generated.schema.metadataIngestion.testSuitePipeline import ( | 
					
						
							|  |  |  |     TestSuiteConfigType, | 
					
						
							|  |  |  |     TestSuitePipeline, | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | from metadata.generated.schema.metadataIngestion.workflow import ( | 
					
						
							|  |  |  |     OpenMetadataWorkflowConfig, | 
					
						
							|  |  |  |     Processor, | 
					
						
							|  |  |  |     Sink, | 
					
						
							|  |  |  |     Source, | 
					
						
							|  |  |  |     SourceConfig, | 
					
						
							|  |  |  |     WorkflowConfig, | 
					
						
							|  |  |  | ) | 
					
						
							| 
									
										
										
										
											2024-10-11 08:37:58 +02:00
										 |  |  | from metadata.generated.schema.tests.basic import ( | 
					
						
							|  |  |  |     TestCaseResult, | 
					
						
							|  |  |  |     TestCaseStatus, | 
					
						
							|  |  |  |     TestResultValue, | 
					
						
							|  |  |  | ) | 
					
						
							| 
									
										
										
										
											2024-05-28 09:30:30 +02:00
										 |  |  | from metadata.generated.schema.tests.testCase import TestCase | 
					
						
							| 
									
										
										
										
											2024-07-02 09:56:35 +02:00
										 |  |  | from metadata.generated.schema.tests.testSuite import TestSuite | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  | from metadata.generated.schema.type.basic import ComponentConfig | 
					
						
							| 
									
										
										
										
											2024-07-12 09:44:21 +02:00
										 |  |  | from metadata.ingestion.api.status import TruncatedStackTraceError | 
					
						
							| 
									
										
										
										
											2024-05-28 09:30:30 +02:00
										 |  |  | from metadata.ingestion.ometa.ometa_api import OpenMetadata | 
					
						
							| 
									
										
										
										
											2024-10-11 08:37:58 +02:00
										 |  |  | from metadata.utils import entity_link | 
					
						
							| 
									
										
										
										
											2024-05-28 09:30:30 +02:00
										 |  |  | from metadata.workflow.data_quality import TestSuiteWorkflow | 
					
						
							| 
									
										
										
										
											2024-07-17 08:11:34 +02:00
										 |  |  | from metadata.workflow.metadata import MetadataWorkflow | 
					
						
							| 
									
										
										
										
											2024-05-28 09:30:30 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | if not sys.version_info >= (3, 9): | 
					
						
							|  |  |  |     pytest.skip("requires python 3.9+", allow_module_level=True) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-11 08:37:58 +02:00
										 |  |  | @pytest.fixture(scope="module") | 
					
						
							| 
									
										
										
										
											2024-05-28 09:30:30 +02:00
										 |  |  | def run_data_quality_workflow( | 
					
						
							| 
									
										
										
										
											2024-07-17 08:11:34 +02:00
										 |  |  |     run_workflow, | 
					
						
							|  |  |  |     ingestion_config, | 
					
						
							|  |  |  |     db_service: DatabaseService, | 
					
						
							|  |  |  |     metadata: OpenMetadata, | 
					
						
							|  |  |  |     sink_config, | 
					
						
							|  |  |  |     workflow_config, | 
					
						
							| 
									
										
										
										
											2024-05-28 09:30:30 +02:00
										 |  |  | ): | 
					
						
							| 
									
										
										
										
											2024-07-17 08:11:34 +02:00
										 |  |  |     run_workflow(MetadataWorkflow, ingestion_config) | 
					
						
							|  |  |  |     test_suite_config = OpenMetadataWorkflowConfig( | 
					
						
							| 
									
										
										
										
											2024-05-28 09:30:30 +02:00
										 |  |  |         source=Source( | 
					
						
							|  |  |  |             type=TestSuiteConfigType.TestSuite.value, | 
					
						
							|  |  |  |             serviceName="MyTestSuite", | 
					
						
							|  |  |  |             sourceConfig=SourceConfig( | 
					
						
							|  |  |  |                 config=TestSuitePipeline( | 
					
						
							|  |  |  |                     type=TestSuiteConfigType.TestSuite, | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |                     entityFullyQualifiedName=f"{db_service.fullyQualifiedName.root}.dvdrental.public.customer", | 
					
						
							| 
									
										
										
										
											2024-05-28 09:30:30 +02:00
										 |  |  |                 ) | 
					
						
							|  |  |  |             ), | 
					
						
							|  |  |  |             serviceConnection=db_service.connection, | 
					
						
							|  |  |  |         ), | 
					
						
							|  |  |  |         processor=Processor( | 
					
						
							|  |  |  |             type="orm-test-runner", | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |             config=ComponentConfig( | 
					
						
							|  |  |  |                 { | 
					
						
							|  |  |  |                     "testCases": [ | 
					
						
							|  |  |  |                         { | 
					
						
							|  |  |  |                             "name": "first_name_includes_tom_and_jerry_wo_enum", | 
					
						
							|  |  |  |                             "testDefinitionName": "columnValuesToBeInSet", | 
					
						
							|  |  |  |                             "columnName": "first_name", | 
					
						
							|  |  |  |                             "parameterValues": [ | 
					
						
							|  |  |  |                                 {"name": "allowedValues", "value": "['Tom', 'Jerry']"} | 
					
						
							|  |  |  |                             ], | 
					
						
							| 
									
										
										
										
											2024-10-11 08:37:58 +02:00
										 |  |  |                             "computePassedFailedRowCount": True, | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |                         }, | 
					
						
							|  |  |  |                         { | 
					
						
							|  |  |  |                             "name": "first_name_includes_tom_and_jerry", | 
					
						
							|  |  |  |                             "testDefinitionName": "columnValuesToBeInSet", | 
					
						
							|  |  |  |                             "columnName": "first_name", | 
					
						
							|  |  |  |                             "parameterValues": [ | 
					
						
							|  |  |  |                                 {"name": "allowedValues", "value": "['Tom', 'Jerry']"}, | 
					
						
							| 
									
										
										
										
											2024-10-15 16:29:43 +02:00
										 |  |  |                                 {"name": "matchEnum", "value": "false"}, | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |                             ], | 
					
						
							|  |  |  |                         }, | 
					
						
							|  |  |  |                         { | 
					
						
							|  |  |  |                             "name": "first_name_is_tom_or_jerry", | 
					
						
							|  |  |  |                             "testDefinitionName": "columnValuesToBeInSet", | 
					
						
							|  |  |  |                             "columnName": "first_name", | 
					
						
							|  |  |  |                             "parameterValues": [ | 
					
						
							|  |  |  |                                 {"name": "allowedValues", "value": "['Tom', 'Jerry']"}, | 
					
						
							| 
									
										
										
										
											2024-10-15 16:29:43 +02:00
										 |  |  |                                 {"name": "matchEnum", "value": "true"}, | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |                             ], | 
					
						
							|  |  |  |                         }, | 
					
						
							| 
									
										
										
										
											2024-08-19 09:09:35 +02:00
										 |  |  |                         { | 
					
						
							|  |  |  |                             "name": "id_no_bounds", | 
					
						
							|  |  |  |                             "testDefinitionName": "columnValuesToBeBetween", | 
					
						
							|  |  |  |                             "columnName": "customer_id", | 
					
						
							|  |  |  |                             "parameterValues": [], | 
					
						
							|  |  |  |                         }, | 
					
						
							| 
									
										
										
										
											2024-10-11 08:37:58 +02:00
										 |  |  |                         { | 
					
						
							|  |  |  |                             "name": "column_values_not_match_regex", | 
					
						
							|  |  |  |                             "testDefinitionName": "columnValuesToNotMatchRegex", | 
					
						
							|  |  |  |                             "columnName": "email", | 
					
						
							|  |  |  |                             "parameterValues": [ | 
					
						
							|  |  |  |                                 {"name": "forbiddenRegex", "value": ".*@example\\.com$"} | 
					
						
							|  |  |  |                             ], | 
					
						
							|  |  |  |                         }, | 
					
						
							|  |  |  |                         { | 
					
						
							|  |  |  |                             "name": "table_column_count_between", | 
					
						
							|  |  |  |                             "testDefinitionName": "tableColumnCountToBeBetween", | 
					
						
							|  |  |  |                             "parameterValues": [ | 
					
						
							|  |  |  |                                 {"name": "minColValue", "value": "8"}, | 
					
						
							|  |  |  |                                 {"name": "maxColValue", "value": "12"}, | 
					
						
							|  |  |  |                             ], | 
					
						
							|  |  |  |                         }, | 
					
						
							|  |  |  |                         { | 
					
						
							|  |  |  |                             "name": "table_column_count_equal", | 
					
						
							|  |  |  |                             "testDefinitionName": "tableColumnCountToEqual", | 
					
						
							|  |  |  |                             "parameterValues": [{"name": "columnCount", "value": "11"}], | 
					
						
							|  |  |  |                         }, | 
					
						
							|  |  |  |                         { | 
					
						
							|  |  |  |                             "name": "table_column_name_exists", | 
					
						
							|  |  |  |                             "testDefinitionName": "tableColumnNameToExist", | 
					
						
							|  |  |  |                             "parameterValues": [ | 
					
						
							|  |  |  |                                 {"name": "columnName", "value": "customer_id"} | 
					
						
							|  |  |  |                             ], | 
					
						
							|  |  |  |                         }, | 
					
						
							|  |  |  |                         { | 
					
						
							|  |  |  |                             "name": "table_column_names_match_set", | 
					
						
							|  |  |  |                             "testDefinitionName": "tableColumnToMatchSet", | 
					
						
							|  |  |  |                             "parameterValues": [ | 
					
						
							|  |  |  |                                 { | 
					
						
							|  |  |  |                                     "name": "columnNames", | 
					
						
							|  |  |  |                                     "value": "customer_id, store_id, first_name, last_name, email, address_id, activebool, create_date, last_update, active, json_field", | 
					
						
							|  |  |  |                                 }, | 
					
						
							|  |  |  |                                 {"name": "ordered", "value": "false"}, | 
					
						
							|  |  |  |                             ], | 
					
						
							|  |  |  |                         }, | 
					
						
							|  |  |  |                         { | 
					
						
							|  |  |  |                             "name": "custom_sql_query_count", | 
					
						
							|  |  |  |                             "testDefinitionName": "tableCustomSQLQuery", | 
					
						
							|  |  |  |                             "parameterValues": [ | 
					
						
							|  |  |  |                                 { | 
					
						
							|  |  |  |                                     "name": "sqlExpression", | 
					
						
							|  |  |  |                                     "value": "SELECT CASE WHEN COUNT(*) > 0 THEN 0 ELSE 1 END FROM customer WHERE active = 1", | 
					
						
							|  |  |  |                                 }, | 
					
						
							|  |  |  |                                 {"name": "strategy", "value": "COUNT"}, | 
					
						
							|  |  |  |                                 {"name": "threshold", "value": "0"}, | 
					
						
							|  |  |  |                             ], | 
					
						
							|  |  |  |                         }, | 
					
						
							|  |  |  |                         { | 
					
						
							|  |  |  |                             "name": "custom_sql_query_rows", | 
					
						
							|  |  |  |                             "testDefinitionName": "tableCustomSQLQuery", | 
					
						
							|  |  |  |                             "parameterValues": [ | 
					
						
							|  |  |  |                                 { | 
					
						
							|  |  |  |                                     "name": "sqlExpression", | 
					
						
							|  |  |  |                                     "value": "SELECT * FROM customer WHERE active = 1", | 
					
						
							|  |  |  |                                 }, | 
					
						
							|  |  |  |                                 {"name": "strategy", "value": "ROWS"}, | 
					
						
							|  |  |  |                                 {"name": "threshold", "value": "10"}, | 
					
						
							|  |  |  |                             ], | 
					
						
							|  |  |  |                         }, | 
					
						
							|  |  |  |                         { | 
					
						
							|  |  |  |                             "name": "table_row_count_between", | 
					
						
							|  |  |  |                             "testDefinitionName": "tableRowCountToBeBetween", | 
					
						
							|  |  |  |                             "parameterValues": [ | 
					
						
							|  |  |  |                                 {"name": "minValue", "value": "100"}, | 
					
						
							|  |  |  |                                 {"name": "maxValue", "value": "1000"}, | 
					
						
							|  |  |  |                             ], | 
					
						
							|  |  |  |                         }, | 
					
						
							|  |  |  |                         { | 
					
						
							|  |  |  |                             "name": "table_row_count_equal", | 
					
						
							|  |  |  |                             "testDefinitionName": "tableRowCountToEqual", | 
					
						
							|  |  |  |                             "parameterValues": [{"name": "value", "value": "599"}], | 
					
						
							|  |  |  |                         }, | 
					
						
							|  |  |  |                         { | 
					
						
							|  |  |  |                             "name": "table_row_inserted_count_between_fail", | 
					
						
							|  |  |  |                             "testDefinitionName": "tableRowInsertedCountToBeBetween", | 
					
						
							|  |  |  |                             "parameterValues": [ | 
					
						
							|  |  |  |                                 {"name": "min", "value": "10"}, | 
					
						
							|  |  |  |                                 {"name": "max", "value": "50"}, | 
					
						
							|  |  |  |                                 {"name": "columnName", "value": "create_date"}, | 
					
						
							|  |  |  |                                 {"name": "rangeType", "value": "DAY"}, | 
					
						
							|  |  |  |                                 {"name": "rangeInterval", "value": "1"}, | 
					
						
							|  |  |  |                             ], | 
					
						
							|  |  |  |                         }, | 
					
						
							|  |  |  |                         { | 
					
						
							|  |  |  |                             "name": "table_row_inserted_count_between_success", | 
					
						
							|  |  |  |                             "testDefinitionName": "tableRowInsertedCountToBeBetween", | 
					
						
							|  |  |  |                             "parameterValues": [ | 
					
						
							|  |  |  |                                 {"name": "min", "value": "590"}, | 
					
						
							|  |  |  |                                 {"name": "max", "value": "600"}, | 
					
						
							|  |  |  |                                 {"name": "columnName", "value": "last_update"}, | 
					
						
							|  |  |  |                                 {"name": "rangeType", "value": "YEAR"}, | 
					
						
							|  |  |  |                                 {"name": "rangeInterval", "value": "12"}, | 
					
						
							|  |  |  |                             ], | 
					
						
							|  |  |  |                         }, | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |                     ], | 
					
						
							|  |  |  |                 } | 
					
						
							| 
									
										
										
										
											2024-05-28 09:30:30 +02:00
										 |  |  |             ), | 
					
						
							|  |  |  |         ), | 
					
						
							| 
									
										
										
										
											2024-07-17 08:11:34 +02:00
										 |  |  |         sink=Sink.model_validate(sink_config), | 
					
						
							|  |  |  |         workflowConfig=WorkflowConfig.model_validate(workflow_config), | 
					
						
							| 
									
										
										
										
											2024-05-28 09:30:30 +02:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2024-07-17 08:11:34 +02:00
										 |  |  |     test_suite_processor = TestSuiteWorkflow.create(test_suite_config) | 
					
						
							| 
									
										
										
										
											2024-07-12 09:44:21 +02:00
										 |  |  |     test_suite_processor.execute() | 
					
						
							|  |  |  |     test_suite_processor.raise_from_status() | 
					
						
							| 
									
										
										
										
											2024-07-02 09:56:35 +02:00
										 |  |  |     yield | 
					
						
							|  |  |  |     test_suite: TestSuite = metadata.get_by_name( | 
					
						
							|  |  |  |         TestSuite, "MyTestSuite", nullable=True | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     if test_suite: | 
					
						
							|  |  |  |         metadata.delete(TestSuite, test_suite.id, recursive=True, hard_delete=True) | 
					
						
							| 
									
										
										
										
											2024-05-28 09:30:30 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-11 08:37:58 +02:00
										 |  |  | def test_all_definition_exists(metadata, run_data_quality_workflow, db_service): | 
					
						
							|  |  |  |     test_difinitions_glob = ( | 
					
						
							|  |  |  |         os.path.dirname(__file__) | 
					
						
							|  |  |  |         + "/../../../.." | 
					
						
							|  |  |  |         + "/openmetadata-service/src/main/resources/json/data/tests/**.json" | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     test_definitions: List[str] = [] | 
					
						
							|  |  |  |     for test_definition_file in glob.glob(test_difinitions_glob, recursive=False): | 
					
						
							|  |  |  |         test_definitions.append(json.load(open(test_definition_file))["name"]) | 
					
						
							|  |  |  |     assert len(test_definitions) > 0 | 
					
						
							|  |  |  |     table: Table = metadata.get_by_name( | 
					
						
							|  |  |  |         Table, | 
					
						
							|  |  |  |         f"{db_service.fullyQualifiedName.root}.dvdrental.public.customer", | 
					
						
							|  |  |  |         nullable=False, | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     tcs: List[TestCase] = metadata.list_entities( | 
					
						
							|  |  |  |         TestCase, | 
					
						
							|  |  |  |         fields=["*"], | 
					
						
							|  |  |  |         params={ | 
					
						
							|  |  |  |             "entityLink": entity_link.get_entity_link( | 
					
						
							|  |  |  |                 Table, table.fullyQualifiedName.root | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |     ).entities | 
					
						
							|  |  |  |     tcs_dict = {tc.testDefinition.fullyQualifiedName: tc for tc in tcs} | 
					
						
							|  |  |  |     excluded = { | 
					
						
							|  |  |  |         # TODO implement these too | 
					
						
							|  |  |  |         "columnValueLengthsToBeBetween", | 
					
						
							|  |  |  |         "columnValueMaxToBeBetween", | 
					
						
							|  |  |  |         "columnValueMinToBeBetween", | 
					
						
							|  |  |  |         "columnValuesToBeUnique", | 
					
						
							|  |  |  |         "tableDataToBeFresh", | 
					
						
							|  |  |  |         "columnValuesToMatchRegex", | 
					
						
							|  |  |  |         "columnValuesToNotMatchRegex", | 
					
						
							|  |  |  |         "columnValueStdDevToBeBetween", | 
					
						
							|  |  |  |         "columnValuesToBeNotNull", | 
					
						
							|  |  |  |         "columnValueMedianToBeBetween", | 
					
						
							|  |  |  |         "columnValuesSumToBeBetween", | 
					
						
							|  |  |  |         "columnValuesToBeInSet", | 
					
						
							|  |  |  |         "columnValuesMissingCount", | 
					
						
							|  |  |  |         "columnValuesToBeNotInSet", | 
					
						
							|  |  |  |         "columnValueMeanToBeBetween", | 
					
						
							|  |  |  |         "columnValuesToBeBetween", | 
					
						
							|  |  |  |         "tableDiff", | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     missing = set() | 
					
						
							|  |  |  |     for test_definition in test_definitions: | 
					
						
							|  |  |  |         if test_definition in tcs_dict: | 
					
						
							|  |  |  |             assert ( | 
					
						
							|  |  |  |                 test_definition not in excluded | 
					
						
							|  |  |  |             ), f"Remove test from excluded list: {test_definition}" | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             if test_definition in excluded: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             missing.add(test_definition.fullyQualifiedName.root) | 
					
						
							|  |  |  |     assert not missing, f"Missing test cases: {missing}" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-28 09:30:30 +02:00
										 |  |  | @pytest.mark.parametrize( | 
					
						
							|  |  |  |     "test_case_name,expected_status", | 
					
						
							|  |  |  |     [ | 
					
						
							| 
									
										
										
										
											2024-10-11 08:37:58 +02:00
										 |  |  |         ( | 
					
						
							|  |  |  |             "first_name_includes_tom_and_jerry_wo_enum", | 
					
						
							|  |  |  |             TestCaseResult( | 
					
						
							|  |  |  |                 timestamp=0, | 
					
						
							|  |  |  |                 testCaseStatus=TestCaseStatus.Success, | 
					
						
							|  |  |  |                 passedRows=2, | 
					
						
							|  |  |  |                 failedRows=597, | 
					
						
							|  |  |  |             ), | 
					
						
							|  |  |  |         ), | 
					
						
							|  |  |  |         ( | 
					
						
							|  |  |  |             "first_name_includes_tom_and_jerry", | 
					
						
							|  |  |  |             TestCaseResult(timestamp=0, testCaseStatus=TestCaseStatus.Success), | 
					
						
							|  |  |  |         ), | 
					
						
							|  |  |  |         ( | 
					
						
							|  |  |  |             "first_name_is_tom_or_jerry", | 
					
						
							|  |  |  |             TestCaseResult(timestamp=0, testCaseStatus=TestCaseStatus.Failed), | 
					
						
							|  |  |  |         ), | 
					
						
							|  |  |  |         ( | 
					
						
							|  |  |  |             "id_no_bounds", | 
					
						
							|  |  |  |             TestCaseResult(timestamp=0, testCaseStatus=TestCaseStatus.Success), | 
					
						
							|  |  |  |         ), | 
					
						
							|  |  |  |         ( | 
					
						
							|  |  |  |             "column_values_not_match_regex", | 
					
						
							|  |  |  |             TestCaseResult(timestamp=0, testCaseStatus=TestCaseStatus.Success), | 
					
						
							|  |  |  |         ), | 
					
						
							|  |  |  |         ( | 
					
						
							|  |  |  |             "table_column_count_between", | 
					
						
							|  |  |  |             TestCaseResult( | 
					
						
							|  |  |  |                 timestamp=0, | 
					
						
							|  |  |  |                 testCaseStatus=TestCaseStatus.Success, | 
					
						
							|  |  |  |                 testResultValue=[TestResultValue(name="columnCount", value="11")], | 
					
						
							|  |  |  |             ), | 
					
						
							|  |  |  |         ), | 
					
						
							|  |  |  |         ( | 
					
						
							|  |  |  |             "table_column_count_equal", | 
					
						
							|  |  |  |             TestCaseResult( | 
					
						
							|  |  |  |                 timestamp=0, | 
					
						
							|  |  |  |                 testCaseStatus=TestCaseStatus.Success, | 
					
						
							|  |  |  |                 testResultValue=[TestResultValue(name="columnCount", value="11")], | 
					
						
							|  |  |  |             ), | 
					
						
							|  |  |  |         ), | 
					
						
							|  |  |  |         ( | 
					
						
							|  |  |  |             "table_column_name_exists", | 
					
						
							|  |  |  |             TestCaseResult(timestamp=0, testCaseStatus=TestCaseStatus.Success), | 
					
						
							|  |  |  |         ), | 
					
						
							|  |  |  |         ( | 
					
						
							|  |  |  |             "table_column_names_match_set", | 
					
						
							|  |  |  |             TestCaseResult(timestamp=0, testCaseStatus=TestCaseStatus.Success), | 
					
						
							|  |  |  |         ), | 
					
						
							|  |  |  |         ( | 
					
						
							|  |  |  |             "custom_sql_query_count", | 
					
						
							|  |  |  |             TestCaseResult(timestamp=0, testCaseStatus=TestCaseStatus.Success), | 
					
						
							|  |  |  |         ), | 
					
						
							|  |  |  |         ( | 
					
						
							|  |  |  |             "custom_sql_query_rows", | 
					
						
							|  |  |  |             TestCaseResult( | 
					
						
							|  |  |  |                 timestamp=0, | 
					
						
							|  |  |  |                 testCaseStatus=TestCaseStatus.Failed, | 
					
						
							|  |  |  |                 testResultValues=[{"name": "resultRowCount", "value": "599"}], | 
					
						
							|  |  |  |             ), | 
					
						
							|  |  |  |         ), | 
					
						
							|  |  |  |         ( | 
					
						
							|  |  |  |             "table_row_count_between", | 
					
						
							|  |  |  |             TestCaseResult( | 
					
						
							|  |  |  |                 timestamp=0, | 
					
						
							|  |  |  |                 testCaseStatus=TestCaseStatus.Success, | 
					
						
							|  |  |  |                 testResultValue=[TestResultValue(name="rowCount", value="599")], | 
					
						
							|  |  |  |             ), | 
					
						
							|  |  |  |         ), | 
					
						
							|  |  |  |         ( | 
					
						
							|  |  |  |             "table_row_count_equal", | 
					
						
							|  |  |  |             TestCaseResult(timestamp=0, testCaseStatus=TestCaseStatus.Success), | 
					
						
							|  |  |  |         ), | 
					
						
							|  |  |  |         ( | 
					
						
							|  |  |  |             "table_row_inserted_count_between_fail", | 
					
						
							|  |  |  |             TestCaseResult(timestamp=0, testCaseStatus=TestCaseStatus.Failed), | 
					
						
							|  |  |  |         ), | 
					
						
							|  |  |  |         ( | 
					
						
							|  |  |  |             "table_row_inserted_count_between_success", | 
					
						
							|  |  |  |             TestCaseResult(timestamp=0, testCaseStatus=TestCaseStatus.Success), | 
					
						
							|  |  |  |         ), | 
					
						
							| 
									
										
										
										
											2024-05-28 09:30:30 +02:00
										 |  |  |     ], | 
					
						
							| 
									
										
										
										
											2024-10-11 08:37:58 +02:00
										 |  |  |     ids=lambda *x: x[0], | 
					
						
							| 
									
										
										
										
											2024-05-28 09:30:30 +02:00
										 |  |  | ) | 
					
						
							|  |  |  | def test_data_quality( | 
					
						
							|  |  |  |     run_data_quality_workflow, metadata: OpenMetadata, test_case_name, expected_status | 
					
						
							|  |  |  | ): | 
					
						
							|  |  |  |     test_cases: List[TestCase] = metadata.list_entities( | 
					
						
							|  |  |  |         TestCase, fields=["*"], skip_on_failure=True | 
					
						
							|  |  |  |     ).entities | 
					
						
							|  |  |  |     test_case: TestCase = next( | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |         (t for t in test_cases if t.name.root == test_case_name), None | 
					
						
							| 
									
										
										
										
											2024-05-28 09:30:30 +02:00
										 |  |  |     ) | 
					
						
							|  |  |  |     assert test_case is not None | 
					
						
							| 
									
										
										
										
											2024-10-11 08:37:58 +02:00
										 |  |  |     assert_equal_pydantic_objects( | 
					
						
							|  |  |  |         expected_status.model_copy( | 
					
						
							|  |  |  |             update={"timestamp": test_case.testCaseResult.timestamp} | 
					
						
							|  |  |  |         ), | 
					
						
							|  |  |  |         test_case.testCaseResult, | 
					
						
							|  |  |  |     ) | 
					
						
							| 
									
										
										
										
											2024-07-02 09:56:35 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-17 08:11:34 +02:00
										 |  |  | @pytest.fixture() | 
					
						
							| 
									
										
										
										
											2024-08-19 14:28:42 +02:00
										 |  |  | def get_incompatible_column_type_config(workflow_config, sink_config): | 
					
						
							|  |  |  |     def inner(entity_fqn: str, incompatible_test_case: TestCaseDefinition): | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |             "source": { | 
					
						
							|  |  |  |                 "type": "TestSuite", | 
					
						
							|  |  |  |                 "serviceName": "MyTestSuite", | 
					
						
							|  |  |  |                 "sourceConfig": { | 
					
						
							|  |  |  |                     "config": { | 
					
						
							|  |  |  |                         "type": "TestSuite", | 
					
						
							|  |  |  |                         "entityFullyQualifiedName": entity_fqn, | 
					
						
							|  |  |  |                     } | 
					
						
							|  |  |  |                 }, | 
					
						
							| 
									
										
										
										
											2024-07-02 09:56:35 +02:00
										 |  |  |             }, | 
					
						
							| 
									
										
										
										
											2024-08-19 14:28:42 +02:00
										 |  |  |             "processor": { | 
					
						
							|  |  |  |                 "type": "orm-test-runner", | 
					
						
							|  |  |  |                 "config": { | 
					
						
							|  |  |  |                     "testCases": [ | 
					
						
							|  |  |  |                         incompatible_test_case.model_dump(), | 
					
						
							|  |  |  |                         { | 
					
						
							|  |  |  |                             "name": "compatible_test", | 
					
						
							|  |  |  |                             "testDefinitionName": "columnValueMaxToBeBetween", | 
					
						
							|  |  |  |                             "columnName": "customer_id", | 
					
						
							|  |  |  |                             "parameterValues": [ | 
					
						
							|  |  |  |                                 {"name": "minValueForMaxInCol", "value": "0"}, | 
					
						
							|  |  |  |                                 {"name": "maxValueForMaxInCol", "value": "10"}, | 
					
						
							|  |  |  |                             ], | 
					
						
							|  |  |  |                         }, | 
					
						
							|  |  |  |                     ] | 
					
						
							|  |  |  |                 }, | 
					
						
							| 
									
										
										
										
											2024-07-02 09:56:35 +02:00
										 |  |  |             }, | 
					
						
							| 
									
										
										
										
											2024-08-19 14:28:42 +02:00
										 |  |  |             "sink": sink_config, | 
					
						
							|  |  |  |             "workflowConfig": workflow_config, | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return inner | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @dataclass | 
					
						
							|  |  |  | class IncompatibleTypeParameter: | 
					
						
							|  |  |  |     entity_fqn: str | 
					
						
							|  |  |  |     test_case: TestCaseDefinition | 
					
						
							|  |  |  |     expected_failure: TruncatedStackTraceError | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @pytest.fixture( | 
					
						
							|  |  |  |     params=[ | 
					
						
							|  |  |  |         IncompatibleTypeParameter( | 
					
						
							|  |  |  |             entity_fqn="{database_service}.dvdrental.public.customer", | 
					
						
							|  |  |  |             test_case=TestCaseDefinition( | 
					
						
							|  |  |  |                 name="string_max_between", | 
					
						
							|  |  |  |                 testDefinitionName="columnValueMaxToBeBetween", | 
					
						
							|  |  |  |                 columnName="first_name", | 
					
						
							|  |  |  |                 parameterValues=[ | 
					
						
							|  |  |  |                     {"name": "minValueForMaxInCol", "value": "0"}, | 
					
						
							|  |  |  |                     {"name": "maxValueForMaxInCol", "value": "10"}, | 
					
						
							|  |  |  |                 ], | 
					
						
							|  |  |  |             ), | 
					
						
							|  |  |  |             expected_failure=TruncatedStackTraceError( | 
					
						
							|  |  |  |                 name="Incompatible Column for Test Case", | 
					
						
							|  |  |  |                 error="Test case string_max_between of type columnValueMaxToBeBetween " | 
					
						
							|  |  |  |                 "is not compatible with column first_name of type VARCHAR", | 
					
						
							|  |  |  |             ), | 
					
						
							|  |  |  |         ), | 
					
						
							|  |  |  |         IncompatibleTypeParameter( | 
					
						
							|  |  |  |             entity_fqn="{database_service}.dvdrental.public.customer", | 
					
						
							|  |  |  |             test_case=TestCaseDefinition( | 
					
						
							|  |  |  |                 name="unique_json_column", | 
					
						
							|  |  |  |                 testDefinitionName="columnValuesToBeUnique", | 
					
						
							|  |  |  |                 columnName="json_field", | 
					
						
							|  |  |  |             ), | 
					
						
							|  |  |  |             expected_failure=TruncatedStackTraceError( | 
					
						
							|  |  |  |                 name="Incompatible Column for Test Case", | 
					
						
							|  |  |  |                 error="Test case unique_json_column of type columnValuesToBeUnique " | 
					
						
							|  |  |  |                 "is not compatible with column json_field of type JSON", | 
					
						
							|  |  |  |             ), | 
					
						
							|  |  |  |         ), | 
					
						
							|  |  |  |     ], | 
					
						
							|  |  |  |     ids=lambda x: x.test_case.name, | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | def parameters(request, db_service): | 
					
						
							|  |  |  |     request.param.entity_fqn = request.param.entity_fqn.format( | 
					
						
							|  |  |  |         database_service=db_service.fullyQualifiedName.root | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     return request.param | 
					
						
							| 
									
										
										
										
											2024-07-17 08:11:34 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_incompatible_column_type( | 
					
						
							| 
									
										
										
										
											2024-08-19 14:28:42 +02:00
										 |  |  |     parameters: IncompatibleTypeParameter, | 
					
						
							| 
									
										
										
										
											2024-07-17 08:11:34 +02:00
										 |  |  |     patch_passwords_for_db_services, | 
					
						
							|  |  |  |     run_workflow, | 
					
						
							|  |  |  |     ingestion_config, | 
					
						
							| 
									
										
										
										
											2024-08-19 14:28:42 +02:00
										 |  |  |     get_incompatible_column_type_config, | 
					
						
							| 
									
										
										
										
											2024-07-17 08:11:34 +02:00
										 |  |  |     metadata: OpenMetadata, | 
					
						
							|  |  |  |     db_service, | 
					
						
							| 
									
										
										
										
											2024-08-19 14:28:42 +02:00
										 |  |  |     cleanup_fqns, | 
					
						
							| 
									
										
										
										
											2024-07-17 08:11:34 +02:00
										 |  |  | ): | 
					
						
							|  |  |  |     run_workflow(MetadataWorkflow, ingestion_config) | 
					
						
							|  |  |  |     test_suite_processor = run_workflow( | 
					
						
							| 
									
										
										
										
											2024-08-19 14:28:42 +02:00
										 |  |  |         TestSuiteWorkflow, | 
					
						
							|  |  |  |         get_incompatible_column_type_config( | 
					
						
							|  |  |  |             parameters.entity_fqn, parameters.test_case | 
					
						
							|  |  |  |         ), | 
					
						
							|  |  |  |         raise_from_status=False, | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     cleanup_fqns( | 
					
						
							|  |  |  |         TestCase, | 
					
						
							|  |  |  |         f"{parameters.entity_fqn}.{parameters.test_case.columnName}.{parameters.test_case.name}", | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     assert_equal_pydantic_objects( | 
					
						
							|  |  |  |         parameters.expected_failure, | 
					
						
							|  |  |  |         test_suite_processor.steps[0].get_status().failures[0], | 
					
						
							| 
									
										
										
										
											2024-07-17 08:11:34 +02:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2024-07-02 09:56:35 +02:00
										 |  |  |     assert ( | 
					
						
							|  |  |  |         f"{db_service.fullyQualifiedName.root}.dvdrental.public.customer.customer_id.compatible_test" | 
					
						
							| 
									
										
										
										
											2024-07-12 09:44:21 +02:00
										 |  |  |         in test_suite_processor.steps[1].get_status().records | 
					
						
							| 
									
										
										
										
											2024-07-02 09:56:35 +02:00
										 |  |  |     ), "Test case compatible_test should pass" |