mirror of
				https://github.com/open-metadata/OpenMetadata.git
				synced 2025-10-26 16:22:09 +00:00 
			
		
		
		
	Add partial support for BQ partitioned table (#7066)
* Added support for BQ time based partition (not ingestion) * Fixed minor errors in test suite workflow
This commit is contained in:
		
							parent
							
								
									1dfcb45e86
								
							
						
					
					
						commit
						a39c4db8e7
					
				| @ -248,11 +248,8 @@ class SQAInterface(InterfaceProtocol): | |||||||
|             sample=sample, |             sample=sample, | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|         try: |         if column is not None: | ||||||
|             column = column.name |             column = column.name | ||||||
|         except Exception as exc: |  | ||||||
|             logger.debug(traceback.format_exc()) |  | ||||||
|             logger.warning(f"Unexpected exception computing metrics: {exc}") |  | ||||||
| 
 | 
 | ||||||
|         return row, column |         return row, column | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -34,7 +34,7 @@ class TablePartitionConfig(ConfigModel): | |||||||
|     """table partition config""" |     """table partition config""" | ||||||
| 
 | 
 | ||||||
|     partitionField: Optional[str] = None |     partitionField: Optional[str] = None | ||||||
|     partitionQueryDuration: Optional[int] = 1 |     partitionQueryDuration: Optional[int] = 30 | ||||||
|     partitionValues: Optional[List] = None |     partitionValues: Optional[List] = None | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -28,6 +28,7 @@ from metadata.config.workflow import get_sink | |||||||
| from metadata.generated.schema.entity.data.database import Database | from metadata.generated.schema.entity.data.database import Database | ||||||
| from metadata.generated.schema.entity.data.table import ( | from metadata.generated.schema.entity.data.table import ( | ||||||
|     ColumnProfilerConfig, |     ColumnProfilerConfig, | ||||||
|  |     IntervalType, | ||||||
|     Table, |     Table, | ||||||
|     TableProfile, |     TableProfile, | ||||||
| ) | ) | ||||||
| @ -210,6 +211,26 @@ class ProfilerWorkflow: | |||||||
|         if entity_config: |         if entity_config: | ||||||
|             return entity_config.partitionConfig |             return entity_config.partitionConfig | ||||||
| 
 | 
 | ||||||
|  |         if entity.tablePartition: | ||||||
|  |             if entity.tablePartition.intervalType in { | ||||||
|  |                 IntervalType.TIME_UNIT, | ||||||
|  |                 IntervalType.INGESTION_TIME, | ||||||
|  |             }: | ||||||
|  |                 try: | ||||||
|  |                     partition_field = entity.tablePartition.columns[0] | ||||||
|  |                 except Exception: | ||||||
|  |                     raise TypeError( | ||||||
|  |                         "Unsupported ingestion based partition type. Skipping table" | ||||||
|  |                     ) | ||||||
|  | 
 | ||||||
|  |                 return TablePartitionConfig( | ||||||
|  |                     partitionField=partition_field, | ||||||
|  |                 ) | ||||||
|  | 
 | ||||||
|  |             raise TypeError( | ||||||
|  |                 f"Unsupported partition type {entity.tablePartition.intervalType}. Skipping table" | ||||||
|  |             ) | ||||||
|  | 
 | ||||||
|         return None |         return None | ||||||
| 
 | 
 | ||||||
|     def create_profiler_interface(self, service_connection_config, table_entity: Table): |     def create_profiler_interface(self, service_connection_config, table_entity: Table): | ||||||
|  | |||||||
| @ -164,11 +164,10 @@ class partition_filter_handler: | |||||||
|         def handle_and_execute(_self, *args, **kwargs): |         def handle_and_execute(_self, *args, **kwargs): | ||||||
|             """Handle partitioned queries""" |             """Handle partitioned queries""" | ||||||
|             if _self._partition_details: |             if _self._partition_details: | ||||||
|  |                 partition_field = _self._partition_details["partition_field"] | ||||||
|                 partition_filter = build_partition_predicate( |                 partition_filter = build_partition_predicate( | ||||||
|                     _self._partition_details, |                     _self._partition_details, | ||||||
|                     _self.table.__table__.c.get( |                     _self.table.__table__.c.get(partition_field), | ||||||
|                         _self._partition_details["partition_field"] |  | ||||||
|                     ), |  | ||||||
|                 ) |                 ) | ||||||
|                 if self.build_sample: |                 if self.build_sample: | ||||||
|                     return ( |                     return ( | ||||||
|  | |||||||
| @ -26,7 +26,7 @@ from metadata.config.common import WorkflowExecutionError | |||||||
| from metadata.config.workflow import get_sink | from metadata.config.workflow import get_sink | ||||||
| from metadata.generated.schema.api.tests.createTestCase import CreateTestCaseRequest | from metadata.generated.schema.api.tests.createTestCase import CreateTestCaseRequest | ||||||
| from metadata.generated.schema.api.tests.createTestSuite import CreateTestSuiteRequest | from metadata.generated.schema.api.tests.createTestSuite import CreateTestSuiteRequest | ||||||
| from metadata.generated.schema.entity.data.table import Table | from metadata.generated.schema.entity.data.table import IntervalType, Table | ||||||
| from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import ( | from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import ( | ||||||
|     OpenMetadataConnection, |     OpenMetadataConnection, | ||||||
| ) | ) | ||||||
| @ -40,11 +40,11 @@ from metadata.generated.schema.metadataIngestion.workflow import ( | |||||||
| from metadata.generated.schema.tests.testCase import TestCase | from metadata.generated.schema.tests.testCase import TestCase | ||||||
| from metadata.generated.schema.tests.testDefinition import TestDefinition | from metadata.generated.schema.tests.testDefinition import TestDefinition | ||||||
| from metadata.generated.schema.tests.testSuite import TestSuite | from metadata.generated.schema.tests.testSuite import TestSuite | ||||||
| from metadata.generated.schema.type.basic import EntityLink |  | ||||||
| from metadata.ingestion.api.parser import parse_workflow_config_gracefully | from metadata.ingestion.api.parser import parse_workflow_config_gracefully | ||||||
| from metadata.ingestion.api.processor import ProcessorStatus | from metadata.ingestion.api.processor import ProcessorStatus | ||||||
| from metadata.ingestion.ometa.ometa_api import OpenMetadata | from metadata.ingestion.ometa.ometa_api import OpenMetadata | ||||||
| from metadata.interfaces.sqa_interface import SQAInterface | from metadata.interfaces.sqa_interface import SQAInterface | ||||||
|  | from metadata.orm_profiler.api.models import TablePartitionConfig | ||||||
| from metadata.test_suite.api.models import TestCaseDefinition, TestSuiteProcessorConfig | from metadata.test_suite.api.models import TestCaseDefinition, TestSuiteProcessorConfig | ||||||
| from metadata.test_suite.runner.core import DataTestsRunner | from metadata.test_suite.runner.core import DataTestsRunner | ||||||
| from metadata.utils import entity_link | from metadata.utils import entity_link | ||||||
| @ -178,6 +178,34 @@ class TestSuiteWorkflow: | |||||||
| 
 | 
 | ||||||
|         return None |         return None | ||||||
| 
 | 
 | ||||||
|  |     def _get_partition_details(self, entity: Table) -> Optional[TablePartitionConfig]: | ||||||
|  |         """Get partition details | ||||||
|  | 
 | ||||||
|  |         Args: | ||||||
|  |             entity: table entity | ||||||
|  |         """ | ||||||
|  |         if entity.tablePartition: | ||||||
|  |             if entity.tablePartition.intervalType in { | ||||||
|  |                 IntervalType.TIME_UNIT, | ||||||
|  |                 IntervalType.INGESTION_TIME, | ||||||
|  |             }: | ||||||
|  |                 try: | ||||||
|  |                     partition_field = entity.tablePartition.columns[0] | ||||||
|  |                 except Exception: | ||||||
|  |                     raise TypeError( | ||||||
|  |                         "Unsupported ingestion based partition type. Skipping table" | ||||||
|  |                     ) | ||||||
|  | 
 | ||||||
|  |                 return TablePartitionConfig( | ||||||
|  |                     partitionField=partition_field, | ||||||
|  |                 ) | ||||||
|  | 
 | ||||||
|  |             raise TypeError( | ||||||
|  |                 f"Unsupported partition type {entity.tablePartition.intervalType}. Skipping table" | ||||||
|  |             ) | ||||||
|  | 
 | ||||||
|  |         return None | ||||||
|  | 
 | ||||||
|     def _create_sqa_tests_runner_interface(self, table_fqn: str): |     def _create_sqa_tests_runner_interface(self, table_fqn: str): | ||||||
|         """create the interface to execute test against SQA sources""" |         """create the interface to execute test against SQA sources""" | ||||||
|         table_entity = self._get_table_entity_from_test_case(table_fqn) |         table_entity = self._get_table_entity_from_test_case(table_fqn) | ||||||
| @ -193,6 +221,9 @@ class TestSuiteWorkflow: | |||||||
|             profile_query=self._get_profile_query(table_entity) |             profile_query=self._get_profile_query(table_entity) | ||||||
|             if not self._get_profile_sample(table_entity) |             if not self._get_profile_sample(table_entity) | ||||||
|             else None, |             else None, | ||||||
|  |             partition_config=self._get_partition_details(table_entity) | ||||||
|  |             if not self._get_profile_query(table_entity) | ||||||
|  |             else None, | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|     def _create_data_tests_runner(self, sqa_interface): |     def _create_data_tests_runner(self, sqa_interface): | ||||||
| @ -344,21 +375,29 @@ class TestSuiteWorkflow: | |||||||
|         unique_table_fqns = self._get_unique_table_entities(test_cases) |         unique_table_fqns = self._get_unique_table_entities(test_cases) | ||||||
| 
 | 
 | ||||||
|         for table_fqn in unique_table_fqns: |         for table_fqn in unique_table_fqns: | ||||||
|             sqa_interface = self._create_sqa_tests_runner_interface(table_fqn) |             try: | ||||||
|             for test_case in test_cases: |                 sqa_interface = self._create_sqa_tests_runner_interface(table_fqn) | ||||||
|                 try: |                 for test_case in test_cases: | ||||||
|                     data_test_runner = self._create_data_tests_runner(sqa_interface) |                     try: | ||||||
|                     test_result = data_test_runner.run_and_handle(test_case) |                         data_test_runner = self._create_data_tests_runner(sqa_interface) | ||||||
|                     if not test_result: |                         test_result = data_test_runner.run_and_handle(test_case) | ||||||
|                         continue |                         if not test_result: | ||||||
|                     if hasattr(self, "sink"): |                             continue | ||||||
|                         self.sink.write_record(test_result) |                         if hasattr(self, "sink"): | ||||||
|                     logger.info(f"Successfuly ran test case {test_case.name.__root__}") |                             self.sink.write_record(test_result) | ||||||
|                     self.status.processed(test_case.fullyQualifiedName.__root__) |                         logger.info( | ||||||
|                 except Exception as exc: |                             f"Successfuly ran test case {test_case.name.__root__}" | ||||||
|                     logger.debug(traceback.format_exc(exc)) |                         ) | ||||||
|                     logger.warning(f"Could not run test case {test_case.name}: {exc}") |                         self.status.processed(test_case.fullyQualifiedName.__root__) | ||||||
|                     self.status.failure(test_case.fullyQualifiedName.__root__) |                     except Exception as exc: | ||||||
|  |                         logger.debug(traceback.format_exc(exc)) | ||||||
|  |                         logger.warning( | ||||||
|  |                             f"Could not run test case {test_case.name}: {exc}" | ||||||
|  |                         ) | ||||||
|  |             except TypeError as exc: | ||||||
|  |                 logger.debug(traceback.format_exc(exc)) | ||||||
|  |                 logger.warning(f"Could not run test case {test_case.name}: {exc}") | ||||||
|  |                 self.status.failure(test_case.fullyQualifiedName.__root__) | ||||||
| 
 | 
 | ||||||
|     def print_status(self) -> int: |     def print_status(self) -> int: | ||||||
|         """ |         """ | ||||||
|  | |||||||
| @ -67,7 +67,7 @@ def table_column_count_to_be_between( | |||||||
|         ( |         ( | ||||||
|             int(param_value.value) |             int(param_value.value) | ||||||
|             for param_value in test_case.parameterValues |             for param_value in test_case.parameterValues | ||||||
|             if param_value.name == "minColvalue" |             if param_value.name == "minColValue" | ||||||
|         ), |         ), | ||||||
|         None, |         None, | ||||||
|     ) |     ) | ||||||
| @ -75,7 +75,7 @@ def table_column_count_to_be_between( | |||||||
|     max_ = next( |     max_ = next( | ||||||
|         int(param_value.value) |         int(param_value.value) | ||||||
|         for param_value in test_case.parameterValues |         for param_value in test_case.parameterValues | ||||||
|         if param_value.name == "maxColvalue" |         if param_value.name == "maxColValue" | ||||||
|     ) |     ) | ||||||
| 
 | 
 | ||||||
|     status = ( |     status = ( | ||||||
|  | |||||||
| @ -65,8 +65,8 @@ test_suite_config = { | |||||||
|                             "testDefinitionName": "TableColumnCountToBeBetween", |                             "testDefinitionName": "TableColumnCountToBeBetween", | ||||||
|                             "entityLink": "<#E::table::test_suite_service_test.test_suite_database.test_suite_database_schema.users>", |                             "entityLink": "<#E::table::test_suite_service_test.test_suite_database.test_suite_database_schema.users>", | ||||||
|                             "parameterValues": [ |                             "parameterValues": [ | ||||||
|                                 {"name": "minColvalue", "value": 1}, |                                 {"name": "minColValue", "value": 1}, | ||||||
|                                 {"name": "maxColvalue", "value": 5}, |                                 {"name": "maxColValue", "value": 5}, | ||||||
|                             ], |                             ], | ||||||
|                         }, |                         }, | ||||||
|                         { |                         { | ||||||
|  | |||||||
| @ -76,8 +76,8 @@ class TestSuiteWorkflowTests(unittest.TestCase): | |||||||
|                                     "testDefinitionName": "TableColumnCountToBeBetween", |                                     "testDefinitionName": "TableColumnCountToBeBetween", | ||||||
|                                     "entityLink": "<#E::table::my.fully.qualified.name>", |                                     "entityLink": "<#E::table::my.fully.qualified.name>", | ||||||
|                                     "parameterValues": [ |                                     "parameterValues": [ | ||||||
|                                         {"name": "minColvalue", "value": 1}, |                                         {"name": "minColValue", "value": 1}, | ||||||
|                                         {"name": "maxColvalue", "value": 5}, |                                         {"name": "maxColValue", "value": 5}, | ||||||
|                                     ], |                                     ], | ||||||
|                                 } |                                 } | ||||||
|                             ], |                             ], | ||||||
| @ -150,8 +150,8 @@ class TestSuiteWorkflowTests(unittest.TestCase): | |||||||
|                                     "testDefinitionName": "TableColumnCountToBeBetween", |                                     "testDefinitionName": "TableColumnCountToBeBetween", | ||||||
|                                     "entityLink": "<#E::table::my.fully.qualified.name>", |                                     "entityLink": "<#E::table::my.fully.qualified.name>", | ||||||
|                                     "parameterValues": [ |                                     "parameterValues": [ | ||||||
|                                         {"name": "minColvalue", "value": 1}, |                                         {"name": "minColValue", "value": 1}, | ||||||
|                                         {"name": "maxColvalue", "value": 5}, |                                         {"name": "maxColValue", "value": 5}, | ||||||
|                                     ], |                                     ], | ||||||
|                                 }, |                                 }, | ||||||
|                                 { |                                 { | ||||||
| @ -159,8 +159,8 @@ class TestSuiteWorkflowTests(unittest.TestCase): | |||||||
|                                     "testDefinitionName": "TableColumnCountToBeBetween", |                                     "testDefinitionName": "TableColumnCountToBeBetween", | ||||||
|                                     "entityLink": "<#E::table::my.fully.qualified.name>", |                                     "entityLink": "<#E::table::my.fully.qualified.name>", | ||||||
|                                     "parameterValues": [ |                                     "parameterValues": [ | ||||||
|                                         {"name": "minColvalue", "value": 1}, |                                         {"name": "minColValue", "value": 1}, | ||||||
|                                         {"name": "maxColvalue", "value": 5}, |                                         {"name": "maxColValue", "value": 5}, | ||||||
|                                     ], |                                     ], | ||||||
|                                 }, |                                 }, | ||||||
|                             ], |                             ], | ||||||
| @ -173,8 +173,8 @@ class TestSuiteWorkflowTests(unittest.TestCase): | |||||||
|                                     "testDefinitionName": "TableColumnCountToBeBetween", |                                     "testDefinitionName": "TableColumnCountToBeBetween", | ||||||
|                                     "entityLink": "<#E::table::my.fully.qualified.name>", |                                     "entityLink": "<#E::table::my.fully.qualified.name>", | ||||||
|                                     "parameterValues": [ |                                     "parameterValues": [ | ||||||
|                                         {"name": "minColvalue", "value": 1}, |                                         {"name": "minColValue", "value": 1}, | ||||||
|                                         {"name": "maxColvalue", "value": 5}, |                                         {"name": "maxColValue", "value": 5}, | ||||||
|                                     ], |                                     ], | ||||||
|                                 }, |                                 }, | ||||||
|                             ], |                             ], | ||||||
| @ -216,8 +216,8 @@ class TestSuiteWorkflowTests(unittest.TestCase): | |||||||
|                                     "testDefinitionName": "TableColumnCountToBeBetween", |                                     "testDefinitionName": "TableColumnCountToBeBetween", | ||||||
|                                     "entityLink": "<#E::table::sample_data.ecommerce_db.shopify.dim_address>", |                                     "entityLink": "<#E::table::sample_data.ecommerce_db.shopify.dim_address>", | ||||||
|                                     "parameterValues": [ |                                     "parameterValues": [ | ||||||
|                                         {"name": "minColvalue", "value": 1}, |                                         {"name": "minColValue", "value": 1}, | ||||||
|                                         {"name": "maxColvalue", "value": 5}, |                                         {"name": "maxColValue", "value": 5}, | ||||||
|                                     ], |                                     ], | ||||||
|                                 }, |                                 }, | ||||||
|                                 { |                                 { | ||||||
| @ -290,7 +290,7 @@ class TestSuiteWorkflowTests(unittest.TestCase): | |||||||
|                                     "entityLink": "<#E::table::sample_data.ecommerce_db.shopify.dim_address>", |                                     "entityLink": "<#E::table::sample_data.ecommerce_db.shopify.dim_address>", | ||||||
|                                     "parameterValues": [ |                                     "parameterValues": [ | ||||||
|                                         {"name": "minColValue", "value": 1}, |                                         {"name": "minColValue", "value": 1}, | ||||||
|                                         {"name": "maxColvalue", "value": 10}, |                                         {"name": "maxColValue", "value": 10}, | ||||||
|                                     ], |                                     ], | ||||||
|                                 }, |                                 }, | ||||||
|                             ], |                             ], | ||||||
|  | |||||||
| @ -479,8 +479,8 @@ class testSuiteValidation(unittest.TestCase): | |||||||
|             testSuite=EntityReference(id=uuid4(), type="TestSuite"), |             testSuite=EntityReference(id=uuid4(), type="TestSuite"), | ||||||
|             testDefinition=EntityReference(id=uuid4(), type="TestDefinition"), |             testDefinition=EntityReference(id=uuid4(), type="TestDefinition"), | ||||||
|             parameterValues=[ |             parameterValues=[ | ||||||
|                 TestCaseParameterValue(name="minColvalue", value="2"), |                 TestCaseParameterValue(name="minColValue", value="2"), | ||||||
|                 TestCaseParameterValue(name="maxColvalue", value="10"), |                 TestCaseParameterValue(name="maxColValue", value="10"), | ||||||
|             ], |             ], | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Teddy
						Teddy