mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-07-27 03:10:04 +00:00
Add partial support for BQ partitioned table (#7066)
* Added support for BQ time based partition (not ingestion) * Fixed minor errors in test suite workflow
This commit is contained in:
parent
1dfcb45e86
commit
a39c4db8e7
@ -248,11 +248,8 @@ class SQAInterface(InterfaceProtocol):
|
|||||||
sample=sample,
|
sample=sample,
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
if column is not None:
|
||||||
column = column.name
|
column = column.name
|
||||||
except Exception as exc:
|
|
||||||
logger.debug(traceback.format_exc())
|
|
||||||
logger.warning(f"Unexpected exception computing metrics: {exc}")
|
|
||||||
|
|
||||||
return row, column
|
return row, column
|
||||||
|
|
||||||
|
@ -34,7 +34,7 @@ class TablePartitionConfig(ConfigModel):
|
|||||||
"""table partition config"""
|
"""table partition config"""
|
||||||
|
|
||||||
partitionField: Optional[str] = None
|
partitionField: Optional[str] = None
|
||||||
partitionQueryDuration: Optional[int] = 1
|
partitionQueryDuration: Optional[int] = 30
|
||||||
partitionValues: Optional[List] = None
|
partitionValues: Optional[List] = None
|
||||||
|
|
||||||
|
|
||||||
|
@ -28,6 +28,7 @@ from metadata.config.workflow import get_sink
|
|||||||
from metadata.generated.schema.entity.data.database import Database
|
from metadata.generated.schema.entity.data.database import Database
|
||||||
from metadata.generated.schema.entity.data.table import (
|
from metadata.generated.schema.entity.data.table import (
|
||||||
ColumnProfilerConfig,
|
ColumnProfilerConfig,
|
||||||
|
IntervalType,
|
||||||
Table,
|
Table,
|
||||||
TableProfile,
|
TableProfile,
|
||||||
)
|
)
|
||||||
@ -210,6 +211,26 @@ class ProfilerWorkflow:
|
|||||||
if entity_config:
|
if entity_config:
|
||||||
return entity_config.partitionConfig
|
return entity_config.partitionConfig
|
||||||
|
|
||||||
|
if entity.tablePartition:
|
||||||
|
if entity.tablePartition.intervalType in {
|
||||||
|
IntervalType.TIME_UNIT,
|
||||||
|
IntervalType.INGESTION_TIME,
|
||||||
|
}:
|
||||||
|
try:
|
||||||
|
partition_field = entity.tablePartition.columns[0]
|
||||||
|
except Exception:
|
||||||
|
raise TypeError(
|
||||||
|
"Unsupported ingestion based partition type. Skipping table"
|
||||||
|
)
|
||||||
|
|
||||||
|
return TablePartitionConfig(
|
||||||
|
partitionField=partition_field,
|
||||||
|
)
|
||||||
|
|
||||||
|
raise TypeError(
|
||||||
|
f"Unsupported partition type {entity.tablePartition.intervalType}. Skipping table"
|
||||||
|
)
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def create_profiler_interface(self, service_connection_config, table_entity: Table):
|
def create_profiler_interface(self, service_connection_config, table_entity: Table):
|
||||||
|
@ -164,11 +164,10 @@ class partition_filter_handler:
|
|||||||
def handle_and_execute(_self, *args, **kwargs):
|
def handle_and_execute(_self, *args, **kwargs):
|
||||||
"""Handle partitioned queries"""
|
"""Handle partitioned queries"""
|
||||||
if _self._partition_details:
|
if _self._partition_details:
|
||||||
|
partition_field = _self._partition_details["partition_field"]
|
||||||
partition_filter = build_partition_predicate(
|
partition_filter = build_partition_predicate(
|
||||||
_self._partition_details,
|
_self._partition_details,
|
||||||
_self.table.__table__.c.get(
|
_self.table.__table__.c.get(partition_field),
|
||||||
_self._partition_details["partition_field"]
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
if self.build_sample:
|
if self.build_sample:
|
||||||
return (
|
return (
|
||||||
|
@ -26,7 +26,7 @@ from metadata.config.common import WorkflowExecutionError
|
|||||||
from metadata.config.workflow import get_sink
|
from metadata.config.workflow import get_sink
|
||||||
from metadata.generated.schema.api.tests.createTestCase import CreateTestCaseRequest
|
from metadata.generated.schema.api.tests.createTestCase import CreateTestCaseRequest
|
||||||
from metadata.generated.schema.api.tests.createTestSuite import CreateTestSuiteRequest
|
from metadata.generated.schema.api.tests.createTestSuite import CreateTestSuiteRequest
|
||||||
from metadata.generated.schema.entity.data.table import Table
|
from metadata.generated.schema.entity.data.table import IntervalType, Table
|
||||||
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
|
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
|
||||||
OpenMetadataConnection,
|
OpenMetadataConnection,
|
||||||
)
|
)
|
||||||
@ -40,11 +40,11 @@ from metadata.generated.schema.metadataIngestion.workflow import (
|
|||||||
from metadata.generated.schema.tests.testCase import TestCase
|
from metadata.generated.schema.tests.testCase import TestCase
|
||||||
from metadata.generated.schema.tests.testDefinition import TestDefinition
|
from metadata.generated.schema.tests.testDefinition import TestDefinition
|
||||||
from metadata.generated.schema.tests.testSuite import TestSuite
|
from metadata.generated.schema.tests.testSuite import TestSuite
|
||||||
from metadata.generated.schema.type.basic import EntityLink
|
|
||||||
from metadata.ingestion.api.parser import parse_workflow_config_gracefully
|
from metadata.ingestion.api.parser import parse_workflow_config_gracefully
|
||||||
from metadata.ingestion.api.processor import ProcessorStatus
|
from metadata.ingestion.api.processor import ProcessorStatus
|
||||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||||
from metadata.interfaces.sqa_interface import SQAInterface
|
from metadata.interfaces.sqa_interface import SQAInterface
|
||||||
|
from metadata.orm_profiler.api.models import TablePartitionConfig
|
||||||
from metadata.test_suite.api.models import TestCaseDefinition, TestSuiteProcessorConfig
|
from metadata.test_suite.api.models import TestCaseDefinition, TestSuiteProcessorConfig
|
||||||
from metadata.test_suite.runner.core import DataTestsRunner
|
from metadata.test_suite.runner.core import DataTestsRunner
|
||||||
from metadata.utils import entity_link
|
from metadata.utils import entity_link
|
||||||
@ -178,6 +178,34 @@ class TestSuiteWorkflow:
|
|||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def _get_partition_details(self, entity: Table) -> Optional[TablePartitionConfig]:
|
||||||
|
"""Get partition details
|
||||||
|
|
||||||
|
Args:
|
||||||
|
entity: table entity
|
||||||
|
"""
|
||||||
|
if entity.tablePartition:
|
||||||
|
if entity.tablePartition.intervalType in {
|
||||||
|
IntervalType.TIME_UNIT,
|
||||||
|
IntervalType.INGESTION_TIME,
|
||||||
|
}:
|
||||||
|
try:
|
||||||
|
partition_field = entity.tablePartition.columns[0]
|
||||||
|
except Exception:
|
||||||
|
raise TypeError(
|
||||||
|
"Unsupported ingestion based partition type. Skipping table"
|
||||||
|
)
|
||||||
|
|
||||||
|
return TablePartitionConfig(
|
||||||
|
partitionField=partition_field,
|
||||||
|
)
|
||||||
|
|
||||||
|
raise TypeError(
|
||||||
|
f"Unsupported partition type {entity.tablePartition.intervalType}. Skipping table"
|
||||||
|
)
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
def _create_sqa_tests_runner_interface(self, table_fqn: str):
|
def _create_sqa_tests_runner_interface(self, table_fqn: str):
|
||||||
"""create the interface to execute test against SQA sources"""
|
"""create the interface to execute test against SQA sources"""
|
||||||
table_entity = self._get_table_entity_from_test_case(table_fqn)
|
table_entity = self._get_table_entity_from_test_case(table_fqn)
|
||||||
@ -193,6 +221,9 @@ class TestSuiteWorkflow:
|
|||||||
profile_query=self._get_profile_query(table_entity)
|
profile_query=self._get_profile_query(table_entity)
|
||||||
if not self._get_profile_sample(table_entity)
|
if not self._get_profile_sample(table_entity)
|
||||||
else None,
|
else None,
|
||||||
|
partition_config=self._get_partition_details(table_entity)
|
||||||
|
if not self._get_profile_query(table_entity)
|
||||||
|
else None,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _create_data_tests_runner(self, sqa_interface):
|
def _create_data_tests_runner(self, sqa_interface):
|
||||||
@ -344,6 +375,7 @@ class TestSuiteWorkflow:
|
|||||||
unique_table_fqns = self._get_unique_table_entities(test_cases)
|
unique_table_fqns = self._get_unique_table_entities(test_cases)
|
||||||
|
|
||||||
for table_fqn in unique_table_fqns:
|
for table_fqn in unique_table_fqns:
|
||||||
|
try:
|
||||||
sqa_interface = self._create_sqa_tests_runner_interface(table_fqn)
|
sqa_interface = self._create_sqa_tests_runner_interface(table_fqn)
|
||||||
for test_case in test_cases:
|
for test_case in test_cases:
|
||||||
try:
|
try:
|
||||||
@ -353,9 +385,16 @@ class TestSuiteWorkflow:
|
|||||||
continue
|
continue
|
||||||
if hasattr(self, "sink"):
|
if hasattr(self, "sink"):
|
||||||
self.sink.write_record(test_result)
|
self.sink.write_record(test_result)
|
||||||
logger.info(f"Successfuly ran test case {test_case.name.__root__}")
|
logger.info(
|
||||||
|
f"Successfuly ran test case {test_case.name.__root__}"
|
||||||
|
)
|
||||||
self.status.processed(test_case.fullyQualifiedName.__root__)
|
self.status.processed(test_case.fullyQualifiedName.__root__)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
|
logger.debug(traceback.format_exc(exc))
|
||||||
|
logger.warning(
|
||||||
|
f"Could not run test case {test_case.name}: {exc}"
|
||||||
|
)
|
||||||
|
except TypeError as exc:
|
||||||
logger.debug(traceback.format_exc(exc))
|
logger.debug(traceback.format_exc(exc))
|
||||||
logger.warning(f"Could not run test case {test_case.name}: {exc}")
|
logger.warning(f"Could not run test case {test_case.name}: {exc}")
|
||||||
self.status.failure(test_case.fullyQualifiedName.__root__)
|
self.status.failure(test_case.fullyQualifiedName.__root__)
|
||||||
|
@ -67,7 +67,7 @@ def table_column_count_to_be_between(
|
|||||||
(
|
(
|
||||||
int(param_value.value)
|
int(param_value.value)
|
||||||
for param_value in test_case.parameterValues
|
for param_value in test_case.parameterValues
|
||||||
if param_value.name == "minColvalue"
|
if param_value.name == "minColValue"
|
||||||
),
|
),
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
@ -75,7 +75,7 @@ def table_column_count_to_be_between(
|
|||||||
max_ = next(
|
max_ = next(
|
||||||
int(param_value.value)
|
int(param_value.value)
|
||||||
for param_value in test_case.parameterValues
|
for param_value in test_case.parameterValues
|
||||||
if param_value.name == "maxColvalue"
|
if param_value.name == "maxColValue"
|
||||||
)
|
)
|
||||||
|
|
||||||
status = (
|
status = (
|
||||||
|
@ -65,8 +65,8 @@ test_suite_config = {
|
|||||||
"testDefinitionName": "TableColumnCountToBeBetween",
|
"testDefinitionName": "TableColumnCountToBeBetween",
|
||||||
"entityLink": "<#E::table::test_suite_service_test.test_suite_database.test_suite_database_schema.users>",
|
"entityLink": "<#E::table::test_suite_service_test.test_suite_database.test_suite_database_schema.users>",
|
||||||
"parameterValues": [
|
"parameterValues": [
|
||||||
{"name": "minColvalue", "value": 1},
|
{"name": "minColValue", "value": 1},
|
||||||
{"name": "maxColvalue", "value": 5},
|
{"name": "maxColValue", "value": 5},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -76,8 +76,8 @@ class TestSuiteWorkflowTests(unittest.TestCase):
|
|||||||
"testDefinitionName": "TableColumnCountToBeBetween",
|
"testDefinitionName": "TableColumnCountToBeBetween",
|
||||||
"entityLink": "<#E::table::my.fully.qualified.name>",
|
"entityLink": "<#E::table::my.fully.qualified.name>",
|
||||||
"parameterValues": [
|
"parameterValues": [
|
||||||
{"name": "minColvalue", "value": 1},
|
{"name": "minColValue", "value": 1},
|
||||||
{"name": "maxColvalue", "value": 5},
|
{"name": "maxColValue", "value": 5},
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -150,8 +150,8 @@ class TestSuiteWorkflowTests(unittest.TestCase):
|
|||||||
"testDefinitionName": "TableColumnCountToBeBetween",
|
"testDefinitionName": "TableColumnCountToBeBetween",
|
||||||
"entityLink": "<#E::table::my.fully.qualified.name>",
|
"entityLink": "<#E::table::my.fully.qualified.name>",
|
||||||
"parameterValues": [
|
"parameterValues": [
|
||||||
{"name": "minColvalue", "value": 1},
|
{"name": "minColValue", "value": 1},
|
||||||
{"name": "maxColvalue", "value": 5},
|
{"name": "maxColValue", "value": 5},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -159,8 +159,8 @@ class TestSuiteWorkflowTests(unittest.TestCase):
|
|||||||
"testDefinitionName": "TableColumnCountToBeBetween",
|
"testDefinitionName": "TableColumnCountToBeBetween",
|
||||||
"entityLink": "<#E::table::my.fully.qualified.name>",
|
"entityLink": "<#E::table::my.fully.qualified.name>",
|
||||||
"parameterValues": [
|
"parameterValues": [
|
||||||
{"name": "minColvalue", "value": 1},
|
{"name": "minColValue", "value": 1},
|
||||||
{"name": "maxColvalue", "value": 5},
|
{"name": "maxColValue", "value": 5},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
@ -173,8 +173,8 @@ class TestSuiteWorkflowTests(unittest.TestCase):
|
|||||||
"testDefinitionName": "TableColumnCountToBeBetween",
|
"testDefinitionName": "TableColumnCountToBeBetween",
|
||||||
"entityLink": "<#E::table::my.fully.qualified.name>",
|
"entityLink": "<#E::table::my.fully.qualified.name>",
|
||||||
"parameterValues": [
|
"parameterValues": [
|
||||||
{"name": "minColvalue", "value": 1},
|
{"name": "minColValue", "value": 1},
|
||||||
{"name": "maxColvalue", "value": 5},
|
{"name": "maxColValue", "value": 5},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
@ -216,8 +216,8 @@ class TestSuiteWorkflowTests(unittest.TestCase):
|
|||||||
"testDefinitionName": "TableColumnCountToBeBetween",
|
"testDefinitionName": "TableColumnCountToBeBetween",
|
||||||
"entityLink": "<#E::table::sample_data.ecommerce_db.shopify.dim_address>",
|
"entityLink": "<#E::table::sample_data.ecommerce_db.shopify.dim_address>",
|
||||||
"parameterValues": [
|
"parameterValues": [
|
||||||
{"name": "minColvalue", "value": 1},
|
{"name": "minColValue", "value": 1},
|
||||||
{"name": "maxColvalue", "value": 5},
|
{"name": "maxColValue", "value": 5},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -290,7 +290,7 @@ class TestSuiteWorkflowTests(unittest.TestCase):
|
|||||||
"entityLink": "<#E::table::sample_data.ecommerce_db.shopify.dim_address>",
|
"entityLink": "<#E::table::sample_data.ecommerce_db.shopify.dim_address>",
|
||||||
"parameterValues": [
|
"parameterValues": [
|
||||||
{"name": "minColValue", "value": 1},
|
{"name": "minColValue", "value": 1},
|
||||||
{"name": "maxColvalue", "value": 10},
|
{"name": "maxColValue", "value": 10},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
@ -479,8 +479,8 @@ class testSuiteValidation(unittest.TestCase):
|
|||||||
testSuite=EntityReference(id=uuid4(), type="TestSuite"),
|
testSuite=EntityReference(id=uuid4(), type="TestSuite"),
|
||||||
testDefinition=EntityReference(id=uuid4(), type="TestDefinition"),
|
testDefinition=EntityReference(id=uuid4(), type="TestDefinition"),
|
||||||
parameterValues=[
|
parameterValues=[
|
||||||
TestCaseParameterValue(name="minColvalue", value="2"),
|
TestCaseParameterValue(name="minColValue", value="2"),
|
||||||
TestCaseParameterValue(name="maxColvalue", value="10"),
|
TestCaseParameterValue(name="maxColValue", value="10"),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user