mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-09-14 19:41:50 +00:00
parent
d70cf2ea7a
commit
899cd7e1fe
@ -14,16 +14,11 @@ Test Suite Workflow Source
|
|||||||
|
|
||||||
The main goal is to get the configured table from the API.
|
The main goal is to get the configured table from the API.
|
||||||
"""
|
"""
|
||||||
import traceback
|
|
||||||
from typing import Iterable, List, Optional, cast
|
from typing import Iterable, List, Optional, cast
|
||||||
|
|
||||||
from metadata.data_quality.api.models import TableAndTests
|
from metadata.data_quality.api.models import TableAndTests
|
||||||
from metadata.generated.schema.api.tests.createTestSuite import CreateTestSuiteRequest
|
from metadata.generated.schema.api.tests.createTestSuite import CreateTestSuiteRequest
|
||||||
from metadata.generated.schema.entity.data.table import Table
|
from metadata.generated.schema.entity.data.table import Table
|
||||||
from metadata.generated.schema.entity.services.connections.serviceConnection import (
|
|
||||||
ServiceConnection,
|
|
||||||
)
|
|
||||||
from metadata.generated.schema.entity.services.databaseService import DatabaseService
|
|
||||||
from metadata.generated.schema.metadataIngestion.testSuitePipeline import (
|
from metadata.generated.schema.metadataIngestion.testSuitePipeline import (
|
||||||
TestSuitePipeline,
|
TestSuitePipeline,
|
||||||
)
|
)
|
||||||
@ -38,7 +33,6 @@ from metadata.ingestion.api.step import Step
|
|||||||
from metadata.ingestion.api.steps import Source
|
from metadata.ingestion.api.steps import Source
|
||||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||||
from metadata.utils import fqn
|
from metadata.utils import fqn
|
||||||
from metadata.utils.fqn import split
|
|
||||||
from metadata.utils.logger import test_suite_logger
|
from metadata.utils.logger import test_suite_logger
|
||||||
|
|
||||||
logger = test_suite_logger()
|
logger = test_suite_logger()
|
||||||
@ -61,56 +55,8 @@ class TestSuiteSource(Source):
|
|||||||
|
|
||||||
self.source_config: TestSuitePipeline = self.config.source.sourceConfig.config
|
self.source_config: TestSuitePipeline = self.config.source.sourceConfig.config
|
||||||
|
|
||||||
self.service: DatabaseService = self._retrieve_service()
|
|
||||||
self._retrieve_service_connection()
|
|
||||||
|
|
||||||
self.test_connection()
|
self.test_connection()
|
||||||
|
|
||||||
def _retrieve_service(self) -> DatabaseService:
|
|
||||||
"""Get service object from source config `entityFullyQualifiedName`"""
|
|
||||||
fully_qualified_name = self.source_config.entityFullyQualifiedName.__root__
|
|
||||||
try:
|
|
||||||
service_name = split(fully_qualified_name)[0]
|
|
||||||
except IndexError as exc:
|
|
||||||
logger.debug(traceback.format_exc())
|
|
||||||
raise IndexError(
|
|
||||||
f"Could not retrieve service name from entity fully qualified name {fully_qualified_name}: {exc}"
|
|
||||||
)
|
|
||||||
try:
|
|
||||||
service = self.metadata.get_by_name(DatabaseService, service_name)
|
|
||||||
if not service:
|
|
||||||
raise ConnectionError(
|
|
||||||
f"Could not retrieve service with name `{service_name}`. "
|
|
||||||
"Typically caused by the `entityFullyQualifiedName` does not exists in OpenMetadata "
|
|
||||||
"or the JWT Token is invalid."
|
|
||||||
)
|
|
||||||
|
|
||||||
return service
|
|
||||||
|
|
||||||
except ConnectionError as exc:
|
|
||||||
raise exc
|
|
||||||
except Exception as exc:
|
|
||||||
logger.debug(traceback.format_exc())
|
|
||||||
logger.error(
|
|
||||||
f"Error getting service connection for service name [{service_name}]"
|
|
||||||
f" using the secrets manager provider [{self.metadata.config.secretsManagerProvider}]: {exc}"
|
|
||||||
)
|
|
||||||
raise exc
|
|
||||||
|
|
||||||
def _retrieve_service_connection(self) -> None:
|
|
||||||
"""
|
|
||||||
We override the current `serviceConnection` source config object if source workflow service already exists
|
|
||||||
in OM. When it is configured, we retrieve the service connection from the secrets' manager. Otherwise, we get it
|
|
||||||
from the service object itself through the default `SecretsManager`.
|
|
||||||
"""
|
|
||||||
if (
|
|
||||||
not self.config.source.serviceConnection
|
|
||||||
and not self.metadata.config.forceEntityOverwriting
|
|
||||||
):
|
|
||||||
self.config.source.serviceConnection = ServiceConnection(
|
|
||||||
__root__=self.service.connection
|
|
||||||
)
|
|
||||||
|
|
||||||
def _get_table_entity(self) -> Optional[Table]:
|
def _get_table_entity(self) -> Optional[Table]:
|
||||||
"""given an entity fqn return the table entity
|
"""given an entity fqn return the table entity
|
||||||
|
|
||||||
@ -166,9 +112,7 @@ class TestSuiteSource(Source):
|
|||||||
"""
|
"""
|
||||||
Check that the table has the proper test suite built in
|
Check that the table has the proper test suite built in
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# If there is no executable test suite yet for the table, we'll need to create one
|
# If there is no executable test suite yet for the table, we'll need to create one
|
||||||
executable_test_suite = None
|
|
||||||
if not table.testSuite:
|
if not table.testSuite:
|
||||||
executable_test_suite = CreateTestSuiteRequest(
|
executable_test_suite = CreateTestSuiteRequest(
|
||||||
name=fqn.build(
|
name=fqn.build(
|
||||||
@ -184,7 +128,7 @@ class TestSuiteSource(Source):
|
|||||||
yield Either(
|
yield Either(
|
||||||
right=TableAndTests(
|
right=TableAndTests(
|
||||||
executable_test_suite=executable_test_suite,
|
executable_test_suite=executable_test_suite,
|
||||||
service_type=self.service.serviceType.value,
|
service_type=self.config.source.serviceConnection.__root__.config.type.value,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -205,7 +149,7 @@ class TestSuiteSource(Source):
|
|||||||
right=TableAndTests(
|
right=TableAndTests(
|
||||||
table=table,
|
table=table,
|
||||||
test_cases=test_suite_cases,
|
test_cases=test_suite_cases,
|
||||||
service_type=self.service.serviceType.value,
|
service_type=self.config.source.serviceConnection.__root__.config.type.value,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -284,33 +284,56 @@ class BaseWorkflow(ABC, WorkflowStatusMixin):
|
|||||||
workflow has prepared the necessary components, and we will update the SUCCESS/FAILED
|
workflow has prepared the necessary components, and we will update the SUCCESS/FAILED
|
||||||
status at the end of the flow.
|
status at the end of the flow.
|
||||||
"""
|
"""
|
||||||
maybe_pipeline: Optional[IngestionPipeline] = self.metadata.get_by_name(
|
try:
|
||||||
entity=IngestionPipeline, fqn=self.config.ingestionPipelineFQN
|
maybe_pipeline: Optional[IngestionPipeline] = self.metadata.get_by_name(
|
||||||
)
|
entity=IngestionPipeline, fqn=self.config.ingestionPipelineFQN
|
||||||
|
)
|
||||||
|
|
||||||
_, pipeline_name = fqn.split(
|
if maybe_pipeline:
|
||||||
self.config.ingestionPipelineFQN
|
return maybe_pipeline
|
||||||
) # Get the name from <service>.<name>
|
|
||||||
service = self.metadata.get_by_name(
|
# Get the name from <service>.<name> or, for test suites, <tableFQN>.testSuite
|
||||||
|
*_, pipeline_name = fqn.split(self.config.ingestionPipelineFQN)
|
||||||
|
|
||||||
|
service = self._get_ingestion_pipeline_service()
|
||||||
|
|
||||||
|
if service is not None:
|
||||||
|
|
||||||
|
return self.metadata.create_or_update(
|
||||||
|
CreateIngestionPipelineRequest(
|
||||||
|
name=pipeline_name,
|
||||||
|
service=EntityReference(
|
||||||
|
id=service.id,
|
||||||
|
type=get_reference_type_from_service_type(
|
||||||
|
self.service_type
|
||||||
|
),
|
||||||
|
),
|
||||||
|
pipelineType=get_pipeline_type_from_source_config(
|
||||||
|
self.config.source.sourceConfig.config
|
||||||
|
),
|
||||||
|
sourceConfig=self.config.source.sourceConfig,
|
||||||
|
airflowConfig=AirflowConfig(),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
return maybe_pipeline
|
||||||
|
|
||||||
|
except Exception as exc:
|
||||||
|
logger.error(
|
||||||
|
f"Error trying to get or create the Ingestion Pipeline due to [{exc}]"
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _get_ingestion_pipeline_service(self) -> Optional[T]:
|
||||||
|
"""
|
||||||
|
Ingestion Pipelines are linked to either an EntityService (DatabaseService, MessagingService,...)
|
||||||
|
or a Test Suite.
|
||||||
|
|
||||||
|
Depending on the Source Config Type, we'll need to GET one or the other to create
|
||||||
|
the Ingestion Pipeline
|
||||||
|
"""
|
||||||
|
|
||||||
|
return self.metadata.get_by_name(
|
||||||
entity=get_service_class_from_service_type(self.service_type),
|
entity=get_service_class_from_service_type(self.service_type),
|
||||||
fqn=self.config.source.serviceName,
|
fqn=self.config.source.serviceName,
|
||||||
)
|
)
|
||||||
|
|
||||||
if maybe_pipeline is None and service is not None:
|
|
||||||
|
|
||||||
return self.metadata.create_or_update(
|
|
||||||
CreateIngestionPipelineRequest(
|
|
||||||
name=pipeline_name,
|
|
||||||
service=EntityReference(
|
|
||||||
id=service.id,
|
|
||||||
type=get_reference_type_from_service_type(self.service_type),
|
|
||||||
),
|
|
||||||
pipelineType=get_pipeline_type_from_source_config(
|
|
||||||
self.config.source.sourceConfig.config
|
|
||||||
),
|
|
||||||
sourceConfig=self.config.source.sourceConfig,
|
|
||||||
airflowConfig=AirflowConfig(),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
return maybe_pipeline
|
|
||||||
|
@ -11,12 +11,21 @@
|
|||||||
"""
|
"""
|
||||||
Workflow definition for the Data Quality
|
Workflow definition for the Data Quality
|
||||||
"""
|
"""
|
||||||
|
import traceback
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
from metadata.data_quality.processor.test_case_runner import TestCaseRunner
|
from metadata.data_quality.processor.test_case_runner import TestCaseRunner
|
||||||
from metadata.data_quality.source.test_suite import TestSuiteSource
|
from metadata.data_quality.source.test_suite import TestSuiteSource
|
||||||
|
from metadata.generated.schema.entity.services.connections.serviceConnection import (
|
||||||
|
ServiceConnection,
|
||||||
|
)
|
||||||
|
from metadata.generated.schema.entity.services.databaseService import DatabaseService
|
||||||
|
from metadata.generated.schema.tests.testSuite import ServiceType, TestSuite
|
||||||
from metadata.ingestion.api.steps import Processor, Sink
|
from metadata.ingestion.api.steps import Processor, Sink
|
||||||
|
from metadata.utils import fqn
|
||||||
from metadata.utils.importer import import_sink_class
|
from metadata.utils.importer import import_sink_class
|
||||||
from metadata.utils.logger import test_suite_logger
|
from metadata.utils.logger import test_suite_logger
|
||||||
from metadata.workflow.base import BaseWorkflow
|
from metadata.workflow.base import BaseWorkflow, T
|
||||||
|
|
||||||
logger = test_suite_logger()
|
logger = test_suite_logger()
|
||||||
|
|
||||||
@ -48,3 +57,59 @@ class TestSuiteWorkflow(BaseWorkflow):
|
|||||||
|
|
||||||
def _get_test_runner_processor(self) -> Processor:
|
def _get_test_runner_processor(self) -> Processor:
|
||||||
return TestCaseRunner.create(self.config.dict(), self.metadata)
|
return TestCaseRunner.create(self.config.dict(), self.metadata)
|
||||||
|
|
||||||
|
def _retrieve_service_connection_if_needed(self, service_type: ServiceType) -> None:
|
||||||
|
"""Get service object from source config `entityFullyQualifiedName`"""
|
||||||
|
if (
|
||||||
|
not self.config.source.serviceConnection
|
||||||
|
and not self.metadata.config.forceEntityOverwriting
|
||||||
|
):
|
||||||
|
fully_qualified_name = (
|
||||||
|
self.config.source.sourceConfig.config.entityFullyQualifiedName.__root__
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
service_name = fqn.split(fully_qualified_name)[0]
|
||||||
|
except IndexError as exc:
|
||||||
|
logger.debug(traceback.format_exc())
|
||||||
|
raise IndexError(
|
||||||
|
f"Could not retrieve service name from entity fully qualified name {fully_qualified_name}: {exc}"
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
service: DatabaseService = self.metadata.get_by_name(
|
||||||
|
DatabaseService, service_name
|
||||||
|
)
|
||||||
|
if not service:
|
||||||
|
raise ConnectionError(
|
||||||
|
f"Could not retrieve service with name `{service_name}`. "
|
||||||
|
"Typically caused by the `entityFullyQualifiedName` does not exists in OpenMetadata "
|
||||||
|
"or the JWT Token is invalid."
|
||||||
|
)
|
||||||
|
|
||||||
|
self.config.source.serviceConnection = ServiceConnection(
|
||||||
|
__root__=service.connection
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as exc:
|
||||||
|
logger.debug(traceback.format_exc())
|
||||||
|
logger.error(
|
||||||
|
f"Error getting service connection for service name [{service_name}]"
|
||||||
|
f" using the secrets manager provider [{self.metadata.config.secretsManagerProvider}]: {exc}"
|
||||||
|
)
|
||||||
|
raise exc
|
||||||
|
|
||||||
|
def _get_ingestion_pipeline_service(self) -> Optional[T]:
|
||||||
|
"""
|
||||||
|
Ingestion Pipelines are linked to either an EntityService (DatabaseService, MessagingService,...)
|
||||||
|
or a Test Suite.
|
||||||
|
|
||||||
|
Depending on the Source Config Type, we'll need to GET one or the other to create
|
||||||
|
the Ingestion Pipeline
|
||||||
|
"""
|
||||||
|
return self.metadata.get_by_name(
|
||||||
|
entity=TestSuite,
|
||||||
|
fqn=fqn.build(
|
||||||
|
metadata=None,
|
||||||
|
entity_type=TestSuite,
|
||||||
|
table_fqn=self.config.source.sourceConfig.config.entityFullyQualifiedName,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
@ -26,6 +26,7 @@ from metadata.generated.schema.metadataIngestion.workflow import (
|
|||||||
from metadata.ingestion.api.step import Step
|
from metadata.ingestion.api.step import Step
|
||||||
from metadata.ingestion.api.steps import Source
|
from metadata.ingestion.api.steps import Source
|
||||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||||
|
from metadata.utils.helpers import datetime_to_ts
|
||||||
|
|
||||||
SUCCESS_THRESHOLD_VALUE = 90
|
SUCCESS_THRESHOLD_VALUE = 90
|
||||||
|
|
||||||
@ -97,8 +98,8 @@ class WorkflowStatusMixin:
|
|||||||
pipeline_status = PipelineStatus(
|
pipeline_status = PipelineStatus(
|
||||||
runId=self.run_id,
|
runId=self.run_id,
|
||||||
pipelineState=state,
|
pipelineState=state,
|
||||||
startDate=datetime.now().timestamp() * 1000,
|
startDate=datetime_to_ts(datetime.now()),
|
||||||
timestamp=datetime.now().timestamp() * 1000,
|
timestamp=datetime_to_ts(datetime.now()),
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
pipeline_status = self.metadata.get_pipeline_status(
|
pipeline_status = self.metadata.get_pipeline_status(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user