mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2026-01-06 04:26:57 +00:00
* Fix #8570: Parsing Workflow Gracefully on Deploy * use entity_class * py format
This commit is contained in:
parent
7c395a82d4
commit
1d93d0e650
@ -216,7 +216,15 @@ def _parse_validation_err(validation_error: ValidationError) -> str:
|
||||
if err.get("type") == "value_error.missing"
|
||||
]
|
||||
|
||||
return "\t - " + "\n\t - ".join(missing_fields + extra_fields)
|
||||
invalid_fields = [
|
||||
f"Invalid parameter value for '{err.get('loc')[0]}'"
|
||||
if len(err.get("loc")) == 1
|
||||
else f"Invalid parameter value for {err.get('loc')}"
|
||||
for err in validation_error.errors()
|
||||
if err.get("type") not in ("value_error.missing", "value_error.extra")
|
||||
]
|
||||
|
||||
return "\t - " + "\n\t - ".join(missing_fields + extra_fields + invalid_fields)
|
||||
|
||||
|
||||
def _unsafe_parse_config(config: dict, cls: T, message: str) -> None:
|
||||
|
||||
@ -14,6 +14,7 @@ class Loggers(Enum):
|
||||
API = "AirflowAPI"
|
||||
OPERATIONS = "AirflowOperations"
|
||||
WORKFLOW = "AirflowWorkflow"
|
||||
UTILS = "AirflowUtils"
|
||||
|
||||
|
||||
def build_logger(logger_name: str) -> logging.Logger:
|
||||
@ -44,3 +45,7 @@ def operations_logger():
|
||||
|
||||
def workflow_logger():
    """
    Return the logger built by `build_logger` for the `Loggers.WORKFLOW` name.
    """
    workflow_logger_name = Loggers.WORKFLOW.value
    return build_logger(workflow_logger_name)
|
||||
|
||||
|
||||
def utils_logger():
    """
    Return the logger built by `build_logger` for the `Loggers.UTILS` name.
    """
    utils_logger_name = Loggers.UTILS.value
    return build_logger(utils_logger_name)
|
||||
|
||||
@ -0,0 +1,106 @@
|
||||
# Copyright 2021 Collate
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
Module to parse the source connection config and handle validation errors
|
||||
"""
|
||||
|
||||
from openmetadata_managed_apis.utils.logger import utils_logger
|
||||
from pydantic import ValidationError
|
||||
|
||||
from metadata.ingestion.api.parser import (
|
||||
HAS_INNER_CONNECTION,
|
||||
InvalidWorkflowException,
|
||||
_unsafe_parse_config,
|
||||
get_connection_class,
|
||||
get_service_type,
|
||||
)
|
||||
|
||||
logger = utils_logger()
|
||||
|
||||
|
||||
def parse_validation_err(validation_error: "ValidationError") -> str:
    """
    Convert the validation error into a message to log.

    Every entry returned by `validation_error.errors()` is turned into one
    line, grouped by its `type`:

    - `value_error.extra` -> "Extra parameter ..."
    - `value_error.missing` -> "Missing parameter ..."
    - anything else -> "Invalid parameter value for ..."

    When the error location (`loc`) has a single element, only that element
    is shown, quoted. Otherwise the whole location is printed as-is.

    :param validation_error: error exposing an `errors()` method that returns
        a list of dicts with `loc` and `type` keys
    :return: the messages joined by newlines (extra, then missing, then
        invalid); an empty string if there are no errors
    """
    # Fetch the error list once instead of once per category
    errors = validation_error.errors()

    extra_fields = [
        f"Extra parameter '{err.get('loc')[0]}'"
        if len(err.get("loc")) == 1
        else f"Extra parameter in {err.get('loc')}"
        for err in errors
        if err.get("type") == "value_error.extra"
    ]

    missing_fields = [
        f"Missing parameter '{err.get('loc')[0]}'"
        if len(err.get("loc")) == 1
        else f"Missing parameter in {err.get('loc')}"
        for err in errors
        if err.get("type") == "value_error.missing"
    ]

    # Nested locations must also be reported as invalid values, not as
    # missing parameters
    invalid_fields = [
        f"Invalid parameter value for '{err.get('loc')[0]}'"
        if len(err.get("loc")) == 1
        else f"Invalid parameter value for {err.get('loc')}"
        for err in errors
        if err.get("type") not in ("value_error.missing", "value_error.extra")
    ]

    # Keep the historical ordering of the output: extra, missing, invalid
    return "\n".join(extra_fields + missing_fields + invalid_fields)
|
||||
|
||||
|
||||
def _parse_inner_connection(connection_dict: dict, source_type: str) -> None:
    """
    Parse the connection nested inside the config of a flagged connector.

    The nested config is read from
    `connection_dict["connection"]["config"]["connection"]`. Its `type` is
    used to resolve the connection class that the nested config is then
    parsed against.

    :param connection_dict: JSON configuration of the service
    :param source_type: source type name of the outer service, e.g., Airflow.
        Only used to build the error message.
    """
    # Unsafe key access on purpose: a malformed config surfaces as a KeyError
    nested_config = connection_dict["connection"]["config"]["connection"]
    nested_source_type = nested_config["type"]

    nested_connection_class = get_connection_class(
        nested_source_type, get_service_type(nested_source_type)
    )

    _unsafe_parse_config(
        config=nested_config,
        cls=nested_connection_class,
        message=f"Error parsing the inner service connection for {source_type}",
    )
|
||||
|
||||
|
||||
def parse_service_connection(connection_dict: dict) -> None:
    """
    Parse the service connection against its scoped connection class so
    that any error raised during validation points at the right model.

    For source types listed in `HAS_INNER_CONNECTION`, the nested
    `connection` config is parsed first.

    :param connection_dict: JSON configuration of the service
    :raises InvalidWorkflowException: if the config has no `type`
    :raises KeyError: if the `connection` / `config` keys are missing
    """
    # Unsafe access to the keys. Allow a KeyError if the config is not well formatted
    service_config = connection_dict["connection"]["config"]
    source_type = service_config.get("type")

    if source_type is None:
        raise InvalidWorkflowException("Missing type in the serviceConnection config")

    logger.debug(
        f"Error parsing the Workflow Configuration for {source_type} ingestion"
    )

    # Resolve the class that models this specific service connection
    connection_class = get_connection_class(
        source_type, get_service_type(source_type)
    )

    # Connectors wrapping another connection get the inner one checked first
    if source_type in HAS_INNER_CONNECTION:
        _parse_inner_connection(connection_dict, source_type)

    # Finally, parse the service connection dictionary with the scoped class
    _unsafe_parse_config(
        config=service_config,
        cls=connection_class,
        message="Error parsing the service connection",
    )
|
||||
@ -18,6 +18,8 @@ from typing import Callable, Optional, Union
|
||||
import airflow
|
||||
from airflow import DAG
|
||||
from openmetadata_managed_apis.api.utils import clean_dag_id
|
||||
from pydantic import ValidationError
|
||||
from requests.utils import quote
|
||||
|
||||
from metadata.data_insight.api.workflow import DataInsightWorkflow
|
||||
from metadata.generated.schema.entity.services.dashboardService import DashboardService
|
||||
@ -38,6 +40,11 @@ try:
|
||||
except ModuleNotFoundError:
|
||||
from airflow.operators.python_operator import PythonOperator
|
||||
|
||||
from openmetadata_managed_apis.utils.logger import workflow_logger
|
||||
from openmetadata_managed_apis.utils.parser import (
|
||||
parse_service_connection,
|
||||
parse_validation_err,
|
||||
)
|
||||
from openmetadata_managed_apis.workflows.ingestion.credentials_builder import (
|
||||
build_secrets_manager_credentials,
|
||||
)
|
||||
@ -54,7 +61,14 @@ from metadata.generated.schema.metadataIngestion.workflow import (
|
||||
Source as WorkflowSource,
|
||||
)
|
||||
from metadata.generated.schema.metadataIngestion.workflow import WorkflowConfig
|
||||
from metadata.ingestion.api.parser import (
|
||||
InvalidWorkflowException,
|
||||
ParsingConfigurationError,
|
||||
)
|
||||
from metadata.ingestion.api.workflow import Workflow
|
||||
from metadata.ingestion.ometa.utils import model_str
|
||||
|
||||
logger = workflow_logger()
|
||||
|
||||
|
||||
class InvalidServiceException(Exception):
|
||||
@ -115,25 +129,57 @@ def build_source(ingestion_pipeline: IngestionPipeline) -> WorkflowSource:
|
||||
sourceConfig=ingestion_pipeline.sourceConfig,
|
||||
)
|
||||
|
||||
if service_type == "databaseService":
|
||||
service: DatabaseService = metadata.get_by_name(
|
||||
entity=DatabaseService, fqn=ingestion_pipeline.service.name
|
||||
)
|
||||
elif service_type == "pipelineService":
|
||||
service: PipelineService = metadata.get_by_name(
|
||||
entity=PipelineService, fqn=ingestion_pipeline.service.name
|
||||
)
|
||||
elif service_type == "dashboardService":
|
||||
service: DashboardService = metadata.get_by_name(
|
||||
entity=DashboardService, fqn=ingestion_pipeline.service.name
|
||||
)
|
||||
elif service_type == "messagingService":
|
||||
service: MessagingService = metadata.get_by_name(
|
||||
entity=MessagingService, fqn=ingestion_pipeline.service.name
|
||||
)
|
||||
elif service_type == "mlmodelService":
|
||||
service: MlModelService = metadata.get_by_name(
|
||||
entity=MlModelService, fqn=ingestion_pipeline.service.name
|
||||
entity_class = None
|
||||
try:
|
||||
if service_type == "databaseService":
|
||||
entity_class = DatabaseService
|
||||
service: DatabaseService = metadata.get_by_name(
|
||||
entity=entity_class, fqn=ingestion_pipeline.service.name
|
||||
)
|
||||
elif service_type == "pipelineService":
|
||||
entity_class = PipelineService
|
||||
service: PipelineService = metadata.get_by_name(
|
||||
entity=entity_class, fqn=ingestion_pipeline.service.name
|
||||
)
|
||||
elif service_type == "dashboardService":
|
||||
entity_class = DashboardService
|
||||
service: DashboardService = metadata.get_by_name(
|
||||
entity=entity_class, fqn=ingestion_pipeline.service.name
|
||||
)
|
||||
elif service_type == "messagingService":
|
||||
entity_class = MessagingService
|
||||
service: MessagingService = metadata.get_by_name(
|
||||
entity=entity_class, fqn=ingestion_pipeline.service.name
|
||||
)
|
||||
elif service_type == "mlmodelService":
|
||||
entity_class = MlModelService
|
||||
service: MlModelService = metadata.get_by_name(
|
||||
entity=entity_class, fqn=ingestion_pipeline.service.name
|
||||
)
|
||||
else:
|
||||
raise InvalidServiceException(f"Invalid Service Type: {service_type}")
|
||||
except ValidationError as original_error:
|
||||
try:
|
||||
resp = metadata.client.get(
|
||||
f"{metadata.get_suffix(entity_class)}/name/{quote(model_str(ingestion_pipeline.service.name), safe='')}"
|
||||
)
|
||||
parse_service_connection(resp)
|
||||
except (ValidationError, InvalidWorkflowException) as scoped_error:
|
||||
if isinstance(scoped_error, ValidationError):
|
||||
# Let's catch validations of internal Workflow models, not the Workflow itself
|
||||
object_error = (
|
||||
scoped_error.model.__name__
|
||||
if scoped_error.model is not None
|
||||
else "workflow"
|
||||
)
|
||||
raise ParsingConfigurationError(
|
||||
f"We encountered an error parsing the configuration of your {object_error}.\n"
|
||||
f"{parse_validation_err(scoped_error)}"
|
||||
)
|
||||
raise scoped_error
|
||||
raise ParsingConfigurationError(
|
||||
f"We encountered an error parsing the configuration of your workflow.\n"
|
||||
f"{parse_validation_err(original_error)}"
|
||||
)
|
||||
|
||||
if not service:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user