mirror of
https://github.com/datahub-project/datahub.git
synced 2025-07-13 04:02:34 +00:00
157 lines
5.4 KiB
Python
157 lines
5.4 KiB
Python
# Copyright 2021 Acryl Data, Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
import logging
|
|
import re
|
|
from typing import Optional
|
|
|
|
from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph
|
|
from datahub_actions.action.action import Action
|
|
from datahub_actions.action.action_registry import action_registry
|
|
from datahub_actions.api.action_graph import AcrylDataHubGraph
|
|
from datahub_actions.pipeline.pipeline_config import (
|
|
ActionConfig,
|
|
FilterConfig,
|
|
SourceConfig,
|
|
TransformConfig,
|
|
)
|
|
from datahub_actions.pipeline.pipeline_context import PipelineContext
|
|
from datahub_actions.plugin.transform.filter.filter_transformer import (
|
|
FilterTransformer,
|
|
FilterTransformerConfig,
|
|
)
|
|
from datahub_actions.source.event_source import EventSource
|
|
from datahub_actions.source.event_source_registry import event_source_registry
|
|
from datahub_actions.transform.transformer import Transformer
|
|
from datahub_actions.transform.transformer_registry import transformer_registry
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def create_action_context(
    pipeline_name: str, datahub_config: Optional[DatahubClientConfig]
) -> PipelineContext:
    """Build the PipelineContext for the pipeline called ``pipeline_name``.

    When ``datahub_config`` is provided, a DataHubGraph is constructed from it
    and wrapped in an AcrylDataHubGraph, which is handed to the context.
    When it is None, the context receives None in place of a graph.
    """
    if datahub_config is None:
        return PipelineContext(pipeline_name, None)

    graph = AcrylDataHubGraph(DataHubGraph(datahub_config))
    return PipelineContext(pipeline_name, graph)
|
|
|
|
|
|
def create_event_source(
    source_config: SourceConfig, ctx: PipelineContext
) -> EventSource:
    """Instantiate the Event Source registered under ``source_config.type``.

    The class is fetched from ``event_source_registry`` and its ``create``
    method is called with the source's config (an empty dict when the config
    is None) and ``ctx``.

    Raises:
        Exception: if anything raised while creating the instance (chained to
            the original error), or if ``create`` returned None.
    """
    source_type = source_config.type
    # The registry lookup is outside the try block, so lookup failures
    # propagate to the caller unwrapped.
    source_cls = event_source_registry.get(source_type)

    try:
        logger.debug(
            f"Attempting to instantiate new Event Source of type {source_config.type}.."
        )
        raw_config = source_config.config
        if raw_config is None:
            raw_config = {}
        source = source_cls.create(raw_config, ctx)
    except Exception as e:
        raise Exception(
            f"Caught exception while attempting to instantiate Event Source of type {source_config.type}"
        ) from e

    if source is None:
        raise Exception(
            f"Failed to create Event Source with type {source_type}. Event Source create method returned 'None'."
        )

    return source
|
|
|
|
|
|
def create_filter_transformer(
    filter_config: FilterConfig, ctx: PipelineContext
) -> Transformer:
    """Create a FilterTransformer from the pipeline's filter configuration.

    The ``event_type`` and ``event`` fields of ``filter_config`` are copied
    into a FilterTransformerConfig, which is used to construct the transformer.
    ``ctx`` is accepted but not used here.

    Raises:
        Exception: if building the config or the transformer raised (chained
            to the original error).
    """
    try:
        logger.debug("Attempting to instantiate filter transformer..")
        event_type = filter_config.event_type
        event = filter_config.event
        transformer_config = FilterTransformerConfig(
            event_type=event_type, event=event
        )
        filter_transformer = FilterTransformer(transformer_config)
    except Exception as e:
        raise Exception(
            "Caught exception while attempting to instantiate Filter transformer"
        ) from e

    return filter_transformer
|
|
|
|
|
|
def create_transformer(
    transform_config: TransformConfig, ctx: PipelineContext
) -> Transformer:
    """Instantiate the Transformer registered under ``transform_config.type``.

    The class is fetched from ``transformer_registry`` and its ``create``
    method is called with the transformer's config (an empty dict when the
    config is None) and ``ctx``.

    Raises:
        Exception: if anything raised while creating the instance (chained to
            the original error), or if ``create`` returned None.
    """
    kind = transform_config.type
    # The registry lookup is outside the try block, so lookup failures
    # propagate to the caller unwrapped.
    transformer_cls = transformer_registry.get(kind)

    try:
        logger.debug(
            f"Attempting to instantiate new Transformer of type {transform_config.type}.."
        )
        raw_config = transform_config.config
        if raw_config is None:
            raw_config = {}
        transformer = transformer_cls.create(raw_config, ctx)
    except Exception as e:
        raise Exception(
            f"Caught exception while attempting to instantiate Transformer with type {kind}"
        ) from e

    if transformer is None:
        raise Exception(
            f"Failed to create transformer with type {kind}. Transformer create method returned 'None'."
        )

    return transformer
|
|
|
|
|
|
def create_action(action_config: ActionConfig, ctx: PipelineContext) -> Action:
    """Instantiate the Action registered under ``action_config.type``.

    The class is fetched from ``action_registry`` and its ``create`` method is
    called with the action's config (an empty dict when the config is None)
    and ``ctx``. Both the registry lookup and the ``create`` call happen
    inside the try block, so failures from either are wrapped.

    Raises:
        Exception: if the lookup or creation raised (chained to the original
            error), or if ``create`` returned None.
    """
    kind = action_config.type

    try:
        logger.debug(
            f"Attempting to instantiate new Action of type {action_config.type}.."
        )
        action_cls = action_registry.get(kind)
        raw_config = action_config.config
        if raw_config is None:
            raw_config = {}
        action = action_cls.create(raw_config, ctx)
    except Exception as e:
        raise Exception(
            f"Caught exception while attempting to instantiate Action with type {kind}. "
        ) from e

    if action is None:
        raise Exception(
            f"Failed to create action with type {kind}. Action create method returned 'None'."
        )

    return action
|
|
|
|
|
|
def normalize_directory_name(name: str) -> str:
    """Return ``name`` in a form suitable for use as a directory name.

    The name is lower-cased, then every character that is not a word
    character (``\\w``, which already includes the underscore) or a hyphen is
    replaced with an underscore. Whitespace and periods are therefore
    replaced, not removed.
    """
    lowered = name.lower()
    return re.sub(r"[^\w\-_]", "_", lowered)
|
|
|
|
|
|
def get_transformer_name(transformer: Transformer) -> str:
    """Return the class name of the given transformer instance."""
    # TODO: Would be better to compute this using the transformer registry itself.
    transformer_cls = type(transformer)
    return transformer_cls.__name__
|