Pere Miquel Brull 63533eb388
Fix for connectors based on refactoring of schemas V2 (#3870)
Co-authored-by: Ayush Shah <ayush@getcollate.io>
2022-04-05 18:33:25 -07:00

155 lines
4.8 KiB
Python

# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Workflow related configurations and utilities
"""
import importlib
import logging
from typing import Type, TypeVar
from metadata.generated.schema.metadataIngestion.workflow import (
OpenMetadataServerConfig,
)
from metadata.generated.schema.metadataIngestion.workflow import (
Source as WorkflowSource,
)
from metadata.ingestion.api.common import DynamicTypedConfig
from metadata.ingestion.api.processor import Processor
from metadata.ingestion.api.sink import Sink
from metadata.ingestion.api.source import Source
# Module-level logger, registered under the fixed name "Config".
logger = logging.getLogger("Config")
# Generic type variable used in the return annotation of `get_class`.
T = TypeVar("T")
def fetch_type_class(_type: str, is_file: bool):
    """
    Turn a configured type name into the piece of an import path
    used for dynamic imports.

    Hyphens are always normalised to underscores first.

    :param _type: Type specified in the config, e.g., metadata-rest
    :param is_file: If True, return the snake_case module name
        (`metadata_rest`). Otherwise, return the class-name prefix
        obtained by title-casing every underscore-separated part
        and concatenating them (`MetadataRest`).
    """
    snake_name = _type.replace("-", "_")

    if not is_file:
        # Class names are built by joining the title-cased chunks.
        return "".join(chunk.title() for chunk in snake_name.split("_"))

    return snake_name
def get_class(key: str) -> Type[T]:
    """
    Resolve a dotted import key to the object it names.

    The key is split on its last dot: everything before it is imported
    as a module, and the trailing part is looked up as an attribute of
    that module.

    :param key: Import path, e.g., `package.module.ClassName`
    :return: The attribute found on the imported module. A key without
        any dot is not resolved and yields None.
    """
    if "." not in key:
        # No module part to import from; nothing is resolved.
        return None

    module_path, _, attribute = key.rpartition(".")
    return getattr(importlib.import_module(module_path), attribute)
def get_ingestion_source(
    source_type: str,
    source_config: WorkflowSource,
    metadata_config: OpenMetadataServerConfig,
) -> Source:
    """
    Dynamically import the source class matching `source_type`,
    instantiate it through its `create` method and prepare it.

    The class is looked up as
    `metadata.ingestion.source.<module>.<ClassPrefix>Source`, where both
    parts are derived from `source_type` via `fetch_type_class`.

    :param source_type: Type specified in the config, e.g., redshift
    :param source_config: Specific source configurations, such as the host
    :param metadata_config: Metadata server configurations
    :return: The created source, after `prepare()` has been called on it
    """
    module_name = fetch_type_class(source_type, is_file=True)
    class_prefix = fetch_type_class(source_type, is_file=False)
    class_path = "metadata.ingestion.source.{}.{}Source".format(
        module_name, class_prefix
    )
    source_class = get_class(class_path)

    # The source receives its configuration as a plain dict.
    ingestion_source: Source = source_class.create(
        source_config.dict(), metadata_config
    )
    logger.debug(f"Source type:{source_type},{source_class} configured")

    ingestion_source.prepare()
    logger.debug(f"Source type:{source_type},{source_class} prepared")

    return ingestion_source
def get_sink(
    sink_type: str,
    sink_config: DynamicTypedConfig,
    metadata_config: OpenMetadataServerConfig,
    _from: str = "ingestion",
) -> Sink:
    """
    Dynamically import the sink class matching `sink_type` and
    instantiate it through its `create` method.

    The class is looked up as
    `metadata.<_from>.sink.<module>.<ClassPrefix>Sink`, where the last
    two parts are derived from `sink_type` via `fetch_type_class`.

    :param sink_type: Type specified in the config, e.g., metadata-rest
    :param sink_config: Specific sink configurations, such as the host
    :param metadata_config: Metadata server configurations
    :param _from: Package under `metadata` from which the sink class
        is loaded. Ingestion by default.
    :return: The created sink
    """
    module_name = fetch_type_class(sink_type, is_file=True)
    class_prefix = fetch_type_class(sink_type, is_file=False)
    class_path = "metadata.{}.sink.{}.{}Sink".format(
        _from, module_name, class_prefix
    )
    sink_class = get_class(class_path)

    # Only the nested "config" entry is handed to the sink; an empty
    # dict is used when the entry is absent.
    inner_config = sink_config.dict().get("config", {})
    created_sink: Sink = sink_class.create(inner_config, metadata_config)

    logger.debug(f"Sink type: {sink_type}, {sink_class} configured")
    return created_sink
def get_processor(
    processor_type: str,
    processor_config: DynamicTypedConfig,
    metadata_config: OpenMetadataServerConfig,
    _from: str = "ingestion",
    **kwargs,
) -> Processor:
    """
    Helps us to fetch and import the Processor class.
    By default, we will pick it up from `ingestion`.

    The class is looked up as
    `metadata.<_from>.processor.<module>.<ClassPrefix>Processor`, where
    the last two parts are derived from `processor_type` via
    `fetch_type_class` (a hyphenated type such as `my-type` resolves to
    module `my_type` and class `MyTypeProcessor`).

    We allow to pass any other specific object we may require.
    E.g., for the ORM Profiler we need a Session to reach
    the source tables.

    :param processor_type: Type specified in the config
    :param processor_config: Specific Processor configurations,
        such as the profiler and tests
    :param metadata_config: Metadata server configurations
    :param _from: Package under `metadata` from which the processor
        class is loaded. Ingestion by default.
    :param kwargs: Extra keyword arguments forwarded untouched to the
        processor class `create` method
    :return: The created processor
    """
    processor_class = get_class(
        "metadata.{}.processor.{}.{}Processor".format(
            _from,
            fetch_type_class(processor_type, is_file=True),
            fetch_type_class(processor_type, is_file=False),
        )
    )

    # Only the nested "config" entry is handed to the processor; an
    # empty dict is used when the entry is absent.
    processor: Processor = processor_class.create(
        processor_config.dict().get("config", {}), metadata_config, **kwargs
    )

    # This message used to read "Sink type", which mislabelled
    # processors as sinks in the debug logs.
    logger.debug(
        f"Processor type: {processor_type}, {processor_class} configured"
    )

    return processor