2022-02-18 07:48:38 +01:00
|
|
|
# Copyright 2021 Collate
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
|
|
|
|
"""
|
|
|
|
Build and document all supported Engines
|
|
|
|
"""
|
|
|
|
import logging
|
2022-04-19 17:48:55 +02:00
|
|
|
from functools import singledispatch
|
2022-04-22 11:30:59 +05:30
|
|
|
from typing import Union
|
2022-02-18 07:48:38 +01:00
|
|
|
|
|
|
|
from sqlalchemy import create_engine
|
|
|
|
from sqlalchemy.engine.base import Engine
|
2022-04-12 17:06:49 +02:00
|
|
|
from sqlalchemy.exc import OperationalError
|
2022-02-18 07:48:38 +01:00
|
|
|
from sqlalchemy.orm import sessionmaker
|
|
|
|
from sqlalchemy.orm.session import Session
|
|
|
|
|
2022-04-12 14:26:33 +05:30
|
|
|
from metadata.generated.schema.entity.services.connections.connectionBasicType import (
|
|
|
|
ConnectionOptions,
|
|
|
|
)
|
2022-04-19 17:48:55 +02:00
|
|
|
from metadata.generated.schema.entity.services.connections.database.bigQueryConnection import (
|
|
|
|
BigQueryConnection,
|
|
|
|
)
|
2022-04-22 20:07:06 +05:30
|
|
|
from metadata.generated.schema.entity.services.connections.database.databricksConnection import (
|
|
|
|
DatabricksConnection,
|
|
|
|
)
|
2022-04-22 11:30:59 +05:30
|
|
|
from metadata.generated.schema.entity.services.connections.database.dynamoDBConnection import (
|
|
|
|
DynamoDBConnection,
|
|
|
|
)
|
|
|
|
from metadata.generated.schema.entity.services.connections.database.glueConnection import (
|
|
|
|
GlueConnection,
|
|
|
|
)
|
2022-04-22 20:07:06 +05:30
|
|
|
from metadata.generated.schema.entity.services.connections.database.snowflakeConnection import (
|
|
|
|
SnowflakeConnection,
|
2022-04-22 11:30:59 +05:30
|
|
|
)
|
2022-04-22 20:53:42 +05:30
|
|
|
from metadata.utils.connection_clients import DynamoClient, GlueClient
|
2022-04-19 17:48:55 +02:00
|
|
|
from metadata.utils.credentials import set_google_credentials
|
2022-04-07 20:50:37 +01:00
|
|
|
from metadata.utils.source_connections import get_connection_args, get_connection_url
|
2022-04-12 22:14:17 +02:00
|
|
|
from metadata.utils.timeout import timeout
|
2022-02-18 07:48:38 +01:00
|
|
|
|
2022-03-07 00:43:43 +01:00
|
|
|
# Module-level logger, obtained under the fixed "Utils" logger name.
logger = logging.getLogger("Utils")
|
2022-02-18 07:48:38 +01:00
|
|
|
|
|
|
|
|
2022-04-12 17:06:49 +02:00
|
|
|
class SourceConnectionException(Exception):
    """
    Error signalling that a connection to the source
    could not be established.
    """
|
|
|
|
|
|
|
|
|
2022-04-22 11:30:59 +05:30
|
|
|
def create_generic_connection(connection, verbose: bool = False):
    """
    Build a SQLAlchemy Engine out of a connection object.

    The engine URL comes from ``get_connection_url`` and the driver
    ``connect_args`` from ``get_connection_args``; the connection's
    ``connectionOptions`` are forwarded to ``create_engine`` as keyword
    arguments.

    :param connection: JSON Schema connection model
    :param verbose: passed to ``create_engine`` as ``echo``
    :return: SQLAlchemy Engine
    """
    # Fall back to an empty options model when the connection defines none.
    engine_options = connection.connectionOptions or ConnectionOptions()

    return create_engine(
        get_connection_url(connection),
        **engine_options.dict(),
        connect_args=get_connection_args(connection),
        echo=verbose,
    )
|
|
|
|
|
|
|
|
|
2022-04-19 17:48:55 +02:00
|
|
|
@singledispatch
def get_connection(
    connection, verbose: bool = False
) -> Union[Engine, DynamoClient, GlueClient]:
    """
    Default dispatch target: build the connection through the
    generic SQLAlchemy Engine factory.

    Connection types registered on this function via
    ``get_connection.register`` are handled by their own overload.
    """
    return create_generic_connection(connection, verbose=verbose)
|
2022-04-19 17:48:55 +02:00
|
|
|
|
|
|
|
|
2022-04-22 20:07:06 +05:30
|
|
|
@get_connection.register
def _(connection: DatabricksConnection, verbose: bool = False):
    """
    Build the Engine for a Databricks connection.

    The connection's ``httpPath`` is copied into
    ``connectionArguments["http_path"]`` before delegating to the
    generic Engine factory. Note that this mutates ``connection``.

    :param connection: Databricks connection
    :param verbose: debugger or not
    :return: Engine
    """
    if not connection.connectionArguments:
        connection.connectionArguments = {}
    # Always inject the HTTP path, also when the caller already supplied
    # other connection arguments; otherwise httpPath would be ignored.
    connection.connectionArguments["http_path"] = connection.httpPath
    return create_generic_connection(connection, verbose)
|
|
|
|
|
|
|
|
|
|
|
|
@get_connection.register
def _(connection: SnowflakeConnection, verbose: bool = False):
    """
    Build the Engine for a Snowflake connection.

    When the connection carries a PEM private key, the key is loaded
    using the passphrase found in the ``SNOWFLAKE_PRIVATE_KEY_PASSPHRASE``
    environment variable (if any), re-serialized as unencrypted DER/PKCS8
    bytes and stored in ``connectionArguments["private_key"]``.
    Note that this mutates ``connection``.

    :param connection: Snowflake connection
    :param verbose: debugger or not
    :return: Engine
    """
    if connection.privateKey:
        # Imported lazily: these names are only used on this branch.
        import os

        from cryptography.hazmat.backends import default_backend
        from cryptography.hazmat.primitives import serialization

        snowflake_private_key_passphrase = os.environ.get(
            "SNOWFLAKE_PRIVATE_KEY_PASSPHRASE", ""
        )
        if not snowflake_private_key_passphrase:
            logger.warning(
                "Snowflake Private Key Passphrase not found, replacing it with empty string"
            )
        p_key = serialization.load_pem_private_key(
            bytes(connection.privateKey, "utf-8"),
            # cryptography requires ``None`` (not b"") for an unencrypted
            # key; an empty bytes password makes it raise TypeError.
            password=snowflake_private_key_passphrase.encode() or None,
            backend=default_backend(),
        )
        pkb = p_key.private_bytes(
            encoding=serialization.Encoding.DER,
            format=serialization.PrivateFormat.PKCS8,
            encryption_algorithm=serialization.NoEncryption(),
        )
        if not connection.connectionArguments:
            connection.connectionArguments = {}
        connection.connectionArguments["private_key"] = pkb

    return create_generic_connection(connection, verbose)
|
|
|
|
|
|
|
|
|
2022-04-22 11:30:59 +05:30
|
|
|
@get_connection.register
def _(connection: BigQueryConnection, verbose: bool = False):
    """
    Set the GCS credentials from the connection, then build the engine

    :param connection: BigQuery connection
    :param verbose: debugger or not
    :return: Engine
    """
    gcs_credentials = connection.credentials
    set_google_credentials(gcs_credentials=gcs_credentials)

    return create_generic_connection(connection, verbose=verbose)
|
|
|
|
|
|
|
|
|
|
|
|
@get_connection.register
def _(connection: DynamoDBConnection, verbose: bool = False):
    """
    Build the DynamoDB client from the connection's AWS configuration

    :param connection: DynamoDB connection
    :param verbose: not used by this overload
    :return: the result of ``AWSClient.get_dynomo_client``
    """
    from metadata.utils.aws_client import AWSClient

    aws_client = AWSClient(connection.awsConfig)
    return aws_client.get_dynomo_client()
|
|
|
|
|
|
|
|
|
|
|
|
@get_connection.register
def _(connection: GlueConnection, verbose: bool = False):
    """
    Build the Glue client from the connection's AWS configuration

    :param connection: Glue connection
    :param verbose: not used by this overload
    :return: the result of ``AWSClient.get_glue_client``
    """
    from metadata.utils.aws_client import AWSClient

    aws_client = AWSClient(connection.awsConfig)
    return aws_client.get_glue_client()
|
2022-04-19 17:48:55 +02:00
|
|
|
|
|
|
|
|
2022-02-18 07:48:38 +01:00
|
|
|
def create_and_bind_session(engine: Engine) -> Session:
    """
    Open a new Session bound to the given engine,
    to run our operations against it.
    """
    session_factory = sessionmaker(bind=engine)
    return session_factory()
|
2022-04-12 17:06:49 +02:00
|
|
|
|
|
|
|
|
2022-04-12 22:14:17 +02:00
|
|
|
@timeout(seconds=120)
@singledispatch
def test_connection(connection: Engine) -> None:
    """
    Test that we can connect to the source using the given engine

    :param connection: Engine to test
    :return: None or raise an exception if we cannot connect
    :raises SourceConnectionException: if opening the connection fails;
        the original error is chained as its cause
    """
    try:
        # Opening (and immediately releasing) a connection is the whole check.
        with connection.connect():
            pass
    except OperationalError as err:
        raise SourceConnectionException(
            f"Connection error for {connection} - {err}. Check the connection details."
        ) from err
    except Exception as err:
        raise SourceConnectionException(
            f"Unknown error connecting with {connection} - {err}."
        ) from err
|
|
|
|
|
|
|
|
|
|
|
|
@test_connection.register
def _(connection: DynamoClient) -> None:
    """
    Test that we can connect to the source using the given aws resource

    :param connection: wrapper holding the boto service resource to test
    :return: None or raise an exception if we cannot connect
    :raises SourceConnectionException: if listing the tables fails;
        the original error is chained as its cause
    """
    from botocore.client import ClientError

    try:
        # Assuming ``connection.client`` is a boto3 service resource (as the
        # docstring states): its collections are lazy, so ``tables.all()``
        # alone sends no request. Pull the first item to force a real call.
        next(iter(connection.client.tables.all()), None)
    except ClientError as err:
        raise SourceConnectionException(
            f"Connection error for {connection} - {err}. Check the connection details."
        ) from err
    except Exception as err:
        raise SourceConnectionException(
            f"Unknown error connecting with {connection} - {err}."
        ) from err
|
|
|
|
|
|
|
|
|
|
|
|
@test_connection.register
def _(connection: GlueClient) -> None:
    """
    Test that we can connect to the source using the given aws client

    :param connection: wrapper holding the boto client to test
    :return: None or raise an exception if we cannot connect
    :raises SourceConnectionException: if listing the workflows fails;
        the original error is chained as its cause
    """
    from botocore.client import ClientError

    try:
        connection.client.list_workflows()
    except ClientError as err:
        raise SourceConnectionException(
            f"Connection error for {connection} - {err}. Check the connection details."
        ) from err
    except Exception as err:
        raise SourceConnectionException(
            f"Unknown error connecting with {connection} - {err}."
        ) from err
|