mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-08-03 14:47:59 +00:00
879 lines
28 KiB
Python
879 lines
28 KiB
Python
# Copyright 2021 Collate
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
"""
|
|
Build and document all supported Engines
|
|
"""
|
|
import json
|
|
import logging
|
|
import os
|
|
import traceback
|
|
from functools import singledispatch
|
|
from typing import Union
|
|
|
|
import pkg_resources
|
|
import requests
|
|
from sqlalchemy import create_engine
|
|
from sqlalchemy.engine.base import Engine
|
|
from sqlalchemy.event import listen
|
|
from sqlalchemy.exc import OperationalError
|
|
from sqlalchemy.orm import scoped_session, sessionmaker
|
|
from sqlalchemy.orm.session import Session
|
|
from sqlalchemy.pool import QueuePool
|
|
|
|
from metadata.clients.connection_clients import (
|
|
AirByteClient,
|
|
DatalakeClient,
|
|
DeltaLakeClient,
|
|
DynamoClient,
|
|
FivetranClient,
|
|
GlueDBClient,
|
|
GluePipelineClient,
|
|
KafkaClient,
|
|
LookerClient,
|
|
MetabaseClient,
|
|
MlflowClientWrapper,
|
|
ModeClient,
|
|
NifiClientWrapper,
|
|
PowerBiClient,
|
|
RedashClient,
|
|
SalesforceClient,
|
|
SupersetClient,
|
|
TableauClient,
|
|
)
|
|
from metadata.clients.nifi_client import NifiClient
|
|
from metadata.generated.schema.entity.services.connections.connectionBasicType import (
|
|
ConnectionArguments,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.dashboard.lookerConnection import (
|
|
LookerConnection,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.dashboard.metabaseConnection import (
|
|
MetabaseConnection,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.dashboard.modeConnection import (
|
|
ModeConnection,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.dashboard.powerBIConnection import (
|
|
PowerBIConnection,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.dashboard.redashConnection import (
|
|
RedashConnection,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.dashboard.supersetConnection import (
|
|
SupersetConnection,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.dashboard.tableauConnection import (
|
|
TableauConnection,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.database.bigQueryConnection import (
|
|
BigQueryConnection,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.database.databricksConnection import (
|
|
DatabricksConnection,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.database.datalakeConnection import (
|
|
DatalakeConnection,
|
|
GCSConfig,
|
|
S3Config,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.database.deltaLakeConnection import (
|
|
DeltaLakeConnection,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.database.dynamoDBConnection import (
|
|
DynamoDBConnection,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.database.glueConnection import (
|
|
GlueConnection as GlueDBConnection,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.database.salesforceConnection import (
|
|
SalesforceConnection,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.database.snowflakeConnection import (
|
|
SnowflakeConnection,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.messaging.kafkaConnection import (
|
|
KafkaConnection,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.messaging.redpandaConnection import (
|
|
RedpandaConnection,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.mlmodel.mlflowConnection import (
|
|
MlflowConnection,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.pipeline.airbyteConnection import (
|
|
AirbyteConnection,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.pipeline.airflowConnection import (
|
|
AirflowConnection,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.pipeline.backendConnection import (
|
|
BackendConnection,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.pipeline.dagsterConnection import (
|
|
DagsterConnection,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.pipeline.fivetranConnection import (
|
|
FivetranConnection,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.pipeline.glueConnection import (
|
|
GlueConnection as GluePipelineConnection,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.pipeline.nifiConnection import (
|
|
NifiConnection,
|
|
)
|
|
from metadata.orm_profiler.orm.functions.conn_test import ConnTestFn
|
|
from metadata.utils.credentials import set_google_credentials
|
|
from metadata.utils.source_connections import get_connection_args, get_connection_url
|
|
from metadata.utils.timeout import timeout
|
|
|
|
logger = logging.getLogger("Utils")
|
|
|
|
|
|
class SourceConnectionException(Exception):
    """
    Error signalling that a connection to the source
    could not be established.
    """
|
|
|
|
|
|
def render_query_header(ometa_version: str) -> str:
    """
    Build the SQL block comment carrying the OpenMetadata app name and version.

    :param ometa_version: value stored under the "version" key
    :return: the JSON payload wrapped between "/* " and " */"
    """
    payload = json.dumps({"app": "OpenMetadata", "version": ometa_version})
    return "/* " + payload + " */"
|
|
|
|
|
|
def inject_query_header(conn, cursor, statement, parameters, context, executemany):
    """
    Prepend the OpenMetadata header comment to a statement.

    Registered in this module as a "before_cursor_execute" listener with
    `retval=True`, hence the (statement, parameters) return value.

    :param statement: statement text about to be executed
    :param parameters: returned untouched
    :return: tuple of the header-prefixed statement and the parameters
    """
    distribution = pkg_resources.require("openmetadata-ingestion")[0]
    header = render_query_header(distribution.version)
    return "\n".join((header, statement)), parameters
|
|
|
|
|
|
def create_generic_connection(connection, verbose: bool = False) -> Engine:
    """
    Create a SQLAlchemy Engine out of a connection model.

    :param connection: JSON Schema connection model
    :param verbose: forwarded to the Engine as its `echo` flag
    :return: SQLAlchemy Engine
    """
    url = get_connection_url(connection)
    engine_options = {
        "connect_args": get_connection_args(connection),
        "poolclass": QueuePool,
        # https://docs.sqlalchemy.org/en/14/core/pooling.html#reset-on-return
        "pool_reset_on_return": None,
        "echo": verbose,
        "max_overflow": -1,
    }
    engine = create_engine(url, **engine_options)

    # The header hook is attached only when the model exposes the attribute
    if not hasattr(connection, "supportsQueryComment"):
        return engine

    listen(engine, "before_cursor_execute", inject_query_header, retval=True)
    return engine
|
|
|
|
|
|
@singledispatch
def get_connection(
    connection, verbose: bool = False
) -> Union[
    Engine,
    DynamoClient,
    GlueDBClient,
    GluePipelineClient,
    SalesforceClient,
    KafkaClient,
]:
    """
    Fallback for connection models without a dedicated handler:
    build the SQLAlchemy Engine through the generic path.

    :param connection: connection model to dispatch on
    :param verbose: passed through to the Engine creation
    """
    engine = create_generic_connection(connection, verbose)
    return engine
|
|
|
|
|
|
@get_connection.register
def _(connection: DatabricksConnection, verbose: bool = False):
    """
    Build the Databricks Engine. When `httpPath` is set, it is copied
    into the connection arguments as `http_path` first.
    """
    http_path = connection.httpPath
    if http_path:
        # Make sure there is an arguments object to attach the path to
        if not connection.connectionArguments:
            connection.connectionArguments = ConnectionArguments()
        connection.connectionArguments.http_path = http_path

    return create_generic_connection(connection, verbose)
|
|
|
|
|
|
@get_connection.register
def _(connection: SnowflakeConnection, verbose: bool = False) -> Engine:
    """
    Build the Snowflake Engine.

    When a private key is configured, it is loaded from PEM, re-serialised
    as unencrypted DER/PKCS8 bytes and handed over as the `private_key`
    connection argument.

    :param connection: Snowflake connection
    :param verbose: debugger or not
    :return: Engine
    """
    if connection.privateKey:

        from cryptography.hazmat.backends import default_backend
        from cryptography.hazmat.primitives import serialization

        passphrase = (
            connection.snowflakePrivatekeyPassphrase.get_secret_value()
            if connection.snowflakePrivatekeyPassphrase
            else ""
        )

        if not passphrase:
            logger.warning(
                "Snowflake Private Key Passphrase not found, assuming the private key is not encrypted"
            )

        p_key = serialization.load_pem_private_key(
            bytes(connection.privateKey.get_secret_value(), "utf-8"),
            # The loader expects None, not empty bytes, for unencrypted keys
            password=passphrase.encode() if passphrase else None,
            backend=default_backend(),
        )
        pkb = p_key.private_bytes(
            encoding=serialization.Encoding.DER,
            format=serialization.PrivateFormat.PKCS8,
            encryption_algorithm=serialization.NoEncryption(),
        )

        # Keep any user-provided connection arguments instead of discarding them
        connection_args = dict(connection.connectionArguments or {})
        connection_args["private_key"] = pkb
        connection.connectionArguments = connection_args

    return create_generic_connection(connection, verbose)
|
|
|
|
|
|
@get_connection.register
def _(connection: BigQueryConnection, verbose: bool = False) -> Engine:
    """
    Set the Google credentials from the connection, then create the Engine.

    :param connection: BigQuery connection
    :param verbose: debugger or not
    :return: Engine
    """
    credentials = connection.credentials
    set_google_credentials(gcs_credentials=credentials)

    engine = create_generic_connection(connection, verbose)
    return engine
|
|
|
|
|
|
@get_connection.register
def _(connection: DynamoDBConnection, verbose: bool = False) -> DynamoClient:
    """
    Return the DynamoDB client built by AWSClient from `connection.awsConfig`.
    """
    from metadata.clients.aws_client import AWSClient

    aws_client = AWSClient(connection.awsConfig)
    return aws_client.get_dynomo_client()
|
|
|
|
|
|
@get_connection.register
def _(connection: GlueDBConnection, verbose: bool = False) -> GlueDBClient:
    """
    Return the Glue database client built by AWSClient from `connection.awsConfig`.
    """
    from metadata.clients.aws_client import AWSClient

    aws_client = AWSClient(connection.awsConfig)
    return aws_client.get_glue_db_client()
|
|
|
|
|
|
@get_connection.register
def _(
    connection: GluePipelineConnection, verbose: bool = False
) -> GluePipelineClient:
    """
    Return the Glue pipeline client built by AWSClient from `connection.awsConfig`.

    :param connection: Glue pipeline connection
    :param verbose: unused here, kept for a uniform handler signature
    :return: the result of `get_glue_pipeline_client()`
    """
    from metadata.clients.aws_client import AWSClient

    glue_connection = AWSClient(connection.awsConfig).get_glue_pipeline_client()
    return glue_connection
|
|
|
|
|
|
@get_connection.register
def _(connection: SalesforceConnection, verbose: bool = False) -> SalesforceClient:
    """
    Open a Salesforce session with the username, password and security
    token of the connection, and wrap it in a SalesforceClient.
    """
    from simple_salesforce import Salesforce

    sf_session = Salesforce(
        connection.username,
        password=connection.password.get_secret_value(),
        security_token=connection.securityToken.get_secret_value(),
    )
    return SalesforceClient(sf_session)
|
|
|
|
|
|
@get_connection.register
def _(connection: DeltaLakeConnection, verbose: bool = False) -> DeltaLakeClient:
    """
    Configure a Hive-enabled SparkSession with the Delta extensions and
    wrap it in a DeltaLakeClient.

    The metastore is taken from `metastoreHostPort` when set, otherwise from
    `metastoreFilePath`. Any `connectionArguments` are applied last as
    extra Spark config entries.
    """
    import pyspark
    from delta import configure_spark_with_delta_pip

    session_builder = pyspark.sql.SparkSession.builder.appName("random")
    session_builder = session_builder.enableHiveSupport()
    session_builder = session_builder.config(
        "spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension"
    )
    session_builder = session_builder.config(
        "spark.sql.catalog.spark_catalog",
        "org.apache.spark.sql.delta.catalog.DeltaCatalog",
    )
    # Download delta-core jars when creating the SparkSession
    session_builder = session_builder.config(
        "spark.jars.packages", "io.delta:delta-core_2.12:2.0.0"
    )

    metastore_option = None
    if connection.metastoreHostPort:
        metastore_option = (
            "hive.metastore.uris",
            f"thrift://{connection.metastoreHostPort}",
        )
    elif connection.metastoreFilePath:
        # From https://stackoverflow.com/questions/38377188/how-to-get-rid-of-derby-log-metastore-db-from-spark-shell
        # derby.system.home is the one in charge of the path for `metastore_db` dir and `derby.log`
        # We can use this option to control testing, as well as to properly point to the right
        # local database when ingesting data
        metastore_option = (
            "spark.driver.extraJavaOptions",
            f"-Dderby.system.home={connection.metastoreFilePath}",
        )
    if metastore_option is not None:
        session_builder.config(*metastore_option)

    if connection.connectionArguments:
        for key, value in connection.connectionArguments:
            session_builder.config(key, value)

    spark_session = configure_spark_with_delta_pip(session_builder).getOrCreate()
    return DeltaLakeClient(spark_session)
|
|
|
|
|
|
@get_connection.register(KafkaConnection)
@get_connection.register(RedpandaConnection)
def _(connection, verbose: bool = False) -> KafkaClient:
    """
    Build the Kafka admin client and, when a Schema Registry URL is
    configured, the Schema Registry client and an Avro consumer.

    Note that `connection.consumerConfig` and
    `connection.schemaRegistryConfig` are updated in place.
    """
    from confluent_kafka.admin import AdminClient
    from confluent_kafka.avro import AvroConsumer
    from confluent_kafka.schema_registry.schema_registry_client import (
        SchemaRegistryClient,
    )

    # Same dict object as the one held by the connection model
    admin_config = connection.consumerConfig
    admin_config["bootstrap.servers"] = connection.bootstrapServers
    admin = AdminClient(admin_config)

    registry = None
    consumer = None
    if connection.schemaRegistryURL:
        registry_config = connection.schemaRegistryConfig
        registry_config["url"] = connection.schemaRegistryURL
        registry = SchemaRegistryClient(registry_config)
        registry_config["url"] = str(connection.schemaRegistryURL)

        consumer_config = dict(connection.consumerConfig)
        consumer_config["bootstrap.servers"] = connection.bootstrapServers
        # Defaults are applied only for keys the user did not provide
        for option, fallback in (
            ("group.id", "openmetadata-consumer"),
            ("auto.offset.reset", "earliest"),
        ):
            if option not in consumer_config:
                consumer_config[option] = fallback
        logger.debug(f"Using Kafka consumer config: {consumer_config}")
        consumer = AvroConsumer(consumer_config, schema_registry=registry)

    return KafkaClient(
        admin_client=admin,
        schema_registry_client=registry,
        consumer_client=consumer,
    )
|
|
|
|
|
|
def create_and_bind_session(engine: Engine) -> Session:
    """
    Return a new Session bound to the given engine.

    :param engine: Engine the session will operate against
    :return: Session created from a sessionmaker configured with that engine
    """
    session_factory = sessionmaker()
    session_factory.configure(bind=engine)

    bound_session = session_factory()
    return bound_session
|
|
|
|
|
|
def create_and_bind_thread_safe_session(engine: Engine) -> scoped_session:
    """
    Given an engine, create a scoped_session registry
    bound to it to make our operations.

    :param engine: Engine the underlying sessionmaker is bound to
    :return: scoped_session wrapping that sessionmaker
    """
    session = sessionmaker()
    session.configure(bind=engine)
    return scoped_session(session)
|
|
|
|
|
|
@timeout(seconds=120)
@singledispatch
def test_connection(connection) -> None:
    """
    Fallback tester: treat the argument as an engine.

    Opens a connection and executes `ConnTestFn()` on it.

    :param connection: Engine to test
    :return: None or raise a SourceConnectionException if we cannot connect
    """
    try:
        with connection.connect() as conn:
            conn.execute(ConnTestFn())
    except OperationalError as err:
        raise SourceConnectionException(
            f"Connection error for {connection}: {err}. Check the connection details."
        )
    except Exception as exc:
        raise SourceConnectionException(
            f"Unknown error connecting with {connection}: {exc}."
        )
|
|
|
|
|
|
@test_connection.register
def _(connection: DynamoClient) -> None:
    """
    Test that we can connect to the source using the given aws resource

    :param connection: wrapper whose `client` exposes the `tables` collection
    :return: None or raise an exception if we cannot connect
    """
    from botocore.client import ClientError

    try:
        # Resource collections are lazy: `tables.all()` on its own sends no
        # request. Materialising at most one table forces a real call.
        list(connection.client.tables.limit(1))
    except ClientError as err:
        msg = f"Connection error for {connection}: {err}. Check the connection details."
        raise SourceConnectionException(msg)
    except Exception as exc:
        msg = f"Unknown error connecting with {connection}: {exc}."
        raise SourceConnectionException(msg)
|
|
|
|
|
|
@test_connection.register
def _(connection: GlueDBClient) -> None:
    """
    Test that we can connect to the source using the given aws resource

    :param connection: wrapper whose `client` is the boto client to test
    :return: None or raise an exception if we cannot connect
    """
    from botocore.client import ClientError

    try:
        # Probe the data catalog API, which is what the database client is
        # about, rather than the workflows (pipeline) API.
        connection.client.get_databases()
    except ClientError as err:
        msg = f"Connection error for {connection}: {err}. Check the connection details."
        raise SourceConnectionException(msg)
    except Exception as exc:
        msg = f"Unknown error connecting with {connection}: {exc}."
        raise SourceConnectionException(msg)
|
|
|
|
|
|
@test_connection.register
def _(connection: GluePipelineClient) -> None:
    """
    Test that we can connect to the source using the given aws resource

    :param connection: wrapper whose `client` is the boto client to test
    :return: None or raise an exception if we cannot connect
    """
    from botocore.client import ClientError

    try:
        # Building a paginator sends no request, so it cannot validate the
        # connection. Listing workflows performs a real pipeline API call.
        connection.client.list_workflows()
    except ClientError as err:
        msg = f"Connection error for {connection}: {err}. Check the connection details."
        raise SourceConnectionException(msg)
    except Exception as exc:
        msg = f"Unknown error connecting with {connection}: {exc}."
        raise SourceConnectionException(msg)
|
|
|
|
|
|
@test_connection.register
def _(connection: SalesforceClient) -> None:
    """
    Call `describe()` on the wrapped Salesforce client and turn any
    failure into a SourceConnectionException.
    """
    from simple_salesforce.exceptions import SalesforceAuthenticationFailed

    try:
        connection.client.describe()
    except SalesforceAuthenticationFailed as err:
        raise SourceConnectionException(
            f"Connection error for {connection}: {err}. Check the connection details."
        )
    except Exception as exc:
        raise SourceConnectionException(
            f"Unknown error connecting with {connection}: {exc}."
        )
|
|
|
|
|
|
@test_connection.register
def _(connection: KafkaClient) -> None:
    """
    List the topics through the admin client.

    When a Schema Registry client is present, list its subjects too.
    """
    try:
        connection.admin_client.list_topics().topics
        registry = connection.schema_registry_client
        if registry:
            registry.get_subjects()
    except Exception as exc:
        raise SourceConnectionException(
            f"Unknown error connecting with {connection}: {exc}."
        )
|
|
|
|
|
|
@test_connection.register
def _(connection: DeltaLakeClient) -> None:
    """
    List the catalog databases through the wrapped client; any failure
    is re-raised as a SourceConnectionException.
    """
    try:
        connection.client.catalog.listDatabases()
    except Exception as exc:
        raise SourceConnectionException(
            f"Unknown error connecting with {connection}: {exc}."
        )
|
|
|
|
|
|
@get_connection.register
def _(connection: MetabaseConnection, verbose: bool = False):
    """
    Log into Metabase by posting the credentials to `/api/session/` and
    return a MetabaseClient holding the connection and the session header.

    :raises SourceConnectionException: on any failure along the way
    """
    try:
        credentials = {
            "username": connection.username,
            "password": connection.password.get_secret_value(),
        }
        request_headers = {"Content-Type": "application/json", "Accept": "*/*"}

        resp = requests.post(
            connection.hostPort + "/api/session/",
            data=json.dumps(credentials),
            headers=request_headers,
        )

        session_header = {"X-Metabase-Session": resp.json()["id"]}
        return MetabaseClient(
            {"connection": connection, "metabase_session": session_header}
        )

    except Exception as exc:
        raise SourceConnectionException(
            f"Unknown error connecting with {connection}: {exc}."
        )
|
|
|
|
|
|
@test_connection.register
def _(connection: MetabaseClient) -> None:
    """
    Request `/api/dashboard` with the stored session header.

    Only exceptions raised by the request itself are reported;
    the response is not inspected.
    """
    try:
        metabase = connection.client
        requests.get(
            metabase["connection"].hostPort + "/api/dashboard",
            headers=metabase["metabase_session"],
        )
    except Exception as exc:
        raise SourceConnectionException(
            f"Unknown error connecting with {connection}: {exc}."
        )
|
|
|
|
|
|
@test_connection.register
def _(connection: AirflowConnection) -> None:
    """
    Delegate the test to the connection nested under `connection.connection`.
    """
    try:
        nested = connection.connection
        test_connection(nested)
    except Exception as exc:
        raise SourceConnectionException(
            f"Unknown error connecting with {connection}: {exc}."
        )
|
|
|
|
|
|
@get_connection.register
def _(connection: AirflowConnection, verbose: bool = False):
    """
    Build the connection nested under `connection.connection`.

    :param connection: Airflow connection wrapping the actual one
    :param verbose: accepted like every other handler and passed on, so that
        `get_connection(connection, verbose)` no longer fails for this type
    :return: whatever the nested connection's handler returns
    :raises SourceConnectionException: on any failure
    """
    try:
        return get_connection(connection.connection, verbose)
    except Exception as exc:
        msg = f"Unknown error connecting with {connection}: {exc}."
        raise SourceConnectionException(msg)
|
|
|
|
|
|
@get_connection.register
def _(connection: AirbyteConnection, verbose: bool = False):
    """
    Wrap an AirbyteClient built from the connection in an AirByteClient.
    """
    from metadata.clients.airbyte_client import AirbyteClient

    rest_client = AirbyteClient(connection)
    return AirByteClient(rest_client)
|
|
|
|
|
|
@test_connection.register
def _(connection: AirByteClient) -> None:
    """
    List the workspaces through the wrapped client; any failure is
    re-raised as a SourceConnectionException.
    """
    try:
        connection.client.list_workspaces()
    except Exception as exc:
        raise SourceConnectionException(
            f"Unknown error connecting with {connection}: {exc}."
        )
|
|
|
|
|
|
@get_connection.register
def _(connection: FivetranConnection, verbose: bool = False):
    """
    Wrap the Fivetran REST client built from the connection in a FivetranClient.
    """
    from metadata.clients.fivetran_client import FivetranClient as FivetranRestClient

    rest_client = FivetranRestClient(connection)
    return FivetranClient(rest_client)
|
|
|
|
|
|
@test_connection.register
def _(connection: FivetranClient) -> None:
    """
    List the groups through the wrapped client; any failure is
    re-raised as a SourceConnectionException.
    """
    try:
        connection.client.list_groups()
    except Exception as exc:
        raise SourceConnectionException(
            f"Unknown error connecting with {connection}: {exc}."
        )
|
|
|
|
|
|
@get_connection.register
def _(connection: RedashConnection, verbose: bool = False):
    """
    Create a Redash handle from the host and API key and wrap it
    in a RedashClient.

    :raises SourceConnectionException: if the handle cannot be created
    """
    from redash_toolbelt import Redash

    try:
        api_key = connection.apiKey.get_secret_value()
        return RedashClient(Redash(connection.hostPort, api_key))
    except Exception as exc:
        raise SourceConnectionException(
            f"Unknown error connecting with {connection}: {exc}."
        )
|
|
|
|
|
|
@test_connection.register
def _(connection: RedashClient) -> None:
    """
    Fetch the dashboards through the wrapped client; any failure is
    re-raised as a SourceConnectionException.
    """
    try:
        connection.client.dashboards()
    except Exception as exc:
        raise SourceConnectionException(
            f"Unknown error connecting with {connection}: {exc}."
        )
|
|
|
|
|
|
@get_connection.register
def _(connection: SupersetConnection, verbose: bool = False):
    """
    Wrap a SupersetAPIClient built from the connection in a SupersetClient.
    """
    from metadata.ingestion.ometa.superset_rest import SupersetAPIClient

    api_client = SupersetAPIClient(connection)
    return SupersetClient(api_client)
|
|
|
|
|
|
@test_connection.register
def _(connection: SupersetClient) -> None:
    """
    Fetch the menu through the wrapped client; any failure is
    re-raised as a SourceConnectionException.
    """
    try:
        connection.client.fetch_menu()
    except Exception as exc:
        raise SourceConnectionException(
            f"Unknown error connecting with {connection}: {exc}."
        )
|
|
|
|
|
|
@get_connection.register
def _(connection: TableauConnection, verbose: bool = False):
    """
    Sign in to Tableau and return a TableauClient.

    Username/password credentials take precedence. The personal access
    token is used only when both its name and a non-empty secret are set.

    :param connection: Tableau connection
    :param verbose: unused here, kept for a uniform handler signature
    :return: TableauClient, or None when the sign-in fails (the error is
        logged instead of raised)
    """

    from tableau_api_lib import TableauServerConnection

    env_config = {
        "server": connection.hostPort,
        "api_version": connection.apiVersion,
        "site_name": connection.siteName if connection.siteName else "",
        "site_url": connection.siteUrl if connection.siteUrl else "",
    }
    if connection.username and connection.password:
        env_config["username"] = connection.username
        env_config["password"] = connection.password.get_secret_value()
    elif (
        connection.personalAccessTokenName
        # Guard against a token name configured without its secret
        and connection.personalAccessTokenSecret
        and connection.personalAccessTokenSecret.get_secret_value()
    ):
        env_config["personal_access_token_name"] = connection.personalAccessTokenName
        env_config[
            "personal_access_token_secret"
        ] = connection.personalAccessTokenSecret.get_secret_value()

    # The credentials are written to `env_config` directly, so the outer key
    # is only ever built once, as a string.
    tableau_server_config = {f"{connection.env}": env_config}

    try:
        conn = TableauServerConnection(
            config_json=tableau_server_config,
            env=connection.env,
        )
        conn.sign_in().json()
        return TableauClient(conn)
    except Exception as exc:  # pylint: disable=broad-except
        logger.debug(traceback.format_exc())
        logger.warning(f"Unknown error connecting with {connection}: {exc}.")
        return None
|
|
|
|
|
|
@test_connection.register
def _(connection: TableauClient) -> None:
    """
    Request the server info through the wrapped client; any failure is
    re-raised as a SourceConnectionException.
    """
    try:
        connection.client.server_info()
    except Exception as exc:
        raise SourceConnectionException(
            f"Unknown error connecting with {connection}: {exc}."
        )
|
|
|
|
|
|
@get_connection.register
def _(connection: PowerBIConnection, verbose: bool = False):
    """
    Wrap a PowerBiApiClient built from the connection in a PowerBiClient.
    """
    from metadata.clients.powerbi_client import PowerBiApiClient

    api_client = PowerBiApiClient(connection)
    return PowerBiClient(api_client)
|
|
|
|
|
|
@test_connection.register
def _(connection: PowerBiClient) -> None:
    """
    Fetch the dashboards through the wrapped client; any failure is
    re-raised as a SourceConnectionException.
    """
    try:
        connection.client.fetch_dashboards()
    except Exception as exc:
        raise SourceConnectionException(
            f"Unknown error connecting with {connection}: {exc}."
        )
|
|
|
|
|
|
@get_connection.register
def _(connection: LookerConnection, verbose: bool = False):
    """
    Initialise the Looker SDK (API 4.0) and wrap it in a LookerClient.

    The LOOKERSDK_* environment variables are filled from the connection
    only when they are missing or empty.
    """
    import looker_sdk

    # Values are resolved lazily so the secret is read only if it is needed
    env_fallbacks = (
        ("LOOKERSDK_CLIENT_ID", lambda: connection.clientId),
        (
            "LOOKERSDK_CLIENT_SECRET",
            lambda: connection.clientSecret.get_secret_value(),
        ),
        ("LOOKERSDK_BASE_URL", lambda: connection.hostPort),
    )
    for env_name, resolve in env_fallbacks:
        if not os.environ.get(env_name):
            os.environ[env_name] = resolve()

    sdk = looker_sdk.init40()
    return LookerClient(client=sdk)
|
|
|
|
|
|
@test_connection.register
def _(connection: LookerClient) -> None:
    """
    Call `me()` on the wrapped SDK client; any failure is
    re-raised as a SourceConnectionException.
    """
    try:
        connection.client.me()
    except Exception as exc:
        raise SourceConnectionException(
            f"Unknown error connecting with {connection}: {exc}."
        )
|
|
|
|
|
|
@test_connection.register
def _(connection: DatalakeClient) -> None:
    """
    Test that we can reach the configured storage with the given client.

    For both GCS and S3 configs, the configured bucket is accessed when
    `bucketName` is set; otherwise the buckets are listed.

    :param connection: wrapper holding the storage `client` and the
        Datalake `config`
    :return: None or raise a SourceConnectionException if we cannot connect
    """
    from botocore.client import ClientError

    try:
        config = connection.config.configSource
        bucket_name = connection.config.bucketName

        if isinstance(config, GCSConfig):
            if bucket_name:
                connection.client.get_bucket(bucket_name)
            else:
                connection.client.list_buckets()

        elif isinstance(config, S3Config):
            if bucket_name:
                connection.client.list_objects(Bucket=bucket_name)
            else:
                connection.client.list_buckets()

    except ClientError as err:
        msg = f"Connection error for {connection}: {err}. Check the connection details."
        raise SourceConnectionException(msg)
    except Exception as exc:
        # Wrap other failures too (e.g. from the GCS client), as the other
        # testers in this module do.
        msg = f"Unknown error connecting with {connection}: {exc}."
        raise SourceConnectionException(msg)
|
|
|
|
|
|
@singledispatch
def get_datalake_client(config):
    """
    Fallback for config types without a registered handler.

    :param config: datalake config source
    :return: None when `config` is falsy
    :raises NotImplementedError: when `config` is truthy
    """
    if not config:
        return None
    raise NotImplementedError(
        f"Config not implemented for type {type(config)}: {config}"
    )
|
|
|
|
|
|
@get_connection.register
def _(connection: DatalakeConnection, verbose: bool = False) -> DatalakeClient:
    """
    Build the storage client for `connection.configSource` and wrap it,
    together with the connection, in a DatalakeClient.
    """
    storage_client = get_datalake_client(connection.configSource)
    return DatalakeClient(client=storage_client, config=connection)
|
|
|
|
|
|
@get_datalake_client.register
def _(config: S3Config):
    """
    Return the "s3" client built by AWSClient from `config.securityConfig`.
    """
    from metadata.clients.aws_client import AWSClient

    aws_client = AWSClient(config.securityConfig)
    return aws_client.get_client(service_name="s3")
|
|
|
|
|
|
@get_datalake_client.register
def _(config: GCSConfig):
    """
    Set the Google credentials from `config.securityConfig` and return
    a new `storage.Client`.
    """
    from google.cloud import storage

    credentials = config.securityConfig
    set_google_credentials(gcs_credentials=credentials)
    return storage.Client()
|
|
|
|
|
|
@get_connection.register
def _(connection: ModeConnection, verbose: bool = False):
    """
    Wrap a ModeApiClient built from the connection in a ModeClient.
    """
    from metadata.clients.mode_client import ModeApiClient

    api_client = ModeApiClient(connection)
    return ModeClient(api_client)
|
|
|
|
|
|
@test_connection.register
def _(connection: ModeClient) -> None:
    """
    Fetch the user account through the wrapped client; any failure is
    re-raised as a SourceConnectionException.
    """
    try:
        connection.client.get_user_account()
    except Exception as exc:
        raise SourceConnectionException(
            f"Unknown error connecting with {connection}: {exc}."
        )
|
|
|
|
|
|
@get_connection.register
def _(connection: MlflowConnection, verbose: bool = False):
    """
    Create an MlflowClient from the tracking and registry URIs of the
    connection and wrap it in an MlflowClientWrapper.
    """
    from mlflow.tracking import MlflowClient

    mlflow_client = MlflowClient(
        tracking_uri=connection.trackingUri,
        registry_uri=connection.registryUri,
    )
    return MlflowClientWrapper(mlflow_client)
|
|
|
|
|
|
@test_connection.register
def _(connection: MlflowClientWrapper) -> None:
    """
    List the registered models through the wrapped client; any failure is
    re-raised as a SourceConnectionException.
    """
    try:
        connection.client.list_registered_models()
    except Exception as exc:
        raise SourceConnectionException(
            f"Unknown error connecting with {connection}: {exc}."
        )
|
|
|
|
|
|
@get_connection.register
def _(connection: NifiConnection, verbose: bool = False):
    """
    Create a NifiClient from the host, credentials and SSL verification
    flag of the connection and wrap it in a NifiClientWrapper.
    """
    nifi_client = NifiClient(
        host_port=connection.hostPort,
        username=connection.username,
        password=connection.password.get_secret_value(),
        verify=connection.verifySSL,
    )
    return NifiClientWrapper(nifi_client)
|
|
|
|
|
|
@test_connection.register
def _(connection: NifiClientWrapper) -> None:
    """
    Read the `resources` attribute of the wrapped client; any failure is
    re-raised as a SourceConnectionException.
    """
    try:
        _ = connection.client.resources
    except Exception as err:
        msg = f"Unknown error connecting with {connection} - {err}."
        raise SourceConnectionException(msg)
|
|
|
|
|
|
@get_connection.register
def _(_: BackendConnection, verbose: bool = False):
    """
    Reuse Airflow's own database: return the bind of a session
    opened from `airflow.settings.Session`.
    """
    from airflow import settings

    with settings.Session() as session:
        bound_engine = session.get_bind()
    return bound_engine
|
|
|
|
|
|
@test_connection.register
def _(connection: DagsterConnection) -> None:
    """
    Delegate the test to the connection nested under `connection.dbConnection`.
    """
    try:
        nested = connection.dbConnection
        test_connection(nested)
    except Exception as exc:
        raise SourceConnectionException(
            f"Unknown error connecting with {connection}: {exc}."
        )
|
|
|
|
|
|
@get_connection.register
def _(connection: DagsterConnection, verbose: bool = False):
    """
    Build the connection nested under `connection.dbConnection`.

    :param connection: Dagster connection wrapping the database one
    :param verbose: accepted like every other handler and passed on, so that
        `get_connection(connection, verbose)` no longer fails for this type
    :return: whatever the nested connection's handler returns
    :raises SourceConnectionException: on any failure
    """
    try:
        return get_connection(connection.dbConnection, verbose)
    except Exception as exc:
        msg = f"Unknown error connecting with {connection}: {exc}."
        raise SourceConnectionException(msg)
|