Databricks Source Fix (#3917)
This commit is contained in:
Mayur Singal 2022-04-07 20:10:50 +05:30 committed by GitHub
parent 93525aea0d
commit 3651efd7f5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 95 additions and 39 deletions

View File

@ -12,7 +12,13 @@
"connectionArguments": {
"javaType": "org.openmetadata.catalog.services.connections.database.ConnectionArguments",
"description": "Additional connection arguments such as security or protocol configs that can be sent to service during connection.",
"type": "object"
"type": "object",
"properties": {
"http_path": {
"description": "HTTP path of databricks cluster",
"type": "string"
}
}
}
}
}

View File

@ -7,5 +7,5 @@ Provides metadata version information.
from incremental import Version
__version__ = Version("metadata", 0, 9, 0, dev=11)
__version__ = Version("metadata", 0, 9, 0, dev=12)
__all__ = ["__version__"]

View File

@ -1,29 +1,28 @@
{
"source": {
"type": "databricks",
"source": {
"type": "databricks",
"serviceName": "local_databricks",
"serviceConnection": {
"config": {
"token": "<databricks token>",
"host_port": "host:port",
"hostPort": "localhost:443",
"database": "default",
"service_name": "local_databricks",
"connect_args":{
"http_path": "http_path"
},
"table_filter_pattern": {
"excludes": []
"connectionArguments":{
"http_path": "<http path of databricks cluster>"
}
}
},
"sink": {
"type": "metadata-rest",
"config": {}
},
"metadata_server": {
"type": "metadata-server",
"config": {
"api_endpoint": "http://localhost:8585/api",
"auth_provider_type": "no-auth"
}
"sourceConfig": {"config": {"enableDataProfiler": false}}
},
"sink": {
"type": "metadata-rest",
"config": {}
},
"workflowConfig": {
"openMetadataServerConfig": {
"hostPort": "http://localhost:8585/api",
"authProvider": "no-auth"
}
}
}

View File

@ -17,11 +17,17 @@ from sqlalchemy.engine import reflection
from sqlalchemy.sql.sqltypes import String
from sqlalchemy_databricks._dialect import DatabricksDialect
from metadata.generated.schema.entity.services.connections.database.databricksConnection import (
DatabricksConnection,
)
from metadata.generated.schema.metadataIngestion.workflow import (
OpenMetadataServerConfig,
)
from metadata.generated.schema.metadataIngestion.workflow import (
Source as WorkflowSource,
)
from metadata.ingestion.api.source import InvalidSourceException
from metadata.ingestion.source.sql_source import SQLSource
from metadata.ingestion.source.sql_source_common import SQLConnectionConfig
class STRUCT(String):
@ -102,24 +108,15 @@ def get_columns(self, connection, table_name, schema=None, **kw):
DatabricksDialect.get_columns = get_columns
from metadata.generated.schema.entity.services.connections.database.databricksConnection import (
DatabricksConnection,
)
class DatabricksConfig(DatabricksConnection, SQLConnectionConfig):
    """Legacy-style Databricks configuration combining the generated
    connection model with the common SQL connection settings."""

    def get_connection_url(self):
        """Return the SQLAlchemy URL ``<scheme>://token:<token>@<hostPort>[/<database>]``.

        The ``/<database>`` segment is appended only when a database is set.
        """
        endpoint = f"{self.scheme}://token:{self.token}@{self.hostPort}"
        return f"{endpoint}/{self.database}" if self.database else endpoint
# Databricks metadata source; all actual ingestion work is delegated to the
# generic SQLSource base class.
class DatabricksSource(SQLSource):
# Pass-through constructor — SQLSource performs the real setup.
def __init__(self, config, metadata_config):
super().__init__(config, metadata_config)
# Factory used by the ingestion workflow to build this source from a raw
# config dict, validating that the connection is actually a Databricks one.
@classmethod
def create(cls, config_dict, metadata_config: OpenMetadataServerConfig):
# NOTE(review): the next line looks like a stale pre-refactor leftover —
# its result is immediately overwritten by the WorkflowSource parse below;
# confirm against the upstream diff before removing.
config = DatabricksConfig.parse_obj(config_dict)
config: WorkflowSource = WorkflowSource.parse_obj(config_dict)
# Extract the typed service connection from the parsed workflow source.
connection: DatabricksConnection = config.serviceConnection.__root__.config
if not isinstance(connection, DatabricksConnection):
raise InvalidSourceException(
f"Expected DatabricksConnection, but got {connection}"
)
return cls(config, metadata_config)

View File

@ -17,6 +17,9 @@ from urllib.parse import quote_plus
from metadata.generated.schema.entity.services.connections.database.clickhouseConnection import (
ClickhouseConnection,
)
from metadata.generated.schema.entity.services.connections.database.databricksConnection import (
DatabricksConnection,
)
from metadata.generated.schema.entity.services.connections.database.mssqlConnection import (
MssqlConnection,
)
@ -82,3 +85,10 @@ def _(connection: SQLiteConnection):
"""
return f"{connection.scheme.value}:///:memory:"
def get_connection_url(connection: DatabricksConnection):
    """Build the SQLAlchemy connection URL for a Databricks cluster.

    Shape: ``<scheme>://token:<token>@<hostPort>[/<database>]`` — the
    database segment is appended only when one is configured.

    NOTE(review): the token is interpolated verbatim; presumably Databricks
    tokens never contain URL-reserved characters — confirm, otherwise
    ``quote_plus`` would be needed.
    """
    base = f"{connection.scheme.value}://token:{connection.token}@{connection.hostPort}"
    if not connection.database:
        return base
    return f"{base}/{connection.database}"

View File

@ -0,0 +1,44 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from unittest import TestCase
from metadata.generated.schema.entity.services.connections.database.databricksConnection import (
DatabricksConnection,
DatabricksScheme,
)
from metadata.utils.source_connections import get_connection_url
class DatabricksConnectionTest(TestCase):
    """Unit tests for get_connection_url() applied to Databricks connections."""

    def test_connection_url_without_db(self):
        """Without a database, the URL must carry no trailing /<db> segment."""
        conn = DatabricksConnection(
            scheme=DatabricksScheme.databricks_connector,
            hostPort="1.1.1.1:443",
            token="KlivDTACWXKmZVfN1qIM",
        )
        assert (
            get_connection_url(conn)
            == "databricks+connector://token:KlivDTACWXKmZVfN1qIM@1.1.1.1:443"
        )

    def test_connection_url_with_db(self):
        """With a database configured, the URL ends in /<database>."""
        conn = DatabricksConnection(
            scheme=DatabricksScheme.databricks_connector,
            hostPort="1.1.1.1:443",
            token="KlivDTACWXKmZVfN1qIM",
            database="default",
        )
        assert (
            get_connection_url(conn)
            == "databricks+connector://token:KlivDTACWXKmZVfN1qIM@1.1.1.1:443/default"
        )