Teradata Connector (#16373)

* [WIP] add teradata connector

* [WIP] add teradata ingestion

* [WIP] add teradata connector

* [WIP] add teradata connector

* [WIP] add teradata connector

* [WIP] add teradata connector

* [WIP] add teradata connector

* [WIP] add teradata connector

* Reformat code

* Remove unused databaseName property
This commit is contained in:
gpby 2024-05-28 07:40:22 +03:00 committed by GitHub
parent 80bbe20a4f
commit d909a3141e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 572 additions and 1 deletions

View File

@ -54,6 +54,7 @@ VERSIONS = {
"elasticsearch8": "elasticsearch8~=8.9.0",
"giturlparse": "giturlparse",
"validators": "validators~=0.22.0",
"teradata": "teradatasqlalchemy>=20.0.0.0",
}
COMMONS = {
@ -264,6 +265,7 @@ plugins: Dict[str, Set[str]] = {
"snowflake": {VERSIONS["snowflake"]},
"superset": {}, # uses requests
"tableau": {VERSIONS["tableau"], VERSIONS["validators"], VERSIONS["packaging"]},
"teradata": {VERSIONS["teradata"]},
"trino": {VERSIONS["trino"]},
"vertica": {"sqlalchemy-vertica[vertica-python]>=0.0.5"},
"pii-processor": {

View File

@ -64,6 +64,9 @@ from metadata.generated.schema.entity.services.connections.database.snowflakeCon
from metadata.generated.schema.entity.services.connections.database.sqliteConnection import (
SQLiteType,
)
from metadata.generated.schema.entity.services.connections.database.teradataConnection import (
TeradataType,
)
from metadata.utils.singleton import Singleton
@ -112,6 +115,7 @@ MAP_CONNECTION_TYPE_DIALECT: Dict[str, Dialect] = {
str(SQLiteType.SQLite.value): Dialect.SQLITE,
str(MssqlType.Mssql.value): Dialect.TSQL,
str(AzureSQLType.AzureSQL.value): Dialect.TSQL,
str(TeradataType.Teradata.value): Dialect.TERADATA,
}

View File

@ -291,6 +291,17 @@ class ColumnTypeParser:
except ImportError:
pass
try:
# pylint: disable=import-outside-toplevel
from teradatasqlalchemy import BYTE, VARBYTE
_COLUMN_TYPE_MAPPING[BYTE] = "BINARY"
_SOURCE_TYPE_TO_OM_TYPE["BYTE"] = "BINARY"
_COLUMN_TYPE_MAPPING[VARBYTE] = "VARBINARY"
_SOURCE_TYPE_TO_OM_TYPE["VARBYTE"] = "VARBINARY"
except ImportError:
pass
@staticmethod
def get_column_type(column_type: Any) -> str:
for func in [

View File

@ -0,0 +1,98 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Source connection handler
"""
import enum
from typing import Optional
from urllib.parse import quote_plus
from sqlalchemy.engine import Engine
from metadata.generated.schema.entity.automations.workflow import (
Workflow as AutomationWorkflow,
)
from metadata.generated.schema.entity.services.connections.database.teradataConnection import (
TeradataConnection,
)
from metadata.ingestion.connections.builders import (
create_generic_db_connection,
get_connection_args_common,
get_connection_options_dict,
)
from metadata.ingestion.connections.test_connections import test_connection_db_common
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.database.teradata.queries import TERADATA_GET_DATABASE
def get_connection_url(connection: TeradataConnection) -> str:
    """
    Create the Teradata connection url.

    The url has the form ``<scheme>://<hostPort>/?user=<username>`` followed
    by ``&key=value`` pairs for, in this order:

    * the password, when one is set;
    * the standard options ``account``, ``logdata``, ``logmech`` and
      ``tmode`` that hold a truthy value;
    * every truthy entry of the connection options.

    Values are escaped with ``quote_plus``; enum members are rendered
    through their ``.value``.

    Args:
        connection: Teradata service connection configuration.

    Returns:
        The connection url.
    """

    def _as_param(key, value) -> str:
        """Render one ``key=value`` url parameter."""
        raw = value.value if isinstance(value, enum.Enum) else value
        return f"{key}={quote_plus(str(raw))}"

    url = f"{connection.scheme.value}://{connection.hostPort}/"
    url += f"?user={quote_plus(connection.username)}"
    if connection.password:
        url += f"&password={quote_plus(connection.password.get_secret_value())}"

    # add standard options
    params = [
        _as_param(key, getattr(connection, key))
        for key in ["account", "logdata", "logmech", "tmode"]
        if getattr(connection, key, None)
    ]

    # add additional options if specified
    options = get_connection_options_dict(connection)
    if options:
        params.extend(_as_param(key, value) for key, value in options.items() if value)

    # Append the parameters exactly once and only when there are some, so the
    # url is never duplicated and never ends with a dangling "&".
    if params:
        url = f"{url}&{'&'.join(params)}"
    return url
def get_connection(connection: TeradataConnection) -> Engine:
    """
    Build the engine for a Teradata service connection.

    Delegates to ``create_generic_db_connection``, handing it
    ``get_connection_url`` as the url builder and
    ``get_connection_args_common`` as the connection-arguments builder.
    """
    engine = create_generic_db_connection(
        connection=connection,
        get_connection_url_fn=get_connection_url,
        get_connection_args_fn=get_connection_args_common,
    )
    return engine
def test_connection(
    metadata: OpenMetadata,
    engine: Engine,
    service_connection: TeradataConnection,
    automation_workflow: Optional[AutomationWorkflow] = None,
) -> None:
    """
    Run the Teradata connection test.

    It can be triggered either from a metadata workflow or from an
    Automation Workflow; all arguments are forwarded unchanged to
    ``test_connection_db_common`` together with the extra test queries.
    """
    # NOTE(review): "GetDatabases" is presumably matched against a step name
    # of the Teradata test-connection definition - confirm such a step exists.
    test_connection_db_common(
        metadata=metadata,
        engine=engine,
        service_connection=service_connection,
        automation_workflow=automation_workflow,
        queries={"GetDatabases": TERADATA_GET_DATABASE},
    )

View File

@ -0,0 +1,148 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Teradata source implementation.
"""
import traceback
from typing import Iterable, Optional
from teradatasqlalchemy.dialect import TeradataDialect
from metadata.generated.schema.api.data.createStoredProcedure import (
CreateStoredProcedureRequest,
)
from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
from metadata.generated.schema.entity.data.storedProcedure import StoredProcedureCode
from metadata.generated.schema.entity.services.connections.database.teradataConnection import (
TeradataConnection,
)
from metadata.generated.schema.entity.services.ingestionPipelines.status import (
StackTraceError,
)
from metadata.generated.schema.metadataIngestion.workflow import (
Source as WorkflowSource,
)
from metadata.generated.schema.type.basic import EntityName
from metadata.ingestion.api.models import Either
from metadata.ingestion.api.steps import InvalidSourceException
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.database.common_db_source import CommonDbSourceService
from metadata.ingestion.source.database.teradata.models import (
STORED_PROC_LANGUAGE_MAP,
TeradataStoredProcedure,
)
from metadata.ingestion.source.database.teradata.queries import (
TERADATA_GET_STORED_PROCEDURES,
TERADATA_SHOW_STORED_PROCEDURE,
)
from metadata.ingestion.source.database.teradata.utils import get_table_comment
from metadata.utils import fqn
from metadata.utils.logger import ingestion_logger
from metadata.utils.sqlalchemy_utils import get_all_table_comments
# Module-level logger used by the Teradata source.
logger = ingestion_logger()
# Attach the table-comment helpers to the Teradata dialect class. These are
# class-level assignments executed when this module is imported.
TeradataDialect.get_table_comment = get_table_comment
TeradataDialect.get_all_table_comments = get_all_table_comments
class TeradataSource(CommonDbSourceService):
    """
    Implements the necessary methods to extract
    Database metadata from Teradata Source
    """

    @classmethod
    def create(
        cls, config_dict, metadata: OpenMetadata, pipeline_name: Optional[str] = None
    ):
        """
        Parse the workflow configuration and build the source.

        Raises:
            InvalidSourceException: if the configured service connection is
                not a ``TeradataConnection``.
        """
        config: WorkflowSource = WorkflowSource.parse_obj(config_dict)
        connection = config.serviceConnection.__root__.config
        if not isinstance(connection, TeradataConnection):
            raise InvalidSourceException(
                f"Expected TeradataConnection, but got {connection}"
            )
        return cls(config, metadata)

    def get_stored_procedures(self) -> Iterable[TeradataStoredProcedure]:
        """
        List Teradata stored procedures of the schema currently in context.

        Nothing is yielded unless ``includeStoredProcedures`` is enabled in
        the source config. A row that cannot be parsed, or whose definition
        cannot be fetched, is logged and reported as a failure; the remaining
        rows are still processed.
        """
        if self.source_config.includeStoredProcedures:
            results = self.engine.execute(
                TERADATA_GET_STORED_PROCEDURES.format(
                    schema_name=self.context.get().database_schema,
                )
            ).all()
            for row in results:
                try:
                    stored_procedure = TeradataStoredProcedure.parse_obj(dict(row))
                    stored_procedure.definition = self.describe_procedure_definition(
                        stored_procedure
                    )
                    yield stored_procedure
                except Exception as exc:
                    # The logger needs a message: a bare ``logger.error()``
                    # raises TypeError and would hide the original failure.
                    logger.debug(traceback.format_exc())
                    logger.error(f"Error parsing Stored Procedure payload: {exc}")
                    self.status.failed(
                        error=StackTraceError(
                            # The listing query exposes the name under the
                            # ``procedure_name`` column.
                            name=dict(row).get("procedure_name", "UNKNOWN"),
                            error=f"Error parsing Stored Procedure payload: {exc}",
                            stackTrace=traceback.format_exc(),
                        )
                    )

    def describe_procedure_definition(
        self, stored_procedure: TeradataStoredProcedure
    ) -> str:
        """
        We can only get the SP definition via SHOW PROCEDURE

        Returns the first column of the first result row as a string.
        """
        res = self.engine.execute(
            TERADATA_SHOW_STORED_PROCEDURE.format(
                schema_name=stored_procedure.database_schema,
                procedure_name=stored_procedure.procedure_name,
            )
        )
        return str(res.first()[0])

    def yield_stored_procedure(
        self, stored_procedure: TeradataStoredProcedure
    ) -> Iterable[Either[CreateStoredProcedureRequest]]:
        """
        Prepare the stored procedure payload.

        Yields an ``Either`` holding the create request on the right, or a
        ``StackTraceError`` on the left when building the request fails.
        """
        try:
            stored_procedure_request = CreateStoredProcedureRequest(
                name=EntityName(__root__=stored_procedure.procedure_name),
                description=None,
                storedProcedureCode=StoredProcedureCode(
                    language=STORED_PROC_LANGUAGE_MAP.get(
                        stored_procedure.procedure_type
                    ),
                    code=stored_procedure.definition,
                ),
                databaseSchema=fqn.build(
                    metadata=self.metadata,
                    entity_type=DatabaseSchema,
                    service_name=self.context.get().database_service,
                    database_name=self.context.get().database,
                    schema_name=stored_procedure.database_schema,
                ),
            )
            yield Either(right=stored_procedure_request)
            self.register_record_stored_proc_request(stored_procedure_request)
        except Exception as exc:
            yield Either(
                left=StackTraceError(
                    name=stored_procedure.procedure_name,
                    error=f"Error yielding Stored Procedure [{stored_procedure.procedure_name}] due to [{exc}]",
                    stackTrace=traceback.format_exc(),
                )
            )

View File

@ -0,0 +1,22 @@
"""
Teradata models
"""
from typing import Optional
from pydantic import BaseModel, Field
from metadata.generated.schema.entity.data.storedProcedure import Language
# Maps the ``procedure_type`` string of a stored procedure row to the
# OpenMetadata stored procedure language.
STORED_PROC_LANGUAGE_MAP = {
    "SQL": Language.SQL,
    "EXTERNAL": Language.External,
}


class TeradataStoredProcedure(BaseModel):
    """Teradata stored procedure list query results"""

    # Name of the procedure (required).
    procedure_name: str = Field(...)
    # Database the procedure belongs to.
    database_schema: Optional[str] = Field(None)
    # Key into STORED_PROC_LANGUAGE_MAP. The default is the plain string key
    # so that it is a ``str`` like the declared type and resolves in the map.
    procedure_type: str = Field("SQL")
    # Procedure source text; unset until it is fetched separately.
    definition: Optional[str] = Field(None)

View File

@ -0,0 +1,77 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
SQL Queries used during ingestion
"""
# Table and database names from dbc.tablesvx for the database bound to the
# ``:schema`` parameter, restricted to TableKind 'T', 'V' and 'O'.
TERADATA_GET_TABLE_NAMES = """
SELECT tablename, databasename from dbc.tablesvx
WHERE DataBaseName = :schema AND TableKind in ('T','V','O')
"""

# Comment string of every dbc.tablesvx entry with TableKind 'T', 'V' or 'O',
# exposed as the columns schema / table_name / table_comment.
TERADATA_TABLE_COMMENTS = """
SELECT DataBaseName as schema,
TableName as table_name,
CommentString as table_comment
FROM dbc.tablesvx
WHERE TableKind in ('T','V','O')
ORDER BY "schema", "table_name"
"""

# Procedures of one database: TableKind 'P' is reported as 'SQL' and 'E' as
# 'EXTERNAL'. ``{schema_name}`` is a str.format placeholder, not a bind
# parameter.
# NOTE(review): the value is interpolated into the SQL text as-is - confirm it
# only ever comes from trusted metadata.
TERADATA_GET_STORED_PROCEDURES = """
SELECT T.DatabaseName AS database_schema,
T.TableName AS procedure_name,
case T.TableKind
when 'P' then 'SQL'
when 'E' then 'EXTERNAL'
END as procedure_type
FROM DBC.TablesVX T
WHERE T.TableKind in ('P', 'E')
and T.DatabaseName = '{schema_name}'
"""

# SHOW PROCEDURE statement for a single procedure; both placeholders are
# str.format placeholders.
TERADATA_SHOW_STORED_PROCEDURE = """
SHOW PROCEDURE {schema_name}.{procedure_name};
"""

# Database name, object name and CreateText of views (TableKind 'V') read from
# dbc.tvm joined with DBC.Dbase. A fixed list of tvmid values is excluded and
# rows are limited to ids present in DBC.View_UserTablesExtVX or databases
# present in DBC.View_UserDBsExtVX.
TERADATA_VIEW_DEFINITIONS = """
select dbase.DatabaseNameI,
tvm.TVMNameI,
tvm.CreateText
from dbc.tvm tvm join DBC.Dbase dbase
on tvm.DatabaseId = dbase.DatabaseId
where TableKind in ('V')
AND tvm.tvmid NOT IN ( '00C001000000'xb, '00C002000000'xb,
'00C009000000'xb, '00C010000000'xb,
'00C017000000'xb, '000000000000'xb)
AND (tvm.tvmid IN
/* IDs of Tables accessible to the USER or *PUBLIC* */
(SELECT TVMId FROM DBC.View_UserTablesExtVX)
OR
dbase.DatabaseId IN
/* IDs of databases accessible to the USER or *PUBLIC* */
(SELECT DatabaseID FROM DBC.View_UserDBsExtVX)
)
"""

# Database names from dbc.databasesvx.
TERADATA_GET_DATABASE = """
select databasename from dbc.databasesvx
"""

# Same statement text as TERADATA_GET_DATABASE, kept under a second name.
TERADATA_GET_DB_NAMES = """
select databasename from dbc.databasesvx
"""

# InfoData of the dbc.dbcinfo row whose InfoKey is 'VERSION'.
TERADATA_GET_SERVER_VERSION = """
SELECT InfoData FROM dbc.dbcinfo
where InfoKey = 'VERSION'
"""

View File

@ -0,0 +1,34 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Teradata SQLAlchemy Helper Methods
"""
from sqlalchemy.engine import reflection
from metadata.ingestion.source.database.teradata.queries import TERADATA_TABLE_COMMENTS
from metadata.utils.logger import ingestion_logger
from metadata.utils.sqlalchemy_utils import get_table_comment_wrapper
logger = ingestion_logger()
@reflection.cache
def get_table_comment(
    self, connection, table_name, schema=None, **kw
):  # pylint: disable=unused-argument
    """
    Return the comment of ``table_name`` in ``schema``.

    The lookup is delegated to ``get_table_comment_wrapper`` using the
    ``TERADATA_TABLE_COMMENTS`` query; extra keyword arguments are accepted
    but not forwarded.
    """
    comment = get_table_comment_wrapper(
        self,
        connection,
        table_name=table_name,
        schema=schema,
        query=TERADATA_TABLE_COMMENTS,
    )
    return comment

View File

@ -0,0 +1,33 @@
{
"name": "Teradata",
"displayName": "Teradata Test Connection",
"description": "This Test Connection validates the access against the database and basic metadata extraction of schemas and tables.",
"steps": [
{
"name": "CheckAccess",
"description": "Validate that we can properly reach the database and authenticate with the given credentials.",
"errorMessage": "Failed to connect to Teradata, please validate the credentials",
"mandatory": true
},
{
"name": "GetSchemas",
"description": "List all the schemas available to the user.",
"errorMessage": "Failed to fetch schemas, please validate if the user has enough privilege to fetch schemas.",
"mandatory": true
},
{
"name": "GetTables",
"description": "From a given schema, list the tables belonging to that schema. If no schema is specified, we'll list the tables of a random schema.",
"errorMessage": "Failed to fetch tables, please validate if the user has enough privilege to fetch tables.",
"mandatory": true
},
{
"name": "GetViews",
"description": "From a given schema, list the views belonging to that schema. If no schema is specified, we'll list the views of a random schema.",
"errorMessage": "Failed to fetch views, please validate if the user has enough privilege to fetch views.",
"mandatory": true
}
]
}

View File

@ -0,0 +1,109 @@
{
"$id": "https://open-metadata.org/schema/entity/services/connections/database/teradataConnection.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "TeradataConnection",
"description": "Teradata Database Connection Config",
"type": "object",
"javaType": "org.openmetadata.schema.services.connections.database.TeradataConnection",
"definitions": {
"teradataType": {
"description": "Service type.",
"type": "string",
"enum": [
"Teradata"
],
"default": "Teradata"
},
"teradataScheme": {
"description": "SQLAlchemy driver scheme options.",
"type": "string",
"enum": [
"teradatasql"
],
"default": "teradatasql"
}
},
"properties": {
"type": {
"title": "Service Type",
"description": "Service Type",
"$ref": "#/definitions/teradataType",
"default": "Teradata"
},
"scheme": {
"title": "Connection Scheme",
"description": "SQLAlchemy driver scheme options.",
"$ref": "#/definitions/teradataScheme",
"default": "teradatasql"
},
"username": {
"title": "Username",
"description": "Username to connect to Teradata. This user should have privileges to read all the metadata in Teradata.",
"type": "string"
},
"password": {
"title": "Password",
"description": "Password to connect to Teradata.",
"type": "string",
"format": "password"
},
"logmech": {
  "title": "LOGMECH",
  "description": "Specifies the logon authentication method. Possible values are TD2 (the default), JWT, LDAP, KRB5 for Kerberos, or TDNEGO",
  "type": "string",
  "enum": ["TD2", "LDAP", "JWT", "KRB5", "CUSTOM", "TDNEGO"],
  "default": "TD2"
},
"logdata": {
"title": "Extra data for the chosen logon authentication method (LOGDATA)",
"description": "Specifies additional data needed by a logon mechanism, such as a secure token, Distinguished Name, or a domain/realm name. LOGDATA values are specific to each logon mechanism.",
"type": "string"
},
"hostPort": {
"title": "Host and Port",
"description": "Host and port of the Teradata service.",
"type": "string"
},
"tmode": {
"title": "Transaction mode",
"description": "Specifies the transaction mode for the connection",
"type": "string",
"enum": ["ANSI", "TERA", "DEFAULT"],
"default": "DEFAULT"
},
"account": {
"title": "Teradata Database account",
"description": "Specifies an account string to override the default account string defined for the database user. Accounts are used by the database for workload management and resource usage monitoring.",
"type": "string"
},
"connectionOptions": {
"title": "Connection Options",
"$ref": "../connectionBasicType.json#/definitions/connectionOptions"
},
"connectionArguments": {
"title": "Connection Arguments",
"$ref": "../connectionBasicType.json#/definitions/connectionArguments"
},
"supportsMetadataExtraction": {
"title": "Supports Metadata Extraction",
"$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction"
},
"supportsProfiler": {
"title": "Supports Profiler",
"$ref": "../connectionBasicType.json#/definitions/supportsProfiler"
},
"supportsQueryComment": {
"title": "Supports Query Comment",
"$ref": "../connectionBasicType.json#/definitions/supportsQueryComment"
},
"sampleDataStorageConfig": {
"title": "Storage Config for Sample Data",
"$ref": "../connectionBasicType.json#/definitions/sampleDataStorageConfig"
}
},
"additionalProperties": false,
"required": [
"hostPort",
"username"
]
}

View File

@ -54,7 +54,8 @@
"Doris",
"UnityCatalog",
"SAS",
"Iceberg"
"Iceberg",
"Teradata"
],
"javaEnums": [
{
@ -176,6 +177,9 @@
},
{
"name": "Iceberg"
},
{
"name": "Teradata"
}
]
},
@ -302,6 +306,9 @@
},
{
"$ref": "./connections/database/icebergConnection.json"
},
{
"$ref": "./connections/database/teradataConnection.json"
}
]
}

View File

@ -0,0 +1,3 @@
# Teradata
In this section, we provide guides and references to use the Teradata connector.

View File

@ -0,0 +1,10 @@
<svg width="250" height="250" viewBox="0 0 250 250" fill="none" xmlns="http://www.w3.org/2000/svg">
<g clip-path="url(#clip0_3464_20839)">
<path d="M125 28C68.4167 28 28 73.6708 28 125.647C28 180.209 68.4167 222 125 222C181.583 222 222 180.209 222 125.647C222 73.6708 181.583 28 125 28ZM96.4658 55.8067H124.757V82.1583H153.696V105.277H124.757V147.472C124.757 160.243 130.982 165.821 140.358 165.821C143.753 165.821 148.28 164.851 152.564 163.234C155.636 171.317 161.052 178.592 166.872 184.412C158.248 189.497 148.429 192.203 138.418 192.253C113.198 192.253 96.4658 178.916 96.4658 149.573V55.8875V55.8067Z" fill="#F37440"/>
</g>
<defs>
<clipPath id="clip0_3464_20839">
<rect width="194" height="194" fill="white" transform="translate(28 28)"/>
</clipPath>
</defs>
</svg>

After

Width:  |  Height:  |  Size: 772 B

View File

@ -94,6 +94,7 @@ import openSearch from '../assets/svg/open-search.svg';
import pipelineDefault from '../assets/svg/pipeline.svg';
import plus from '../assets/svg/plus.svg';
import mlflow from '../assets/svg/service-icon-mlflow.svg';
import teradata from '../assets/svg/teradata.svg';
import topicDefault from '../assets/svg/topic.svg';
import { EntityType } from '../enums/entity.enum';
import { ServiceCategory } from '../enums/service.enum';
@ -201,6 +202,7 @@ export const OPEN_SEARCH = openSearch;
export const PLUS = plus;
export const NOSERVICE = noService;
export const ICEBERGE = iceberge;
export const TERADATA = teradata;
export const excludedService = [
MlModelServiceType.Sklearn,
MetadataServiceType.MetadataES,
@ -396,6 +398,7 @@ export const BETA_SERVICES = [
DatabaseServiceType.Couchbase,
DatabaseServiceType.Greenplum,
DatabaseServiceType.Iceberg,
DatabaseServiceType.Teradata,
];
export const TEST_CONNECTION_INITIAL_MESSAGE = i18n.t(

View File

@ -49,6 +49,7 @@ import sasConnection from '../jsons/connectionSchemas/connections/database/sasCo
import singleStoreConnection from '../jsons/connectionSchemas/connections/database/singleStoreConnection.json';
import snowflakeConnection from '../jsons/connectionSchemas/connections/database/snowflakeConnection.json';
import sqliteConnection from '../jsons/connectionSchemas/connections/database/sqliteConnection.json';
import teradataConnection from '../jsons/connectionSchemas/connections/database/teradataConnection.json';
import trinoConnection from '../jsons/connectionSchemas/connections/database/trinoConnection.json';
import unityCatalogConnection from '../jsons/connectionSchemas/connections/database/unityCatalogConnection.json';
import verticaConnection from '../jsons/connectionSchemas/connections/database/verticaConnection.json';
@ -248,6 +249,11 @@ export const getDatabaseConfig = (type: DatabaseServiceType) => {
break;
}
case DatabaseServiceType.Teradata: {
schema = teradataConnection;
break;
}
default: {
schema = {};

View File

@ -84,6 +84,7 @@ import {
SQLITE,
SUPERSET,
TABLEAU,
TERADATA,
TOPIC_DEFAULT,
TRINO,
UNITYCATALOG,
@ -380,6 +381,9 @@ class ServiceUtilClassBase {
case SearchServiceType.OpenSearch:
return OPEN_SEARCH;
case DatabaseServiceType.Teradata:
return TERADATA;
default: {
let logo;
if (serviceTypes.messagingServices.includes(type)) {