issue-696: Added trino support for Openmetadata (#697)

* issue-696: Added trino support for Openmetadata

* issue-696: fixed linting issues

* issue-696: not mentioning Trino for now as it will be part of 0.5 release

Co-authored-by: jbuoncri <jbuoncri@cisco.com>
This commit is contained in:
James 2021-10-07 11:15:34 -07:00 committed by GitHub
parent 9657b53257
commit d455409cc9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 210 additions and 2 deletions

View File

@ -19,6 +19,7 @@
"Oracle",
"Athena",
"Presto",
"Trino",
"Vertica"
],
"javaEnums": [
@ -52,6 +53,9 @@
{
"name": "Presto"
},
{
"name": "Trino"
},
{
"name": "Vertica"
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

View File

@ -36,6 +36,7 @@ import snowflakes from '../assets/img/service-icon-snowflakes.png';
import mysql from '../assets/img/service-icon-sql.png';
import superset from '../assets/img/service-icon-superset.png';
import tableau from '../assets/img/service-icon-tableau.png';
import trino from '../assets/img/service-icon-trino.png';
import plus from '../assets/svg/plus.svg';
export const MYSQL = mysql;
@ -48,6 +49,7 @@ export const ORACLE = oracle;
export const SNOWFLAKE = snowflakes;
export const ATHENA = athena;
export const PRESTO = presto;
export const TRINO = trino;
export const KAFKA = kafka;
export const PULSAR = pulsar;
export const SUPERSET = superset;
@ -74,6 +76,7 @@ export const serviceTypes: Record<ServiceTypes, Array<string>> = {
'Oracle',
'Athena',
'Presto',
'Trino',
],
messagingServices: ['Kafka'],
dashboardServices: ['Superset', 'Looker', 'Tableau', 'Redash'],

View File

@ -33,6 +33,7 @@ export enum DatabaseServiceType {
MSSQL = 'MSSQL',
ATHENA = 'Athena',
PRESTO = 'Presto',
TRINO = 'Trino',
}
export enum MessagingServiceType {

View File

@ -76,4 +76,5 @@ export enum DatabaseServiceType {
Redshift = 'Redshift',
Snowflake = 'Snowflake',
Vertica = 'Vertica',
Trino = 'Trino',
}

View File

@ -94,4 +94,5 @@ export enum DatabaseServiceType {
Redshift = 'Redshift',
Snowflake = 'Snowflake',
Vertica = 'Vertica',
Trino = 'Trino',
}

View File

@ -23,6 +23,7 @@ import {
SNOWFLAKE,
SUPERSET,
TABLEAU,
TRINO,
} from '../constants/services.const';
import {
DashboardServiceType,
@ -64,6 +65,9 @@ export const serviceTypeLogo = (type: string) => {
case DatabaseServiceType.PRESTO:
return PRESTO;
case DatabaseServiceType.TRINO:
return TRINO;
case MessagingServiceType.KAFKA:
return KAFKA;
@ -206,6 +210,7 @@ export const getEntityCountByService = (buckets: Array<Bucket>) => {
case DatabaseServiceType.ORACLE:
case DatabaseServiceType.POSTGRES:
case DatabaseServiceType.PRESTO:
case DatabaseServiceType.TRINO:
case DatabaseServiceType.REDSHIFT:
case DatabaseServiceType.SNOWFLAKE:
entityCounts.tableCount += bucket.doc_count;

View File

@ -83,6 +83,7 @@
* [Oracle](install/metadata-ingestion/connectors/database-services/oracle.md)
* [Postgres](install/metadata-ingestion/connectors/database-services/postgres.md)
* [Presto](install/metadata-ingestion/connectors/database-services/presto.md)
* [Trino](install/metadata-ingestion/connectors/database-services/trino.md)
* [Redshift](install/metadata-ingestion/connectors/database-services/redshift.md)
* [Redshift Usage](install/metadata-ingestion/connectors/database-services/redshift-usage.md)
* [Snowflake](install/metadata-ingestion/connectors/database-services/snowflake.md)

View File

@ -0,0 +1,95 @@
---
description: This guide will help you install the Trino connector and run it manually
---
# Trino
{% hint style="info" %}
**Prerequisites**
1. Python 3.7 or above
2. OpenMetadata Server up and running
{% endhint %}
### Install from PyPI or Source
{% tabs %}
{% tab title="Install Using PyPI" %}
```bash
pip install 'openmetadata-ingestion[trino]'
```
{% endtab %}
{% endtabs %}
## Run Manually
```bash
metadata ingest -c ./examples/workflows/trino.json
```
### Configuration
{% code title="trino.json" %}
```javascript
"source": {
"type": "trino",
"config": {
"service_name": "local_trino",
"host_port": "192.168.1.32:8080",
"database": "default"
}
}, ...
```
{% endcode %}
1. **username** - optional. Set this (together with **password** below) if you connect to Trino with a username and password.
2. **password** - password for the username
3. **host_port** - host and port of the Trino cluster
4. **service_name** - Service Name for this Trino cluster. If you added the Trino cluster through the OpenMetadata UI, make sure this value matches the service name used there.
5. **filter_pattern** - contains `includes` and `excludes` options that select which datasets you want to ingest into OpenMetadata.
## Publish to OpenMetadata
Below is the configuration to publish Trino data into the OpenMetadata service.
Add the `metadata-rest` sink along with the `metadata-server` config.
{% code title="trino.json" %}
```javascript
{
"source": {
"type": "trino",
"config": {
"service_name": "local_trino",
"host_port": "192.168.1.32:8080",
"database": "default"
}
},
"sink": {
"type": "metadata-rest",
"config": {
}
},
"metadata_server": {
"type": "metadata-server",
"config": {
"api_endpoint": "http://localhost:8585/api",
"auth_provider_type": "no-auth"
}
},
"cron": {
"minute": "*/5",
"hour": null,
"day": null,
"month": null,
"day_of_week": null
}
}
```
{% endcode %}

View File

@ -48,7 +48,6 @@ Type: `object`
9. _"Athena"_
10. _"Presto"_
11. _"Vertica"_
12. _"Trino"_
_This document was updated on: Thursday, September 16, 2021_

View File

@ -56,6 +56,7 @@ our roadmap yet, please file an Issue [Github](https://github.com/open-metadata/
### Other features
* Data quality - Data profiler integration work in progress
* Schema versioning
* Support for Trino
## 0.6 Release - Nov 17th, 2021

View File

@ -6,6 +6,13 @@
"pipelineUrl": "http://localhost:8080/tree?dag_id=presto_etl",
"tasks": ["presto_task", "assert_table_exists"]
},
{
"name": "trino_etl",
"displayName": "Trino ETL",
"description": "Trino ETL pipeline",
"pipelineUrl": "http://localhost:8080/tree?dag_id=trino_etl",
"tasks": ["trino_task", "assert_table_exists"]
},
{
"name": "hive_etl",
"displayName": "Hive ETL",

View File

@ -30,6 +30,14 @@
"taskUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=assert_table_exists",
"downstreamTasks": ["assert_table_exists"],
"taskType": "PrestoOperator"
},
{
"name": "trino_task",
"displayName": "Trino Task",
"description": "Airflow operator to perform ETL on trino tables",
"taskUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=assert_table_exists",
"downstreamTasks": ["assert_table_exists"],
"taskType": "TrinoOperator"
}
]
}

View File

@ -0,0 +1,29 @@
{
"source": {
"type": "trino",
"config": {
"service_name": "local_trino",
"host_port": "192.168.1.32:8080",
"database": "default"
}
},
"sink": {
"type": "metadata-rest",
"config": {
}
},
"metadata_server": {
"type": "metadata-server",
"config": {
"api_endpoint": "http://localhost:8585/api",
"auth_provider_type": "no-auth"
}
},
"cron": {
"minute": "*/5",
"hour": null,
"day": null,
"month": null,
"day_of_week": null
}
}

View File

@ -98,6 +98,7 @@ plugins: Dict[str, Set[str]] = {
"oracle": {"cx_Oracle"},
"pii-processor": pii_requirements,
"presto": {"pyhive~=0.6.3"},
"trino": {"sqlalchemy-trino"},
"postgres": {"pymysql>=1.0.2", "psycopg2-binary", "GeoAlchemy2"},
"redash": {"redash-toolbelt==0.1.4"},
"redshift": {"openmetadata-sqlalchemy-redshift", "psycopg2-binary", "GeoAlchemy2"},

View File

@ -23,6 +23,7 @@ class DatabaseServiceType(Enum):
Oracle = 'Oracle'
Athena = 'Athena'
Presto = 'Presto'
Trino = 'Trino'
Vertica = 'Vertica'

View File

@ -64,6 +64,8 @@ def get_service_type_from_database_uri(uri: str) -> str:
return "snowflake"
if uri.startswith("presto"):
return "presto"
if uri.startswith("trino"):
return "trino"
if uri.startswith("postgresql"):
return "postgres"
if uri.startswith("pinot"):

View File

@ -0,0 +1,48 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from urllib.parse import quote_plus
from .sql_source import SQLSource, SQLConnectionConfig
from ..ometa.openmetadata_rest import MetadataServerConfig
class TrinoConfig(SQLConnectionConfig):
    """Connection settings for a Trino cluster.

    Overrides the defaults for ``host_port``, ``scheme`` and ``service_type``.
    ``username``, ``password`` and ``database`` are read from ``self`` and are
    presumably declared on ``SQLConnectionConfig`` (verify against the base
    class).
    """

    host_port = "localhost:8080"
    scheme = "trino"
    service_type = "Trino"

    def get_connection_url(self):
        """Build the connection URL for this Trino cluster.

        The result has the shape
        ``trino://[username[:password]@]host_port[?schema=database]``.
        The username, password and database are percent-encoded with
        ``quote_plus`` so that reserved characters cannot corrupt the URL.

        Returns:
            str: the assembled connection URL.
        """
        url = f"{self.scheme}://"
        if self.username:
            url += quote_plus(self.username)
            # A password is only meaningful as part of "user:password".
            if self.password:
                url += f":{quote_plus(self.password)}"
            # The credentials must be separated from the host by "@";
            # without it the host is glued onto the username/password and
            # the URL cannot be parsed.
            url += "@"
        url += f"{self.host_port}"
        if self.database:
            url += f"?schema={quote_plus(self.database)}"
        return url
class TrinoSource(SQLSource):
    """Ingestion source for Trino; all behaviour is inherited from ``SQLSource``."""

    def __init__(self, config, metadata_config, ctx):
        """Forward the parsed configs and workflow context to ``SQLSource``."""
        super().__init__(config, metadata_config, ctx)

    @classmethod
    def create(cls, config_dict, metadata_config_dict, ctx):
        """Build a ``TrinoSource`` from raw configuration dictionaries.

        ``config_dict`` is parsed into a ``TrinoConfig`` and
        ``metadata_config_dict`` into a ``MetadataServerConfig`` via their
        ``parse_obj`` methods (presumably pydantic validation; confirm against
        the config base classes). ``ctx`` is passed through unchanged.
        """
        return cls(
            TrinoConfig.parse_obj(config_dict),
            MetadataServerConfig.parse_obj(metadata_config_dict),
            ctx,
        )

View File

@ -57,6 +57,7 @@ our roadmap yet, please file an Issue [Github](https://github.com/open-metadata/
### Other features
* Data quality - Data profiler integration work in progress
* Schema versioning
* Support for Trino
## 0.6 Release - Nov 17th, 2021