Mirror of https://github.com/open-metadata/OpenMetadata.git (synced 2025-11-01 11:09:14 +00:00)
issue-696: Added trino support for Openmetadata (#697)
* issue-696: Added trino support for Openmetadata
* issue-696: fixed linting issues
* issue-696: not mentioning Trino for now as it will be part of 0.5 release

Co-authored-by: jbuoncri <jbuoncri@cisco.com>
parent: 9657b53257
commit: d455409cc9

Database service type schema: "Trino" is added to the enum values and the javaEnums entries:

```diff
@@ -19,6 +19,7 @@
       "Oracle",
       "Athena",
       "Presto",
+      "Trino",
       "Vertica"
     ],
     "javaEnums": [
@@ -52,6 +53,9 @@
       {
         "name": "Presto"
       },
+      {
+        "name": "Trino"
+      },
       {
         "name": "Vertica"
       }
```

A new binary file (the Trino service icon image, 21 KiB) is added but not shown.

UI service constants: import the Trino icon, export it, and list Trino among the database services:

```diff
@@ -36,6 +36,7 @@ import snowflakes from '../assets/img/service-icon-snowflakes.png';
 import mysql from '../assets/img/service-icon-sql.png';
 import superset from '../assets/img/service-icon-superset.png';
 import tableau from '../assets/img/service-icon-tableau.png';
+import trino from '../assets/img/service-icon-trino.png';
 import plus from '../assets/svg/plus.svg';
 
 export const MYSQL = mysql;
@@ -48,6 +49,7 @@ export const ORACLE = oracle;
 export const SNOWFLAKE = snowflakes;
 export const ATHENA = athena;
 export const PRESTO = presto;
+export const TRINO = trino;
 export const KAFKA = kafka;
 export const PULSAR = pulsar;
 export const SUPERSET = superset;
@@ -74,6 +76,7 @@ export const serviceTypes: Record<ServiceTypes, Array<string>> = {
     'Oracle',
     'Athena',
     'Presto',
+    'Trino',
   ],
   messagingServices: ['Kafka'],
   dashboardServices: ['Superset', 'Looker', 'Tableau', 'Redash'],
```

UI DatabaseServiceType enum:

```diff
@@ -33,6 +33,7 @@ export enum DatabaseServiceType {
   MSSQL = 'MSSQL',
   ATHENA = 'Athena',
   PRESTO = 'Presto',
+  TRINO = 'Trino',
 }
 
 export enum MessagingServiceType {
```

Generated TypeScript DatabaseServiceType enums (two locations):

```diff
@@ -76,4 +76,5 @@ export enum DatabaseServiceType {
   Redshift = 'Redshift',
   Snowflake = 'Snowflake',
   Vertica = 'Vertica',
+  Trino = 'Trino',
 }
@@ -94,4 +94,5 @@ export enum DatabaseServiceType {
   Redshift = 'Redshift',
   Snowflake = 'Snowflake',
   Vertica = 'Vertica',
+  Trino = 'Trino',
 }
```

Service utilities: return the Trino logo and count Trino tables:

```diff
@@ -23,6 +23,7 @@ import {
   SNOWFLAKE,
   SUPERSET,
   TABLEAU,
+  TRINO,
 } from '../constants/services.const';
 import {
   DashboardServiceType,
@@ -64,6 +65,9 @@ export const serviceTypeLogo = (type: string) => {
     case DatabaseServiceType.PRESTO:
       return PRESTO;
 
+    case DatabaseServiceType.TRINO:
+      return TRINO;
+
     case MessagingServiceType.KAFKA:
       return KAFKA;
 
@@ -206,6 +210,7 @@ export const getEntityCountByService = (buckets: Array<Bucket>) => {
     case DatabaseServiceType.ORACLE:
     case DatabaseServiceType.POSTGRES:
     case DatabaseServiceType.PRESTO:
+    case DatabaseServiceType.TRINO:
     case DatabaseServiceType.REDSHIFT:
     case DatabaseServiceType.SNOWFLAKE:
       entityCounts.tableCount += bucket.doc_count;
```

Docs summary: link the new Trino connector page:

```diff
@@ -83,6 +83,7 @@
 * [Oracle](install/metadata-ingestion/connectors/database-services/oracle.md)
 * [Postgres](install/metadata-ingestion/connectors/database-services/postgres.md)
 * [Presto](install/metadata-ingestion/connectors/database-services/presto.md)
+* [Trino](install/metadata-ingestion/connectors/database-services/trino.md)
 * [Redshift](install/metadata-ingestion/connectors/database-services/redshift.md)
 * [Redshift Usage](install/metadata-ingestion/connectors/database-services/redshift-usage.md)
 * [Snowflake](install/metadata-ingestion/connectors/database-services/snowflake.md)
```

New file (95 lines): the Trino connector guide:

---
description: This guide will help you install the Trino connector and run it manually
---

# Trino

{% hint style="info" %}
**Prerequisites**

1. Python 3.7 or above
2. OpenMetadata Server up and running
{% endhint %}

### Install from PyPI or Source

{% tabs %}
{% tab title="Install Using PyPI" %}

```bash
pip install 'openmetadata-ingestion[trino]'
```

{% endtab %}
{% endtabs %}

## Run Manually

```bash
metadata ingest -c ./examples/workflows/trino.json
```

### Configuration

{% code title="trino.json" %}

```javascript
"source": {
  "type": "trino",
  "config": {
    "service_name": "local_trino",
    "host_port": "192.168.1.32:8080",
    "database": "default"
  }
}, ...
```

{% endcode %}

1. **username** - optional; set the username and password fields if your Trino cluster requires username/password authentication.
2. **password** - the password for the above username.
3. **host_port** - host and port of the Trino cluster.
4. **service_name** - the service name for this Trino cluster. If you added the Trino cluster through the OpenMetadata UI, make sure the service name matches the one used there.
5. **filter_pattern** - `includes`/`excludes` options that control which datasets are ingested into OpenMetadata (see the sketch after this list).
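
For reference, a minimal sketch of a source block that fills in the optional fields above. The field names follow this list; the host, credentials, and the `includes`/`excludes` regex values are illustrative placeholders, not settings taken from this commit.

```python
import json

# Illustrative source config only: the host, credentials, and the
# filter_pattern regexes below are placeholders, not values from the commit.
source = {
    "type": "trino",
    "config": {
        "service_name": "local_trino",
        "host_port": "192.168.1.32:8080",
        "database": "default",
        "username": "trino_user",        # optional, only if auth is enabled
        "password": "trino_password",    # optional, pairs with username
        "filter_pattern": {
            "includes": ["sales.*"],     # only ingest matching datasets
            "excludes": [".*_staging"],  # skip matching datasets
        },
    },
}

print(json.dumps(source, indent=2))
```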

## Publish to OpenMetadata

Below is the configuration to publish Trino data into the OpenMetadata service.

Add the `metadata-rest` sink along with the `metadata-server` config.

{% code title="trino.json" %}

```javascript
{
  "source": {
    "type": "trino",
    "config": {
      "service_name": "local_trino",
      "host_port": "192.168.1.32:8080",
      "database": "default"
    }
  },
  "sink": {
    "type": "metadata-rest",
    "config": {
    }
  },
  "metadata_server": {
    "type": "metadata-server",
    "config": {
      "api_endpoint": "http://localhost:8585/api",
      "auth_provider_type": "no-auth"
    }
  },
  "cron": {
    "minute": "*/5",
    "hour": null,
    "day": null,
    "month": null,
    "day_of_week": null
  }
}
```

{% endcode %}

JSON-schema reference doc: the service-type enumeration gains _"Trino"_:

```diff
@@ -48,7 +48,6 @@ Type: `object`
 9. _"Athena"_
 10. _"Presto"_
 11. _"Vertica"_
+12. _"Trino"_
 
 _This document was updated on: Thursday, September 16, 2021_
```

Roadmap: Trino support is listed under Other features:

```diff
@@ -56,6 +56,7 @@ our roadmap yet, please file an Issue [Github](https://github.com/open-metadata/
 ### Other features
 * Data quality - Data profiler integration work in progress
 * Schema versioning
+* Support for Trino
 
 
 ## 0.6 Release - Nov 17th, 2021
```

Sample pipeline data: a trino_etl pipeline is added next to the Presto and Hive examples:

```diff
@@ -6,6 +6,13 @@
     "pipelineUrl": "http://localhost:8080/tree?dag_id=presto_etl",
     "tasks": ["presto_task", "assert_table_exists"]
   },
+  {
+    "name": "trino_etl",
+    "displayName": "Trino ETL",
+    "description": "Trino ETL pipeline",
+    "pipelineUrl": "http://localhost:8080/tree?dag_id=trino_etl",
+    "tasks": ["trino_task", "assert_table_exists"]
+  },
   {
     "name": "hive_etl",
     "displayName": "Hive ETL",
```

Sample task data: a trino_task using a TrinoOperator task type:

```diff
@@ -30,6 +30,14 @@
     "taskUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=assert_table_exists",
     "downstreamTasks": ["assert_table_exists"],
     "taskType": "PrestoOperator"
   },
+  {
+    "name": "trino_task",
+    "displayName": "Trino Task",
+    "description": "Airflow operator to perform ETL on trino tables",
+    "taskUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=assert_table_exists",
+    "downstreamTasks": ["assert_table_exists"],
+    "taskType": "TrinoOperator"
+  }
 ]
 }
```

New file: `ingestion/examples/workflows/trino.json` (29 lines):

```json
{
  "source": {
    "type": "trino",
    "config": {
      "service_name": "local_trino",
      "host_port": "192.168.1.32:8080",
      "database": "default"
    }
  },
  "sink": {
    "type": "metadata-rest",
    "config": {
    }
  },
  "metadata_server": {
    "type": "metadata-server",
    "config": {
      "api_endpoint": "http://localhost:8585/api",
      "auth_provider_type": "no-auth"
    }
  },
  "cron": {
    "minute": "*/5",
    "hour": null,
    "day": null,
    "month": null,
    "day_of_week": null
  }
}
```
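
As a quick, illustrative sanity check (not part of the commit), the example workflow can be loaded and its source block inspected; the relative path assumes the repository root as the working directory.

```python
import json

# Load the example workflow and confirm the source block targets the
# trino connector (path assumes the repository root as working directory).
with open("ingestion/examples/workflows/trino.json") as f:
    workflow = json.load(f)

source = workflow["source"]
assert source["type"] == "trino"
print(source["config"]["service_name"], source["config"]["host_port"])
# local_trino 192.168.1.32:8080
```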

Ingestion plugin extras: the trino extra installs sqlalchemy-trino:

```diff
@@ -98,6 +98,7 @@ plugins: Dict[str, Set[str]] = {
     "oracle": {"cx_Oracle"},
     "pii-processor": pii_requirements,
     "presto": {"pyhive~=0.6.3"},
+    "trino": {"sqlalchemy-trino"},
     "postgres": {"pymysql>=1.0.2", "psycopg2-binary", "GeoAlchemy2"},
     "redash": {"redash-toolbelt==0.1.4"},
     "redshift": {"openmetadata-sqlalchemy-redshift", "psycopg2-binary", "GeoAlchemy2"},
```

Generated Python DatabaseServiceType enum:

```diff
@@ -23,6 +23,7 @@ class DatabaseServiceType(Enum):
     Oracle = 'Oracle'
     Athena = 'Athena'
     Presto = 'Presto'
+    Trino = 'Trino'
     Vertica = 'Vertica'
```

URI-to-service-type helper: trino:// URIs resolve to the trino service:

```diff
@@ -64,6 +64,8 @@ def get_service_type_from_database_uri(uri: str) -> str:
         return "snowflake"
     if uri.startswith("presto"):
         return "presto"
+    if uri.startswith("trino"):
+        return "trino"
     if uri.startswith("postgresql"):
         return "postgres"
     if uri.startswith("pinot"):
```
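
For illustration, a standalone sketch of the same scheme-prefix dispatch; the helper name and lookup table below are hypothetical, not the project's API.

```python
# Hypothetical standalone version of the prefix dispatch shown above.
_URI_PREFIX_TO_SERVICE = {
    "snowflake": "snowflake",
    "presto": "presto",
    "trino": "trino",
    "postgresql": "postgres",
}


def service_type_from_uri(uri: str) -> str:
    # Return the service type whose scheme prefixes the SQLAlchemy URI.
    for prefix, service in _URI_PREFIX_TO_SERVICE.items():
        if uri.startswith(prefix):
            return service
    raise ValueError(f"unrecognized database URI: {uri}")


print(service_type_from_uri("trino://192.168.1.32:8080/default"))  # trino
```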

New file: `ingestion/src/metadata/ingestion/source/trino.py` (48 lines):

```python
#  Licensed to the Apache Software Foundation (ASF) under one or more
#  contributor license agreements. See the NOTICE file distributed with
#  this work for additional information regarding copyright ownership.
#  The ASF licenses this file to You under the Apache License, Version 2.0
#  (the "License"); you may not use this file except in compliance with
#  the License. You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

from urllib.parse import quote_plus

from .sql_source import SQLSource, SQLConnectionConfig
from ..ometa.openmetadata_rest import MetadataServerConfig


class TrinoConfig(SQLConnectionConfig):
    host_port = "localhost:8080"
    scheme = "trino"
    service_type = "Trino"

    def get_connection_url(self):
        # Build a SQLAlchemy URL of the form
        # trino://user:password@host:port?schema=<database>
        url = f"{self.scheme}://"
        if self.username:
            url += f"{quote_plus(self.username)}"
            if self.password:
                url += f":{quote_plus(self.password)}"
            # separate the credentials from the host
            url += "@"
        url += f"{self.host_port}"
        if self.database:
            url += f"?schema={quote_plus(self.database)}"
        return url


class TrinoSource(SQLSource):
    def __init__(self, config, metadata_config, ctx):
        super().__init__(config, metadata_config, ctx)

    @classmethod
    def create(cls, config_dict, metadata_config_dict, ctx):
        # Parse the workflow's source and metadata_server sections into
        # their config models and construct the source.
        config = TrinoConfig.parse_obj(config_dict)
        metadata_config = MetadataServerConfig.parse_obj(metadata_config_dict)
        return cls(config, metadata_config, ctx)
```
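
To show what `get_connection_url` produces, here is a small standalone mirror of its logic; it is illustrative only, and the hosts and credentials are placeholders.

```python
from urllib.parse import quote_plus


def trino_url(host_port, username=None, password=None, database=None, scheme="trino"):
    # Standalone mirror of TrinoConfig.get_connection_url, for illustration.
    url = f"{scheme}://"
    if username:
        url += quote_plus(username)
        if password:
            url += f":{quote_plus(password)}"
        url += "@"
    url += host_port
    if database:
        url += f"?schema={quote_plus(database)}"
    return url


print(trino_url("192.168.1.32:8080", database="default"))
# trino://192.168.1.32:8080?schema=default
print(trino_url("localhost:8080", "admin", "s3cr3t", "default"))
# trino://admin:s3cr3t@localhost:8080?schema=default
```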

The same roadmap change in a second roadmap page:

```diff
@@ -57,6 +57,7 @@ our roadmap yet, please file an Issue [Github](https://github.com/open-metadata/
 ### Other features
 * Data quality - Data profiler integration work in progress
 * Schema versioning
+* Support for Trino
 
 
 ## 0.6 Release - Nov 17th, 2021
```