mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-11-04 20:49:54 +00:00
issue-696: Added trino support for Openmetadata (#697)
* issue-696: Added trino support for Openmetadata * issue-696: fixed linting issues * issue-696: not mentioning Trino for now as it will be part of 0.5 release Co-authored-by: jbuoncri <jbuoncri@cisco.com>
This commit is contained in:
parent
9657b53257
commit
d455409cc9
@ -19,6 +19,7 @@
|
|||||||
"Oracle",
|
"Oracle",
|
||||||
"Athena",
|
"Athena",
|
||||||
"Presto",
|
"Presto",
|
||||||
|
"Trino",
|
||||||
"Vertica"
|
"Vertica"
|
||||||
],
|
],
|
||||||
"javaEnums": [
|
"javaEnums": [
|
||||||
@ -52,6 +53,9 @@
|
|||||||
{
|
{
|
||||||
"name": "Presto"
|
"name": "Presto"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "Trino"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "Vertica"
|
"name": "Vertica"
|
||||||
}
|
}
|
||||||
|
|||||||
Binary file not shown.
|
After Width: | Height: | Size: 21 KiB |
@ -36,6 +36,7 @@ import snowflakes from '../assets/img/service-icon-snowflakes.png';
|
|||||||
import mysql from '../assets/img/service-icon-sql.png';
|
import mysql from '../assets/img/service-icon-sql.png';
|
||||||
import superset from '../assets/img/service-icon-superset.png';
|
import superset from '../assets/img/service-icon-superset.png';
|
||||||
import tableau from '../assets/img/service-icon-tableau.png';
|
import tableau from '../assets/img/service-icon-tableau.png';
|
||||||
|
import trino from '../assets/img/service-icon-trino.png';
|
||||||
import plus from '../assets/svg/plus.svg';
|
import plus from '../assets/svg/plus.svg';
|
||||||
|
|
||||||
export const MYSQL = mysql;
|
export const MYSQL = mysql;
|
||||||
@ -48,6 +49,7 @@ export const ORACLE = oracle;
|
|||||||
export const SNOWFLAKE = snowflakes;
|
export const SNOWFLAKE = snowflakes;
|
||||||
export const ATHENA = athena;
|
export const ATHENA = athena;
|
||||||
export const PRESTO = presto;
|
export const PRESTO = presto;
|
||||||
|
export const TRINO = trino;
|
||||||
export const KAFKA = kafka;
|
export const KAFKA = kafka;
|
||||||
export const PULSAR = pulsar;
|
export const PULSAR = pulsar;
|
||||||
export const SUPERSET = superset;
|
export const SUPERSET = superset;
|
||||||
@ -74,6 +76,7 @@ export const serviceTypes: Record<ServiceTypes, Array<string>> = {
|
|||||||
'Oracle',
|
'Oracle',
|
||||||
'Athena',
|
'Athena',
|
||||||
'Presto',
|
'Presto',
|
||||||
|
'Trino',
|
||||||
],
|
],
|
||||||
messagingServices: ['Kafka'],
|
messagingServices: ['Kafka'],
|
||||||
dashboardServices: ['Superset', 'Looker', 'Tableau', 'Redash'],
|
dashboardServices: ['Superset', 'Looker', 'Tableau', 'Redash'],
|
||||||
|
|||||||
@ -33,6 +33,7 @@ export enum DatabaseServiceType {
|
|||||||
MSSQL = 'MSSQL',
|
MSSQL = 'MSSQL',
|
||||||
ATHENA = 'Athena',
|
ATHENA = 'Athena',
|
||||||
PRESTO = 'Presto',
|
PRESTO = 'Presto',
|
||||||
|
TRINO = 'Trino',
|
||||||
}
|
}
|
||||||
|
|
||||||
export enum MessagingServiceType {
|
export enum MessagingServiceType {
|
||||||
|
|||||||
@ -76,4 +76,5 @@ export enum DatabaseServiceType {
|
|||||||
Redshift = 'Redshift',
|
Redshift = 'Redshift',
|
||||||
Snowflake = 'Snowflake',
|
Snowflake = 'Snowflake',
|
||||||
Vertica = 'Vertica',
|
Vertica = 'Vertica',
|
||||||
|
Trino = 'Trino',
|
||||||
}
|
}
|
||||||
|
|||||||
@ -94,4 +94,5 @@ export enum DatabaseServiceType {
|
|||||||
Redshift = 'Redshift',
|
Redshift = 'Redshift',
|
||||||
Snowflake = 'Snowflake',
|
Snowflake = 'Snowflake',
|
||||||
Vertica = 'Vertica',
|
Vertica = 'Vertica',
|
||||||
|
Trino = 'Trino',
|
||||||
}
|
}
|
||||||
|
|||||||
@ -23,6 +23,7 @@ import {
|
|||||||
SNOWFLAKE,
|
SNOWFLAKE,
|
||||||
SUPERSET,
|
SUPERSET,
|
||||||
TABLEAU,
|
TABLEAU,
|
||||||
|
TRINO,
|
||||||
} from '../constants/services.const';
|
} from '../constants/services.const';
|
||||||
import {
|
import {
|
||||||
DashboardServiceType,
|
DashboardServiceType,
|
||||||
@ -64,6 +65,9 @@ export const serviceTypeLogo = (type: string) => {
|
|||||||
case DatabaseServiceType.PRESTO:
|
case DatabaseServiceType.PRESTO:
|
||||||
return PRESTO;
|
return PRESTO;
|
||||||
|
|
||||||
|
case DatabaseServiceType.TRINO:
|
||||||
|
return TRINO;
|
||||||
|
|
||||||
case MessagingServiceType.KAFKA:
|
case MessagingServiceType.KAFKA:
|
||||||
return KAFKA;
|
return KAFKA;
|
||||||
|
|
||||||
@ -206,6 +210,7 @@ export const getEntityCountByService = (buckets: Array<Bucket>) => {
|
|||||||
case DatabaseServiceType.ORACLE:
|
case DatabaseServiceType.ORACLE:
|
||||||
case DatabaseServiceType.POSTGRES:
|
case DatabaseServiceType.POSTGRES:
|
||||||
case DatabaseServiceType.PRESTO:
|
case DatabaseServiceType.PRESTO:
|
||||||
|
case DatabaseServiceType.TRINO:
|
||||||
case DatabaseServiceType.REDSHIFT:
|
case DatabaseServiceType.REDSHIFT:
|
||||||
case DatabaseServiceType.SNOWFLAKE:
|
case DatabaseServiceType.SNOWFLAKE:
|
||||||
entityCounts.tableCount += bucket.doc_count;
|
entityCounts.tableCount += bucket.doc_count;
|
||||||
|
|||||||
@ -83,6 +83,7 @@
|
|||||||
* [Oracle](install/metadata-ingestion/connectors/database-services/oracle.md)
|
* [Oracle](install/metadata-ingestion/connectors/database-services/oracle.md)
|
||||||
* [Postgres](install/metadata-ingestion/connectors/database-services/postgres.md)
|
* [Postgres](install/metadata-ingestion/connectors/database-services/postgres.md)
|
||||||
* [Presto](install/metadata-ingestion/connectors/database-services/presto.md)
|
* [Presto](install/metadata-ingestion/connectors/database-services/presto.md)
|
||||||
|
* [Trino](install/metadata-ingestion/connectors/database-services/trino.md)
|
||||||
* [Redshift](install/metadata-ingestion/connectors/database-services/redshift.md)
|
* [Redshift](install/metadata-ingestion/connectors/database-services/redshift.md)
|
||||||
* [Redshift Usage](install/metadata-ingestion/connectors/database-services/redshift-usage.md)
|
* [Redshift Usage](install/metadata-ingestion/connectors/database-services/redshift-usage.md)
|
||||||
* [Snowflake](install/metadata-ingestion/connectors/database-services/snowflake.md)
|
* [Snowflake](install/metadata-ingestion/connectors/database-services/snowflake.md)
|
||||||
|
|||||||
@ -0,0 +1,95 @@
|
|||||||
|
---
|
||||||
|
description: This guide will help you install the Trino connector and run it manually
|
||||||
|
---
|
||||||
|
|
||||||
|
# Trino
|
||||||
|
|
||||||
|
{% hint style="info" %}
|
||||||
|
**Prerequisites**
|
||||||
|
|
||||||
|
1. Python 3.7 or above
|
||||||
|
2. OpenMetadata Server up and running
|
||||||
|
{% endhint %}
|
||||||
|
|
||||||
|
### Install from PyPI or Source
|
||||||
|
|
||||||
|
{% tabs %}
|
||||||
|
{% tab title="Install Using PyPI" %}
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install 'openmetadata-ingestion[trino]'
|
||||||
|
```
|
||||||
|
|
||||||
|
{% endtab %}
|
||||||
|
{% endtabs %}
|
||||||
|
|
||||||
|
## Run Manually
|
||||||
|
|
||||||
|
```bash
|
||||||
|
metadata ingest -c ./examples/workflows/trino.json
|
||||||
|
```
|
||||||
|
|
||||||
|
### Configuration
|
||||||
|
|
||||||
|
{% code title="trino.json" %}
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
"source": {
|
||||||
|
"type": "trino",
|
||||||
|
"config": {
|
||||||
|
"service_name": "local_trino",
|
||||||
|
"host_port": "192.168.1.32:8080",
|
||||||
|
"database": "default"
|
||||||
|
}
|
||||||
|
}, ...
|
||||||
|
```
|
||||||
|
|
||||||
|
{% endcode %}
|
||||||
|
|
||||||
|
1. **username** - optional. Set this (together with **password**) if your Trino cluster requires username/password authentication.
|
||||||
|
2. **password** - password for the username
|
||||||
|
3. **host_port** - host and port of the Trino cluster
|
||||||
|
4. **service_name** - Service Name for this Trino cluster. If you added the Trino cluster through OpenMetadata UI, make sure the service name matches the same.
|
||||||
|
5. **filter_pattern** - contains includes and excludes options that select which datasets are ingested into OpenMetadata
|
||||||
|
|
||||||
|
## Publish to OpenMetadata
|
||||||
|
|
||||||
|
Below is the configuration to publish Trino data into the OpenMetadata service.
|
||||||
|
|
||||||
|
Add the `metadata-rest` sink along with the `metadata-server` config, as shown below.
|
||||||
|
|
||||||
|
{% code title="trino.json" %}
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
{
|
||||||
|
"source": {
|
||||||
|
"type": "trino",
|
||||||
|
"config": {
|
||||||
|
"service_name": "local_trino",
|
||||||
|
"host_port": "192.168.1.32:8080",
|
||||||
|
"database": "default"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"sink": {
|
||||||
|
"type": "metadata-rest",
|
||||||
|
"config": {
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"metadata_server": {
|
||||||
|
"type": "metadata-server",
|
||||||
|
"config": {
|
||||||
|
"api_endpoint": "http://localhost:8585/api",
|
||||||
|
"auth_provider_type": "no-auth"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"cron": {
|
||||||
|
"minute": "*/5",
|
||||||
|
"hour": null,
|
||||||
|
"day": null,
|
||||||
|
"month": null,
|
||||||
|
"day_of_week": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
{% endcode %}
|
||||||
@ -48,7 +48,6 @@ Type: `object`
|
|||||||
9. _"Athena"_
|
9. _"Athena"_
|
||||||
10. _"Presto"_
|
10. _"Presto"_
|
||||||
11. _"Vertica"_
|
11. _"Vertica"_
|
||||||
|
12. _"Trino"_
|
||||||
|
|
||||||
|
|
||||||
_This document was updated on: Thursday, September 16, 2021_
|
_This document was updated on: Thursday, September 16, 2021_
|
||||||
@ -56,6 +56,7 @@ our roadmap yet, please file an Issue [Github](https://github.com/open-metadata/
|
|||||||
### Other features
|
### Other features
|
||||||
* Data quality - Data profiler integration work in progress
|
* Data quality - Data profiler integration work in progress
|
||||||
* Schema versioning
|
* Schema versioning
|
||||||
|
* Support for Trino
|
||||||
|
|
||||||
|
|
||||||
## 0.6 Release - Nov 17th, 2021
|
## 0.6 Release - Nov 17th, 2021
|
||||||
|
|||||||
@ -6,6 +6,13 @@
|
|||||||
"pipelineUrl": "http://localhost:8080/tree?dag_id=presto_etl",
|
"pipelineUrl": "http://localhost:8080/tree?dag_id=presto_etl",
|
||||||
"tasks": ["presto_task", "assert_table_exists"]
|
"tasks": ["presto_task", "assert_table_exists"]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "trino_etl",
|
||||||
|
"displayName": "Trino ETL",
|
||||||
|
"description": "Trino ETL pipeline",
|
||||||
|
"pipelineUrl": "http://localhost:8080/tree?dag_id=trino_etl",
|
||||||
|
"tasks": ["trino_task", "assert_table_exists"]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "hive_etl",
|
"name": "hive_etl",
|
||||||
"displayName": "Hive ETL",
|
"displayName": "Hive ETL",
|
||||||
|
|||||||
@ -30,6 +30,14 @@
|
|||||||
"taskUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=assert_table_exists",
|
"taskUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=assert_table_exists",
|
||||||
"downstreamTasks": ["assert_table_exists"],
|
"downstreamTasks": ["assert_table_exists"],
|
||||||
"taskType": "PrestoOperator"
|
"taskType": "PrestoOperator"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "trino_task",
|
||||||
|
"displayName": "Trino Task",
|
||||||
|
"description": "Airflow operator to perform ETL on trino tables",
|
||||||
|
"taskUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=assert_table_exists",
|
||||||
|
"downstreamTasks": ["assert_table_exists"],
|
||||||
|
"taskType": "TrinoOperator"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
29
ingestion/examples/workflows/trino.json
Normal file
29
ingestion/examples/workflows/trino.json
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
{
|
||||||
|
"source": {
|
||||||
|
"type": "trino",
|
||||||
|
"config": {
|
||||||
|
"service_name": "local_trino",
|
||||||
|
"host_port": "192.168.1.32:8080",
|
||||||
|
"database": "default"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"sink": {
|
||||||
|
"type": "metadata-rest",
|
||||||
|
"config": {
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"metadata_server": {
|
||||||
|
"type": "metadata-server",
|
||||||
|
"config": {
|
||||||
|
"api_endpoint": "http://localhost:8585/api",
|
||||||
|
"auth_provider_type": "no-auth"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"cron": {
|
||||||
|
"minute": "*/5",
|
||||||
|
"hour": null,
|
||||||
|
"day": null,
|
||||||
|
"month": null,
|
||||||
|
"day_of_week": null
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -98,6 +98,7 @@ plugins: Dict[str, Set[str]] = {
|
|||||||
"oracle": {"cx_Oracle"},
|
"oracle": {"cx_Oracle"},
|
||||||
"pii-processor": pii_requirements,
|
"pii-processor": pii_requirements,
|
||||||
"presto": {"pyhive~=0.6.3"},
|
"presto": {"pyhive~=0.6.3"},
|
||||||
|
"trino": {"sqlalchemy-trino"},
|
||||||
"postgres": {"pymysql>=1.0.2", "psycopg2-binary", "GeoAlchemy2"},
|
"postgres": {"pymysql>=1.0.2", "psycopg2-binary", "GeoAlchemy2"},
|
||||||
"redash": {"redash-toolbelt==0.1.4"},
|
"redash": {"redash-toolbelt==0.1.4"},
|
||||||
"redshift": {"openmetadata-sqlalchemy-redshift", "psycopg2-binary", "GeoAlchemy2"},
|
"redshift": {"openmetadata-sqlalchemy-redshift", "psycopg2-binary", "GeoAlchemy2"},
|
||||||
|
|||||||
@ -23,6 +23,7 @@ class DatabaseServiceType(Enum):
|
|||||||
Oracle = 'Oracle'
|
Oracle = 'Oracle'
|
||||||
Athena = 'Athena'
|
Athena = 'Athena'
|
||||||
Presto = 'Presto'
|
Presto = 'Presto'
|
||||||
|
Trino = 'Trino'
|
||||||
Vertica = 'Vertica'
|
Vertica = 'Vertica'
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -64,6 +64,8 @@ def get_service_type_from_database_uri(uri: str) -> str:
|
|||||||
return "snowflake"
|
return "snowflake"
|
||||||
if uri.startswith("presto"):
|
if uri.startswith("presto"):
|
||||||
return "presto"
|
return "presto"
|
||||||
|
if uri.startswith("trino"):
|
||||||
|
return "trino"
|
||||||
if uri.startswith("postgresql"):
|
if uri.startswith("postgresql"):
|
||||||
return "postgres"
|
return "postgres"
|
||||||
if uri.startswith("pinot"):
|
if uri.startswith("pinot"):
|
||||||
|
|||||||
48
ingestion/src/metadata/ingestion/source/trino.py
Normal file
48
ingestion/src/metadata/ingestion/source/trino.py
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
from urllib.parse import quote_plus
|
||||||
|
|
||||||
|
from .sql_source import SQLSource, SQLConnectionConfig
|
||||||
|
from ..ometa.openmetadata_rest import MetadataServerConfig
|
||||||
|
|
||||||
|
|
||||||
|
class TrinoConfig(SQLConnectionConfig):
    """Connection settings for a Trino cluster.

    Overrides the defaults inherited from ``SQLConnectionConfig`` with the
    Trino scheme, a local default endpoint and the ``Trino`` service type.
    """

    host_port = "localhost:8080"
    scheme = "trino"
    service_type = "Trino"

    def get_connection_url(self):
        """Build the connection URL for this Trino configuration.

        The result has the form
        ``trino://[username[:password]@]host_port[?schema=database]``.
        Username, password and database are percent-encoded with
        ``quote_plus`` so reserved characters cannot corrupt the URL.

        Returns:
            str: the assembled connection URL.
        """
        url = f"{self.scheme}://"
        if self.username:
            url += quote_plus(self.username)
            # A password is only meaningful together with a username.
            if self.password:
                url += f":{quote_plus(self.password)}"
            # The userinfo part must be terminated by "@"; without it the
            # credentials run straight into the host name and the URL is
            # unparsable (e.g. "trino://userlocalhost:8080").
            url += "@"
        url += f"{self.host_port}"
        if self.database:
            # NOTE(review): sqlalchemy-trino documents URLs shaped like
            # trino://user@host:port/catalog[/schema]; confirm that a
            # ``schema`` query parameter is honored by the dialect.
            url += f"?schema={quote_plus(self.database)}"
        return url
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class TrinoSource(SQLSource):
    """Ingestion source for Trino, built on top of ``SQLSource``."""

    def __init__(self, config, metadata_config, ctx):
        """Forward the configuration objects and context to ``SQLSource``."""
        super().__init__(config, metadata_config, ctx)

    @classmethod
    def create(cls, config_dict, metadata_config_dict, ctx):
        """Construct a source from raw configuration dictionaries.

        ``config_dict`` is parsed with ``TrinoConfig.parse_obj`` and
        ``metadata_config_dict`` with ``MetadataServerConfig.parse_obj``;
        the parsed objects and ``ctx`` are then passed to the constructor.
        """
        return cls(
            TrinoConfig.parse_obj(config_dict),
            MetadataServerConfig.parse_obj(metadata_config_dict),
            ctx,
        )
|
||||||
@ -57,6 +57,7 @@ our roadmap yet, please file an Issue [Github](https://github.com/open-metadata/
|
|||||||
### Other features
|
### Other features
|
||||||
* Data quality - Data profiler integration work in progress
|
* Data quality - Data profiler integration work in progress
|
||||||
* Schema versioning
|
* Schema versioning
|
||||||
|
* Support for Trino
|
||||||
|
|
||||||
|
|
||||||
## 0.6 Release - Nov 17th, 2021
|
## 0.6 Release - Nov 17th, 2021
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user