diff --git a/catalog-rest-service/src/main/resources/json/schema/entity/services/databaseService.json b/catalog-rest-service/src/main/resources/json/schema/entity/services/databaseService.json index dc87653e184..3af01dcaae8 100644 --- a/catalog-rest-service/src/main/resources/json/schema/entity/services/databaseService.json +++ b/catalog-rest-service/src/main/resources/json/schema/entity/services/databaseService.json @@ -19,6 +19,7 @@ "Oracle", "Athena", "Presto", + "Trino", "Vertica" ], "javaEnums": [ @@ -52,6 +53,9 @@ { "name": "Presto" }, + { + "name": "Trino" + }, { "name": "Vertica" } diff --git a/catalog-rest-service/src/main/resources/ui/src/assets/img/service-icon-trino.png b/catalog-rest-service/src/main/resources/ui/src/assets/img/service-icon-trino.png new file mode 100644 index 00000000000..ffbd8047406 Binary files /dev/null and b/catalog-rest-service/src/main/resources/ui/src/assets/img/service-icon-trino.png differ diff --git a/catalog-rest-service/src/main/resources/ui/src/constants/services.const.ts b/catalog-rest-service/src/main/resources/ui/src/constants/services.const.ts index e1247825db2..7cbb98d83af 100644 --- a/catalog-rest-service/src/main/resources/ui/src/constants/services.const.ts +++ b/catalog-rest-service/src/main/resources/ui/src/constants/services.const.ts @@ -36,6 +36,7 @@ import snowflakes from '../assets/img/service-icon-snowflakes.png'; import mysql from '../assets/img/service-icon-sql.png'; import superset from '../assets/img/service-icon-superset.png'; import tableau from '../assets/img/service-icon-tableau.png'; +import trino from '../assets/img/service-icon-trino.png'; import plus from '../assets/svg/plus.svg'; export const MYSQL = mysql; @@ -48,6 +49,7 @@ export const ORACLE = oracle; export const SNOWFLAKE = snowflakes; export const ATHENA = athena; export const PRESTO = presto; +export const TRINO = trino; export const KAFKA = kafka; export const PULSAR = pulsar; export const SUPERSET = superset; @@ -74,6 +76,7 @@ export const serviceTypes: Record> = { 'Oracle', 'Athena', 'Presto', + 'Trino', ], messagingServices: ['Kafka'], dashboardServices: ['Superset', 'Looker', 'Tableau', 'Redash'], diff --git a/catalog-rest-service/src/main/resources/ui/src/enums/service.enum.ts b/catalog-rest-service/src/main/resources/ui/src/enums/service.enum.ts index 5d6353c8f28..6d73ce065e2 100644 --- a/catalog-rest-service/src/main/resources/ui/src/enums/service.enum.ts +++ b/catalog-rest-service/src/main/resources/ui/src/enums/service.enum.ts @@ -33,6 +33,7 @@ export enum DatabaseServiceType { MSSQL = 'MSSQL', ATHENA = 'Athena', PRESTO = 'Presto', + TRINO = 'Trino', } export enum MessagingServiceType { diff --git a/catalog-rest-service/src/main/resources/ui/src/generated/api/services/createDatabaseService.ts b/catalog-rest-service/src/main/resources/ui/src/generated/api/services/createDatabaseService.ts index 8e611a2210f..fbaa2b59a83 100644 --- a/catalog-rest-service/src/main/resources/ui/src/generated/api/services/createDatabaseService.ts +++ b/catalog-rest-service/src/main/resources/ui/src/generated/api/services/createDatabaseService.ts @@ -76,4 +76,5 @@ export enum DatabaseServiceType { Redshift = 'Redshift', Snowflake = 'Snowflake', Vertica = 'Vertica', + Trino = 'Trino', } diff --git a/catalog-rest-service/src/main/resources/ui/src/generated/entity/services/databaseService.ts b/catalog-rest-service/src/main/resources/ui/src/generated/entity/services/databaseService.ts index 0ee0eb186ff..abc200c0eb6 100644 --- a/catalog-rest-service/src/main/resources/ui/src/generated/entity/services/databaseService.ts +++ b/catalog-rest-service/src/main/resources/ui/src/generated/entity/services/databaseService.ts @@ -94,4 +94,5 @@ export enum DatabaseServiceType { Redshift = 'Redshift', Snowflake = 'Snowflake', Vertica = 'Vertica', + Trino = 'Trino', } diff --git a/catalog-rest-service/src/main/resources/ui/src/utils/ServiceUtils.ts b/catalog-rest-service/src/main/resources/ui/src/utils/ServiceUtils.ts index 96305e6afcf..ae883f7456f 100644 --- a/catalog-rest-service/src/main/resources/ui/src/utils/ServiceUtils.ts +++ b/catalog-rest-service/src/main/resources/ui/src/utils/ServiceUtils.ts @@ -23,6 +23,7 @@ import { SNOWFLAKE, SUPERSET, TABLEAU, + TRINO, } from '../constants/services.const'; import { DashboardServiceType, @@ -64,6 +65,9 @@ export const serviceTypeLogo = (type: string) => { case DatabaseServiceType.PRESTO: return PRESTO; + case DatabaseServiceType.TRINO: + return TRINO; + case MessagingServiceType.KAFKA: return KAFKA; @@ -206,6 +210,7 @@ export const getEntityCountByService = (buckets: Array) => { case DatabaseServiceType.ORACLE: case DatabaseServiceType.POSTGRES: case DatabaseServiceType.PRESTO: + case DatabaseServiceType.TRINO: case DatabaseServiceType.REDSHIFT: case DatabaseServiceType.SNOWFLAKE: entityCounts.tableCount += bucket.doc_count; diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 73b61b1bad1..ab1b464b1de 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -83,6 +83,7 @@ * [Oracle](install/metadata-ingestion/connectors/database-services/oracle.md) * [Postgres](install/metadata-ingestion/connectors/database-services/postgres.md) * [Presto](install/metadata-ingestion/connectors/database-services/presto.md) + * [Trino](install/metadata-ingestion/connectors/database-services/trino.md) * [Redshift](install/metadata-ingestion/connectors/database-services/redshift.md) * [Redshift Usage](install/metadata-ingestion/connectors/database-services/redshift-usage.md) * [Snowflake](install/metadata-ingestion/connectors/database-services/snowflake.md) diff --git a/docs/install/metadata-ingestion/connectors/database-services/trino.md b/docs/install/metadata-ingestion/connectors/database-services/trino.md new file mode 100644 index 00000000000..a20221dd4d5 --- /dev/null +++ b/docs/install/metadata-ingestion/connectors/database-services/trino.md @@ -0,0 +1,95 @@ +--- +description: This guide will help install Trino connector and run manually +--- + +# Trino + +{% hint style="info" %} +**Prerequisites** + +1. Python 3.7 or above +2. OpenMetadata Server up and running + {% endhint %} + +### Install from PyPI or Source + +{% tabs %} +{% tab title="Install Using PyPI" %} + +```bash +pip install 'openmetadata-ingestion[trino]' +``` + +{% endtab %} +{% endtabs %} + +## Run Manually + +```bash +metadata ingest -c ./examples/workflows/trino.json +``` + +### Configuration + +{% code title="trino.json" %} + +```javascript + "source": { + "type": "trino", + "config": { + "service_name": "local_trino", + "host_port": "192.168.1.32:8080", + "database": "default" + } + }, ... +``` + +{% endcode %} + +1. **username** - this is an optional configuration if you are using username/password with trino. Please use these fields to configure them +2. **password** - password for the username +3. **host_port** - host and port of the Trino cluster +4. **service_name** - Service Name for this Trino cluster. If you added the Trino cluster through OpenMetadata UI, make sure the service name matches the same. +5. **filter_pattern** - It contains includes, excludes options to choose which pattern of datasets you want to ingest into OpenMetadata + +## Publish to OpenMetadata + +Below is the configuration to publish Trino data into the OpenMeatadata service. + +add `metadata-rest-tables` sink along with `metadata-server` config + +{% code title="trino.json" %} + +```javascript +{ + "source": { + "type": "Trino", + "config": { + "service_name": "local_trino", + "host_port": "192.168.1.32:8080", + "database": "default" + } + }, + "sink": { + "type": "metadata-rest", + "config": { + } + }, + "metadata_server": { + "type": "metadata-server", + "config": { + "api_endpoint": "http://localhost:8585/api", + "auth_provider_type": "no-auth" + } + }, + "cron": { + "minute": "*/5", + "hour": null, + "day": null, + "month": null, + "day_of_week": null + } +} +``` + +{% endcode %} diff --git a/docs/openmetadata-apis/schemas/entities/databaseservice.md b/docs/openmetadata-apis/schemas/entities/databaseservice.md index 1f65df0418c..650dade7222 100644 --- a/docs/openmetadata-apis/schemas/entities/databaseservice.md +++ b/docs/openmetadata-apis/schemas/entities/databaseservice.md @@ -48,7 +48,6 @@ Type: `object` 9. _"Athena"_ 10. _"Presto"_ 11. _"Vertica"_ - - + 12. _"Trino"_ _This document was updated on: Thursday, September 16, 2021_ \ No newline at end of file diff --git a/docs/roadmap.md b/docs/roadmap.md index e94a80f2ab7..048e86937bc 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -56,6 +56,7 @@ our roadmap yet, please file an Issue [Github](https://github.com/open-metadata/ ### Other features * Data quality - Data profiler integration work in progress * Schema versioning +* Support for Trino ## 0.6 Release - Nov 17th, 2021 diff --git a/ingestion/examples/sample_data/pipelines/pipelines.json b/ingestion/examples/sample_data/pipelines/pipelines.json index d1c83160aa5..3b834334309 100644 --- a/ingestion/examples/sample_data/pipelines/pipelines.json +++ b/ingestion/examples/sample_data/pipelines/pipelines.json @@ -6,6 +6,13 @@ "pipelineUrl": "http://localhost:8080/tree?dag_id=presto_etl", "tasks": ["presto_task", "assert_table_exists"] }, + { + "name": "trino_etl", + "displayName": "Trino ETL", + "description": "Trino ETL pipeline", + "pipelineUrl": "http://localhost:8080/tree?dag_id=trino_etl", + "tasks": ["trino_task", "assert_table_exists"] + }, { "name": "hive_etl", "displayName": "Hive ETL", diff --git a/ingestion/examples/sample_data/pipelines/tasks.json b/ingestion/examples/sample_data/pipelines/tasks.json index 0e7f9f68f2f..28d17249a95 100644 --- a/ingestion/examples/sample_data/pipelines/tasks.json +++ b/ingestion/examples/sample_data/pipelines/tasks.json @@ -30,6 +30,14 @@ "taskUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=assert_table_exists", "downstreamTasks": ["assert_table_exists"], "taskType": "PrestoOperator" + }, + { + "name": "trino_task", + "displayName": "Trino Task", + "description": "Airflow operator to perform ETL on trino tables", + "taskUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=assert_table_exists", + "downstreamTasks": ["assert_table_exists"], + "taskType": "TrinoOperator" } ] } \ No newline at end of file diff --git a/ingestion/examples/workflows/trino.json b/ingestion/examples/workflows/trino.json new file mode 100644 index 00000000000..5749a98117f --- /dev/null +++ b/ingestion/examples/workflows/trino.json @@ -0,0 +1,29 @@ +{ + "source": { + "type": "trino", + "config": { + "service_name": "local_trino", + "host_port": "192.168.1.32:8080", + "database": "default" + } + }, + "sink": { + "type": "metadata-rest", + "config": { + } + }, + "metadata_server": { + "type": "metadata-server", + "config": { + "api_endpoint": "http://localhost:8585/api", + "auth_provider_type": "no-auth" + } + }, + "cron": { + "minute": "*/5", + "hour": null, + "day": null, + "month": null, + "day_of_week": null + } +} diff --git a/ingestion/setup.py b/ingestion/setup.py index 7c02dc5eaac..0f03004bdae 100644 --- a/ingestion/setup.py +++ b/ingestion/setup.py @@ -98,6 +98,7 @@ plugins: Dict[str, Set[str]] = { "oracle": {"cx_Oracle"}, "pii-processor": pii_requirements, "presto": {"pyhive~=0.6.3"}, + "trino": {"sqlalchemy-trino"}, "postgres": {"pymysql>=1.0.2", "psycopg2-binary", "GeoAlchemy2"}, "redash": {"redash-toolbelt==0.1.4"}, "redshift": {"openmetadata-sqlalchemy-redshift", "psycopg2-binary", "GeoAlchemy2"}, diff --git a/ingestion/src/metadata/generated/schema/entity/services/databaseService.py b/ingestion/src/metadata/generated/schema/entity/services/databaseService.py index 214b7ad3228..0781afc883e 100644 --- a/ingestion/src/metadata/generated/schema/entity/services/databaseService.py +++ b/ingestion/src/metadata/generated/schema/entity/services/databaseService.py @@ -23,6 +23,7 @@ class DatabaseServiceType(Enum): Oracle = 'Oracle' Athena = 'Athena' Presto = 'Presto' + Trino = 'Trino' Vertica = 'Vertica' diff --git a/ingestion/src/metadata/ingestion/source/superset.py b/ingestion/src/metadata/ingestion/source/superset.py index b3f7992eeef..1ce66482c95 100644 --- a/ingestion/src/metadata/ingestion/source/superset.py +++ b/ingestion/src/metadata/ingestion/source/superset.py @@ -64,6 +64,8 @@ def get_service_type_from_database_uri(uri: str) -> str: return "snowflake" if uri.startswith("presto"): return "presto" + if uri.startswith("trino"): + return "trino" if uri.startswith("postgresql"): return "postgres" if uri.startswith("pinot"): diff --git a/ingestion/src/metadata/ingestion/source/trino.py b/ingestion/src/metadata/ingestion/source/trino.py new file mode 100644 index 00000000000..01d593bb36f --- /dev/null +++ b/ingestion/src/metadata/ingestion/source/trino.py @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from urllib.parse import quote_plus + +from .sql_source import SQLSource, SQLConnectionConfig +from ..ometa.openmetadata_rest import MetadataServerConfig + + +class TrinoConfig(SQLConnectionConfig): + host_port = "localhost:8080" + scheme = "trino" + service_type = "Trino" + + def get_connection_url(self): + url = f"{self.scheme}://" + if self.username: + url += f"{quote_plus(self.username)}" + if self.password: + url += f":{quote_plus(self.password)}" + url += f"{self.host_port}" + if self.database: + url += f"?schema={quote_plus(self.database)}" + return url + + + + +class TrinoSource(SQLSource): + def __init__(self, config, metadata_config, ctx): + super().__init__(config, metadata_config, ctx) + + @classmethod + def create(cls, config_dict, metadata_config_dict, ctx): + config = TrinoConfig.parse_obj(config_dict) + metadata_config = MetadataServerConfig.parse_obj(metadata_config_dict) + return cls(config, metadata_config, ctx) diff --git a/roadmap.md b/roadmap.md index d7a7cdedb9e..a23990020f8 100644 --- a/roadmap.md +++ b/roadmap.md @@ -57,6 +57,7 @@ our roadmap yet, please file an Issue [Github](https://github.com/open-metadata/ ### Other features * Data quality - Data profiler integration work in progress * Schema versioning +* Support for Trino ## 0.6 Release - Nov 17th, 2021