From 3114ef9b5c2e4ddd0a58333a978faaecad2d06ec Mon Sep 17 00:00:00 2001 From: Onkar Ravgan Date: Thu, 17 Aug 2023 10:52:58 +0530 Subject: [PATCH] Add support for JWT auth in Trino (#12823) * Added jwt field in trino * fixed postgres migration * fixed pytests --- .../v015__create_db_connection_info.sql | 26 +++++++++++- .../v015__create_db_connection_info.sql | 26 +++++++++++- ingestion/examples/workflows/trino.yaml | 6 +++ .../source/database/trino/connection.py | 25 ++++++++++-- .../tests/unit/test_source_connection.py | 40 ++++++++++++++----- .../connections/database/common/jwtAuth.json | 17 ++++++++ .../connections/database/trinoConnection.json | 24 +++++------ .../ui/public/locales/en-US/Database/Trino.md | 24 ++++++++--- 8 files changed, 154 insertions(+), 34 deletions(-) create mode 100644 openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/common/jwtAuth.json diff --git a/bootstrap/sql/com.mysql.cj.jdbc.Driver/v015__create_db_connection_info.sql b/bootstrap/sql/com.mysql.cj.jdbc.Driver/v015__create_db_connection_info.sql index 665ccd764c8..42654516980 100644 --- a/bootstrap/sql/com.mysql.cj.jdbc.Driver/v015__create_db_connection_info.sql +++ b/bootstrap/sql/com.mysql.cj.jdbc.Driver/v015__create_db_connection_info.sql @@ -2,6 +2,30 @@ ALTER TABLE query_entity DROP COLUMN deleted; ALTER TABLE event_subscription_entity DROP COLUMN deleted; +-- queries to rename params to connectionOptions for trino +UPDATE dbservice_entity +SET json = JSON_SET( + JSON_REMOVE(json, '$.connection.config.connectionOptions'), + '$.connection.config.connectionOptions', + JSON_EXTRACT(json, '$.connection.config.params') +) +WHERE serviceType = 'Trino'; + +UPDATE dbservice_entity +SET json = JSON_REMOVE(json, '$.connection.config.params') +WHERE serviceType = 'Trino'; + +-- Modify migrations for service connection of trino to move password under authType +UPDATE dbservice_entity +SET json = JSON_INSERT( + JSON_REMOVE(json, '$.connection.config.password'), + '$.connection.config.authType', + JSON_OBJECT(), + '$.connection.config.authType.password', + JSON_EXTRACT(json, '$.connection.config.password')) +where serviceType = 'Trino' +AND JSON_EXTRACT(json, '$.connection.config.password') IS NOT NULL; + -- create domain entity table CREATE TABLE IF NOT EXISTS domain_entity ( id VARCHAR(36) GENERATED ALWAYS AS (json ->> '$.id') STORED NOT NULL, @@ -51,4 +75,4 @@ CREATE TABLE IF NOT EXISTS search_index_entity ( deleted BOOLEAN GENERATED ALWAYS AS (json -> '$.deleted'), PRIMARY KEY (id), UNIQUE (fqnHash) - ); \ No newline at end of file + ); diff --git a/bootstrap/sql/org.postgresql.Driver/v015__create_db_connection_info.sql b/bootstrap/sql/org.postgresql.Driver/v015__create_db_connection_info.sql index 8d8c24f4cee..1784d2d3233 100644 --- a/bootstrap/sql/org.postgresql.Driver/v015__create_db_connection_info.sql +++ b/bootstrap/sql/org.postgresql.Driver/v015__create_db_connection_info.sql @@ -2,6 +2,30 @@ ALTER TABLE query_entity DROP COLUMN deleted; ALTER TABLE event_subscription_entity DROP COLUMN deleted; +-- queries to rename params to connectionOptions for trino +UPDATE dbservice_entity +SET json = jsonb_set( + json, + '{connection,config,connectionOptions}', + jsonb_extract_path(json, 'connection', 'config', 'params'), + true +) +WHERE serviceType = 'Trino'; + +UPDATE dbservice_entity +SET json = json::jsonb #- '{connection,config,params}' +where json #> '{serviceType}' in ('"Trino"'); + +-- Modify migrations for service connection of trino to move password under authType +UPDATE dbservice_entity +SET json = jsonb_set( +json #-'{connection,config,password}', +'{connection,config,authType}', +jsonb_build_object('password',json#>'{connection,config,password}') +) +WHERE serviceType = 'Trino' + and json#>'{connection,config,password}' is not null; + -- create domain entity table CREATE TABLE IF NOT EXISTS domain_entity ( id VARCHAR(36) GENERATED ALWAYS AS (json ->> 'id') STORED NOT NULL, @@ -51,4 +75,4 @@ CREATE TABLE IF NOT EXISTS search_index_entity ( deleted BOOLEAN GENERATED ALWAYS AS ((json ->> 'deleted')::boolean) STORED, PRIMARY KEY (id), UNIQUE (fqnHash) - ); \ No newline at end of file + ); diff --git a/ingestion/examples/workflows/trino.yaml b/ingestion/examples/workflows/trino.yaml index c8c7499aea6..1bb4b49144a 100644 --- a/ingestion/examples/workflows/trino.yaml +++ b/ingestion/examples/workflows/trino.yaml @@ -6,6 +6,12 @@ source: type: Trino hostPort: localhost:8080 username: user + # For trino, choose one of basic or jwt auth configurations + authType: + # # For basic auth + # password: password + # # For JWT auth + # jwt: jwt_token catalog: catalog_name databaseSchema: schema_name connectionOptions: {} diff --git a/ingestion/src/metadata/ingestion/source/database/trino/connection.py b/ingestion/src/metadata/ingestion/source/database/trino/connection.py index 3d44e81bb44..c4aac4ed7ec 100644 --- a/ingestion/src/metadata/ingestion/source/database/trino/connection.py +++ b/ingestion/src/metadata/ingestion/source/database/trino/connection.py @@ -21,6 +21,10 @@ from sqlalchemy.engine import Engine from metadata.generated.schema.entity.automations.workflow import ( Workflow as AutomationWorkflow, ) +from metadata.generated.schema.entity.services.connections.database.common import ( + basicAuth, + jwtAuth, +) from metadata.generated.schema.entity.services.connections.database.trinoConnection import ( TrinoConnection, ) @@ -28,6 +32,7 @@ from metadata.ingestion.connections.builders import ( create_generic_db_connection, get_connection_args_common, init_empty_connection_arguments, + init_empty_connection_options, ) from metadata.ingestion.connections.secrets import connection_with_options_secrets from metadata.ingestion.connections.test_connections import ( @@ -38,19 +43,31 @@ from metadata.ingestion.source.database.trino.queries import TRINO_GET_DATABASE def get_connection_url(connection: TrinoConnection) -> str: + """ + Prepare the connection url for trino + """ url = f"{connection.scheme.value}://" if connection.username: url += f"{quote_plus(connection.username)}" - if connection.password: - url += f":{quote_plus(connection.password.get_secret_value())}" + if ( + isinstance(connection.authType, basicAuth.BasicAuth) + and connection.authType.password + ): + url += f":{quote_plus(connection.authType.password.get_secret_value())}" url += "@" url += f"{connection.hostPort}" if connection.catalog: url += f"/{connection.catalog}" - if connection.params is not None: + if isinstance(connection.authType, jwtAuth.JwtAuth): + if not connection.connectionOptions: + connection.connectionOptions = init_empty_connection_options() + connection.connectionOptions.__root__[ + "access_token" + ] = connection.authType.jwt.get_secret_value() + if connection.connectionOptions is not None: params = "&".join( f"{key}={quote_plus(value)}" - for (key, value) in connection.params.items() + for (key, value) in connection.connectionOptions.__root__.items() if value ) url = f"{url}?{params}" diff --git a/ingestion/tests/unit/test_source_connection.py b/ingestion/tests/unit/test_source_connection.py index fcdb57937bb..e0a867bd9e4 100644 --- a/ingestion/tests/unit/test_source_connection.py +++ b/ingestion/tests/unit/test_source_connection.py @@ -22,6 +22,9 @@ from metadata.generated.schema.entity.services.connections.database.clickhouseCo from metadata.generated.schema.entity.services.connections.database.common.basicAuth import ( BasicAuth, ) +from metadata.generated.schema.entity.services.connections.database.common.jwtAuth import ( + JwtAuth, +) from metadata.generated.schema.entity.services.connections.database.databricksConnection import ( DatabricksConnection, DatabricksScheme, @@ -397,7 +400,7 @@ class SourceConnectionTest(TestCase): scheme=TrinoScheme.trino, hostPort="localhost:443", username="username", - password="pass", + authType=BasicAuth(password="pass"), catalog="catalog", ) @@ -409,7 +412,7 @@ class SourceConnectionTest(TestCase): scheme=TrinoScheme.trino, hostPort="localhost:443", username="username@444", - password="pass@111", + authType=BasicAuth(password="pass@111"), catalog="catalog", ) @@ -424,7 +427,7 @@ class SourceConnectionTest(TestCase): expected_args = {} trino_conn_obj = TrinoConnection( username="user", - password=None, + authType=BasicAuth(password=None), hostPort="localhost:443", catalog="tpcds", connectionArguments=None, @@ -436,7 +439,7 @@ class SourceConnectionTest(TestCase): expected_args = {"user": "user-to-be-impersonated"} trino_conn_obj = TrinoConnection( username="user", - password=None, + authType=BasicAuth(password=None), hostPort="localhost:443", catalog="tpcds", connectionArguments={"user": "user-to-be-impersonated"}, @@ -448,7 +451,7 @@ class SourceConnectionTest(TestCase): expected_args = {} trino_conn_obj = TrinoConnection( username="user", - password=None, + authType=BasicAuth(password=None), hostPort="localhost:443", catalog="tpcds", connectionArguments=None, @@ -464,7 +467,7 @@ class SourceConnectionTest(TestCase): expected_args = {"user": "user-to-be-impersonated"} trino_conn_obj = TrinoConnection( username="user", - password=None, + authType=BasicAuth(password=None), hostPort="localhost:443", catalog="tpcds", connectionArguments={"user": "user-to-be-impersonated"}, @@ -486,9 +489,26 @@ class SourceConnectionTest(TestCase): scheme=TrinoScheme.trino, hostPort="localhost:443", username="username", - password="pass", + authType=BasicAuth(password="pass"), + catalog="catalog", + connectionOptions={"param": "value"}, + ) + assert expected_url == get_connection_url(trino_conn_obj) + + def test_trino_url_with_jwt_auth(self): + from metadata.ingestion.source.database.trino.connection import ( + get_connection_url, + ) + + expected_url = ( + "trino://username@localhost:443/catalog?access_token=jwt_token_value" + ) + trino_conn_obj = TrinoConnection( + scheme=TrinoScheme.trino, + hostPort="localhost:443", + username="username", + authType=JwtAuth(jwt="jwt_token_value"), catalog="catalog", - params={"param": "value"}, ) assert expected_url == get_connection_url(trino_conn_obj) @@ -502,7 +522,7 @@ class SourceConnectionTest(TestCase): scheme=TrinoScheme.trino, hostPort="localhost:443", username="username", - password="pass", + authType=BasicAuth(password="pass"), catalog="catalog", proxies=test_proxies, ) @@ -522,7 +542,7 @@ class SourceConnectionTest(TestCase): scheme=TrinoScheme.trino, hostPort="localhost:443", username="username", - password="pass", + authType=BasicAuth(password="pass"), ) assert expected_url == get_connection_url(trino_conn_obj) diff --git a/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/common/jwtAuth.json b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/common/jwtAuth.json new file mode 100644 index 00000000000..1b2d15903c4 --- /dev/null +++ b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/common/jwtAuth.json @@ -0,0 +1,17 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/database/common/jwtAuth.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "JWT Auth", + "description": "Common Database Connection Config", + "javaType": "org.openmetadata.schema.services.connections.database.common.jwtAuth", + "type": "object", + "properties": { + "jwt": { + "title": "JWT", + "description": "JWT to connect to source.", + "type": "string", + "format": "password" + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/trinoConnection.json b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/trinoConnection.json index db89936c96f..ffbc2c993da 100644 --- a/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/trinoConnection.json +++ b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/trinoConnection.json @@ -37,11 +37,17 @@ "description": "Username to connect to Trino. This user should have privileges to read all the metadata in Trino.", "type": "string" }, - "password": { - "title": "Password", - "description": "Password to connect to Trino.", - "type": "string", - "format": "password" + "authType": { + "title": "Auth Configuration Type", + "description": "Choose Auth Config Type.", + "oneOf": [ + { + "$ref": "./common/basicAuth.json" + }, + { + "$ref": "./common/jwtAuth.json" + } + ] }, "hostPort": { "title": "Host and Port", @@ -66,14 +72,6 @@ "type": "string" } }, - "params": { - "title": "URL Parameters", - "description": "URL parameters for connection to the Trino data source", - "type": "object", - "additionalProperties": { - "type": "string" - } - }, "connectionOptions": { "title": "Connection Options", "$ref": "../connectionBasicType.json#/definitions/connectionOptions" diff --git a/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/Trino.md b/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/Trino.md index 5b6d13a5a6c..e6369593415 100644 --- a/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/Trino.md +++ b/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/Trino.md @@ -29,11 +29,30 @@ $$section Username to connect to Trino. This user should have `SELECT` permission on the `SYSTEM.METADATA` and `INFORMATION_SCHEMA` - see the section above for more details. $$ +### Auth Config $(id="authType") +There are 2 types of auth configs: +- Basic Auth. +- JWT Auth. + +User can authenticate the Trino Instance with auth type as `Basic Authentication` i.e. Password **or** by using `JWT Authentication`. + + +## Basic Auth + $$section ### Password $(id="password") Password to connect to Trino. $$ +## JWT Auth Config + +$$section +### JWT $(id="jwt") +JWT can be used to authenticate with trino. +Follow the steps in the [official trino](https://trino.io/docs/current/security/jwt.html) documentation to setup trino with jwt. + +$$ + $$section ### Host Port $(id="hostPort") This parameter specifies the host and port of the Trino instance. This should be specified as a string in the format `hostname:port`. For example, you might set the hostPort parameter to `localhost:8080`. @@ -56,11 +75,6 @@ $$section Proxies for the connection to Trino data source $$ -$$section -### Params $(id="params") -URL parameters for connection to the Trino data source -$$ - $$section ### Connection Options $(id="connectionOptions") Additional connection options to build the URL that can be sent to service during the connection.