From ed0a01edea0f2d33f368b8f23f3a399c74d4bed4 Mon Sep 17 00:00:00 2001 From: Pere Miquel Brull Date: Fri, 19 Aug 2022 11:19:20 +0200 Subject: [PATCH] Fix #6787 - GCS credentials marked as required + validate privateKey (#6804) Fix #6787 - GCS credentials marked as required + validate privateKey (#6804) --- .../security/credentials/gcsCredentials.json | 11 ++- .../src/metadata/ingestion/api/parser.py | 32 +++++--- ingestion/src/metadata/utils/credentials.py | 65 +++++++++++---- ingestion/tests/unit/test_credentials.py | 82 +++++++++++++++++++ .../content/deployment/kubernetes/values.md | 7 ++ .../deployment/upgrade/backup-metadata.md | 4 + .../connectors/database/bigquery/index.md | 4 + .../DBTConfigFormBuilder.test.tsx | 22 ++++- .../DBTGCSConfig.test.tsx | 48 +++++++++-- 9 files changed, 240 insertions(+), 35 deletions(-) create mode 100644 ingestion/tests/unit/test_credentials.py diff --git a/catalog-rest-service/src/main/resources/json/schema/security/credentials/gcsCredentials.json b/catalog-rest-service/src/main/resources/json/schema/security/credentials/gcsCredentials.json index 0500bda10f3..9b5a9ce9075 100644 --- a/catalog-rest-service/src/main/resources/json/schema/security/credentials/gcsCredentials.json +++ b/catalog-rest-service/src/main/resources/json/schema/security/credentials/gcsCredentials.json @@ -70,7 +70,16 @@ "format": "uri" } }, - "additionalProperties": false + "additionalProperties": false, + "required": [ + "type", + "projectId", + "privateKeyId", + "privateKey", + "clientEmail", + "clientId", + "clientX509CertUrl" + ] }, "GCSCredentialsPath": { "title": "GCS Credentials Path", diff --git a/ingestion/src/metadata/ingestion/api/parser.py b/ingestion/src/metadata/ingestion/api/parser.py index 2164b4728f9..3acec819811 100644 --- a/ingestion/src/metadata/ingestion/api/parser.py +++ b/ingestion/src/metadata/ingestion/api/parser.py @@ -202,6 +202,25 @@ def _unsafe_parse_config(config: dict, cls: T, message: str) -> None: raise err +def _parse_inner_connection(config_dict: dict, source_type: str) -> None: + """ + Parse the inner connection of the flagged connectors + + :param config_dict: JSON configuration + :param source_type: source type name, e.g., Airflow. + """ + inner_source_type = config_dict["source"]["serviceConnection"]["config"][ + "connection" + ]["type"] + inner_service_type = get_service_type(inner_source_type) + inner_connection_class = get_connection_class(inner_source_type, inner_service_type) + _unsafe_parse_config( + config=config_dict["source"]["serviceConnection"]["config"]["connection"], + cls=inner_connection_class, + message=f"Error parsing the inner service connection for {source_type}", + ) + + def parse_service_connection(config_dict: dict) -> None: """ Parse the service connection and raise any scoped @@ -221,18 +240,7 @@ def parse_service_connection(config_dict: dict) -> None: if source_type in HAS_INNER_CONNECTION: # We will first parse the inner `connection` configuration - inner_source_type = config_dict["source"]["serviceConnection"]["config"][ - "connection" - ]["type"] - inner_service_type = get_service_type(inner_source_type) - inner_connection_class = get_connection_class( - inner_source_type, inner_service_type - ) - _unsafe_parse_config( - config=config_dict["source"]["serviceConnection"]["config"]["connection"], - cls=inner_connection_class, - message=f"Error parsing the inner service connection for {source_type}", - ) + _parse_inner_connection(config_dict, source_type) # Parse the service connection dictionary with the scoped class _unsafe_parse_config( diff --git a/ingestion/src/metadata/utils/credentials.py b/ingestion/src/metadata/utils/credentials.py index 36171c76691..2af80de1262 100644 --- a/ingestion/src/metadata/utils/credentials.py +++ b/ingestion/src/metadata/utils/credentials.py @@ -14,6 +14,9 @@ Credentials helper module import json import os import tempfile +from typing import Dict + +from cryptography.hazmat.primitives import serialization from metadata.generated.schema.security.credentials.gcsCredentials import ( GCSCredentials, @@ -33,6 +36,24 @@ class InvalidGcsConfigException(Exception): """ +class InvalidPrivateKeyException(Exception): + """ + If the key cannot be serialised + """ + + +def validate_private_key(private_key: str) -> None: + """ + Make sure that a private key can be properly parsed + by cryptography backends + :param private_key: key to validate + """ + try: + serialization.load_pem_private_key(private_key.encode(), password=None) + except ValueError as err: + raise InvalidPrivateKeyException(f"Cannot serialise key - {err}") + + def create_credential_tmp_file(credentials: dict) -> str: """ Given a credentials' dict, store it in a tmp file @@ -46,6 +67,31 @@ def create_credential_tmp_file(credentials: dict) -> str: return fp.name +def build_google_credentials_dict(gcs_values: GCSValues) -> Dict[str, str]: + """ + Given GCSValues, build a dictionary as the JSON file + downloaded from GCS with the service_account + :param gcs_values: GCS credentials + :return: Dictionary with credentials + """ + + private_key_str = gcs_values.privateKey.get_secret_value() + validate_private_key(private_key_str) + + return { + "type": gcs_values.type, + "project_id": gcs_values.projectId, + "private_key_id": gcs_values.privateKeyId, + "private_key": private_key_str, + "client_email": gcs_values.clientEmail, + "client_id": gcs_values.clientId, + "auth_uri": str(gcs_values.authUri), + "token_uri": str(gcs_values.tokenUri), + "auth_provider_x509_cert_url": str(gcs_values.authProviderX509CertUrl), + "client_x509_cert_url": str(gcs_values.clientX509CertUrl), + } + + def set_google_credentials(gcs_credentials: GCSCredentials) -> None: """ Set GCS credentials environment variable @@ -57,26 +103,15 @@ def set_google_credentials(gcs_credentials: GCSCredentials) -> None: if isinstance(gcs_credentials.gcsConfig, GCSCredentialsPath): os.environ[GOOGLE_CREDENTIALS] = str(gcs_credentials.gcsConfig.__root__) return + if gcs_credentials.gcsConfig.projectId is None: logger.info( - "No credentials available, using the current environment permissions authenticated via gcloud SDK ." + "No credentials available, using the current environment permissions authenticated via gcloud SDK." ) return + if isinstance(gcs_credentials.gcsConfig, GCSValues): - credentials_dict = { - "type": gcs_credentials.gcsConfig.type, - "project_id": gcs_credentials.gcsConfig.projectId, - "private_key_id": gcs_credentials.gcsConfig.privateKeyId, - "private_key": gcs_credentials.gcsConfig.privateKey.get_secret_value(), - "client_email": gcs_credentials.gcsConfig.clientEmail, - "client_id": gcs_credentials.gcsConfig.clientId, - "auth_uri": str(gcs_credentials.gcsConfig.authUri), - "token_uri": str(gcs_credentials.gcsConfig.tokenUri), - "auth_provider_x509_cert_url": str( - gcs_credentials.gcsConfig.authProviderX509CertUrl - ), - "client_x509_cert_url": str(gcs_credentials.gcsConfig.clientX509CertUrl), - } + credentials_dict = build_google_credentials_dict(gcs_credentials.gcsConfig) tmp_credentials_file = create_credential_tmp_file(credentials=credentials_dict) os.environ[GOOGLE_CREDENTIALS] = tmp_credentials_file diff --git a/ingestion/tests/unit/test_credentials.py b/ingestion/tests/unit/test_credentials.py new file mode 100644 index 00000000000..50975202f2d --- /dev/null +++ b/ingestion/tests/unit/test_credentials.py @@ -0,0 +1,82 @@ +# Copyright 2021 Collate +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Test Credentials helper module +""" +from unittest import TestCase + +from pydantic import SecretStr + +from metadata.generated.schema.security.credentials.gcsCredentials import GCSValues +from metadata.utils.credentials import ( + InvalidPrivateKeyException, + build_google_credentials_dict, +) + + +class TestCredentials(TestCase): + """ + Validate credentials handling + """ + + def test_build_google_credentials_dict(self): + """ + Check how we can validate GCS values + """ + + # Key mocked online + private_key = """-----BEGIN RSA PRIVATE KEY----- +MIICXQIBAAKBgQDMGwM93kIt3D4r4+dWAGdoTboSaZcFLhsG1lvnZlYEpnZoFo1M +ek7laRKDUW3CkdTlSid9p4/RTs9SYKuuXvNKNSLApHUeR2zgKBIHYTGGv1t1bEWc +ohVeqr7w8HkFr9LV4qxgFEWBBd3QYncY/Y1iZgTtbmMiUxJN9vj/kuH0xQIDAQAB +AoGAPDqAY2JRrwy9v9/ZpPQrj4jYLpS//sRTL1pT9l2pZmfkquR0v6ub2nB+CQgf +VnoIE70lGBw5AS+7V/i00JiuO6GP/MWWqxKdc5McjBGYDIb+9gQ/DrryVDHsqgGX +iZrWr7rIrpGsbCB2xt2HPpKR7D9IpI8FA+EEU9fIPfETM6ECQQDv69L78zdijSNk +CYx70dVHqCiDZT5RbkJqDmQwKabIGXBqZLTM+7ZAHotq0EXGc5BvQGyIMso/qIOs +Wq3imi3dAkEA2ci4xEzj5guQcGxoVcxfGm+M/VqXLuw/eW1sYdOp52OwdDywxG+I +6tpm5ByVowhqT8PHDJVOy8GEV9QNw0Y4CQJBAJiyn/rJJlPr/j1aMnZP642KwhY2 +pr4PDegQNsXMjKDISBr+82+POMSAbD1UR0RyItgbybe5k62GZB+bKxaRCGUCQEVj +l8MrwH0eeCHp2IBlwnN40VIz1/GiYkL9I0g0GXFZKPKQF74uz1AM0DWkCeVNHBpY +BYaz18xB1znonY33RIkCQQDE3wAWxFrvr582J12qJkE4enmNhRJFdcSREDX54d/5 +VEhPQF0i0tUU7Fl071hcYaiQoZx4nIjN+NG6p5QKbl6k +-----END RSA PRIVATE KEY-----""" + + gcs_values = GCSValues( + type="my_type", + projectId="project_id", + privateKeyId="private_key_id", + privateKey=private_key, + clientEmail="email@mail.com", + clientId="client_id", + clientX509CertUrl="http://localhost:1234", + ) + + expected_dict = { + "type": "my_type", + "project_id": "project_id", + "private_key_id": "private_key_id", + "private_key": private_key, + "client_email": "email@mail.com", + "client_id": "client_id", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", + "client_x509_cert_url": "http://localhost:1234", + } + + build_google_credentials_dict(gcs_values) + + self.assertEqual(expected_dict, build_google_credentials_dict(gcs_values)) + + gcs_values.privateKey = SecretStr("I don't think I am a proper Private Key") + + with self.assertRaises(InvalidPrivateKeyException): + build_google_credentials_dict(gcs_values) diff --git a/openmetadata-docs/content/deployment/kubernetes/values.md b/openmetadata-docs/content/deployment/kubernetes/values.md index 7ff8b71ca09..d9f6b0a7297 100644 --- a/openmetadata-docs/content/deployment/kubernetes/values.md +++ b/openmetadata-docs/content/deployment/kubernetes/values.md @@ -10,6 +10,8 @@ This page list all the supported helm values for OpenMetadata Helm Charts. ## Global Chart Values + + | Key | Type | Default | | :---------- | :---------- | :---------- | | global.authentication.provider | string | `no-auth` | @@ -75,6 +77,7 @@ This page list all the supported helm values for OpenMetadata Helm Charts. | global.elasticsearch.trustStore.password.secretRef | string | `elasticsearch-truststore-secrets` | | global.elasticsearch.trustStore.password.secretKey | string | `openmetadata-elasticsearch-truststore-password` | | global.jwtTokenConfiguration.enabled | bool | `false` | +| global.fernetKey | string | `jJ/9sz0g0OHxsfxOoSfdFdmk3ysNmPRnH3TUAbz3IHA=` | | global.jwtTokenConfiguration.rsapublicKeyFilePath | string | `Empty String` | | global.jwtTokenConfiguration.rsaprivateKeyFilePath | string | `Empty String` | | global.jwtTokenConfiguration.jwtissuer | string | `open-metadata.org` | @@ -84,9 +87,11 @@ This page list all the supported helm values for OpenMetadata Helm Charts. | global.openmetadata.host | string | `openmetadata` | | global.openmetadata.port | int | 8585 | +
## Chart Values + | Key | Type | Default | | :---------- | :---------- | :---------- | @@ -125,3 +130,5 @@ This page list all the supported helm values for OpenMetadata Helm Charts. | serviceAccount.name | string | `nil` | | sidecars | list | `[]` | | tolerations | list | `[]` | + +
diff --git a/openmetadata-docs/content/deployment/upgrade/backup-metadata.md b/openmetadata-docs/content/deployment/upgrade/backup-metadata.md index 740a9ed8d61..d2046de3682 100644 --- a/openmetadata-docs/content/deployment/upgrade/backup-metadata.md +++ b/openmetadata-docs/content/deployment/upgrade/backup-metadata.md @@ -14,6 +14,10 @@ Recovery practices. While there are cloud services that feature automatic snapshots and replication, the metadata CLI now allows all users to perform backups regardless of the underlying infrastructure. +## Requirements + +The backup CLI needs to be used with `openmetadata-ingestion` version 0.12 or higher. + ## Installation The CLI comes bundled in the base `openmetadata-ingestion` Python package. You can install it with: diff --git a/openmetadata-docs/content/openmetadata/connectors/database/bigquery/index.md b/openmetadata-docs/content/openmetadata/connectors/database/bigquery/index.md index a868042bd02..03bf8819800 100644 --- a/openmetadata-docs/content/openmetadata/connectors/database/bigquery/index.md +++ b/openmetadata-docs/content/openmetadata/connectors/database/bigquery/index.md @@ -11,6 +11,8 @@ slug: /openmetadata/connectors/database/bigquery

To execute metadata extraction and usage workflow successfully the user or the service account should have enough access to fetch required data. Following table describes the minimum required permissions

+ + | # | GCP Permission | GCP Role | Required For | | :---------- | :---------- | :---------- | :---------- | | 1 | bigquery.datasets.get | BigQuery Data Viewer | Metadata Ingestion | @@ -25,6 +27,8 @@ slug: /openmetadata/connectors/database/bigquery | 10 | bigquery.readsessions.create | BigQuery Admin | Bigquery Usage Workflow | | 11 | bigquery.readsessions.getData | BigQuery Admin | Bigquery Usage Workflow | +
+

Connection Options

diff --git a/openmetadata-ui/src/main/resources/ui/src/components/common/DBTConfigFormBuilder/DBTConfigFormBuilder.test.tsx b/openmetadata-ui/src/main/resources/ui/src/components/common/DBTConfigFormBuilder/DBTConfigFormBuilder.test.tsx index 625023d1a69..2ac27eced3b 100644 --- a/openmetadata-ui/src/main/resources/ui/src/components/common/DBTConfigFormBuilder/DBTConfigFormBuilder.test.tsx +++ b/openmetadata-ui/src/main/resources/ui/src/components/common/DBTConfigFormBuilder/DBTConfigFormBuilder.test.tsx @@ -25,7 +25,27 @@ const mockSecurityConfigS3 = { }; const mockSecurityConfigGCSValue = { - gcsConfig: {}, + gcsConfig: { + authProviderX509CertUrl: 'url', + + authUri: 'uri', + + clientEmail: 'email', + + clientId: 'id', + + clientX509CertUrl: 'certUrl', + + privateKey: 'privateKey', + + privateKeyId: 'keyId', + + projectId: 'projectId', + + tokenUri: 'tokenUri', + + type: 'type', + }, }; const mockPrefixConfig = { diff --git a/openmetadata-ui/src/main/resources/ui/src/components/common/DBTConfigFormBuilder/DBTGCSConfig.test.tsx b/openmetadata-ui/src/main/resources/ui/src/components/common/DBTConfigFormBuilder/DBTGCSConfig.test.tsx index e05563e4e82..25b813f5ce9 100644 --- a/openmetadata-ui/src/main/resources/ui/src/components/common/DBTConfigFormBuilder/DBTGCSConfig.test.tsx +++ b/openmetadata-ui/src/main/resources/ui/src/components/common/DBTConfigFormBuilder/DBTGCSConfig.test.tsx @@ -21,6 +21,28 @@ const mockSubmit = jest.fn(); const mockPrefixConfigChange = jest.fn(); const mockSecurityConfigChange = jest.fn(); +const gsConfig = { + authProviderX509CertUrl: 'url', + + authUri: 'uri', + + clientEmail: 'email', + + clientId: 'id', + + clientX509CertUrl: 'certUrl', + + privateKey: 'privateKey', + + privateKeyId: 'keyId', + + projectId: 'projectId', + + tokenUri: 'tokenUri', + + type: 'type', +}; + const mockProps = { okText: 'Next', cancelText: 'Back', @@ -86,6 +108,7 @@ describe('Test DBT GCS Config Form', () => { {...mockProps} dbtSecurityConfig={{ gcsConfig: { + ...gsConfig, type: 'CredsType', }, }} @@ -102,6 +125,7 @@ describe('Test DBT GCS Config Form', () => { {...mockProps} dbtSecurityConfig={{ gcsConfig: { + ...gsConfig, projectId: 'ProjectId', }, }} @@ -118,6 +142,7 @@ describe('Test DBT GCS Config Form', () => { {...mockProps} dbtSecurityConfig={{ gcsConfig: { + ...gsConfig, privateKeyId: 'PrivateKeyId', }, }} @@ -134,6 +159,7 @@ describe('Test DBT GCS Config Form', () => { {...mockProps} dbtSecurityConfig={{ gcsConfig: { + ...gsConfig, privateKey: 'PrivateKey', }, }} @@ -148,7 +174,9 @@ describe('Test DBT GCS Config Form', () => { const { container } = render( ); const inputClientEmail = getByTestId(container, 'client-email'); @@ -160,7 +188,7 @@ describe('Test DBT GCS Config Form', () => { const { container } = render( ); const inputClientId = getByTestId(container, 'client-id'); @@ -172,7 +200,9 @@ describe('Test DBT GCS Config Form', () => { const { container } = render( ); const inputAuthUri = getByTestId(container, 'auth-uri'); @@ -185,7 +215,7 @@ describe('Test DBT GCS Config Form', () => { ); @@ -199,7 +229,10 @@ describe('Test DBT GCS Config Form', () => { ); @@ -216,7 +249,10 @@ describe('Test DBT GCS Config Form', () => { );