From fdfdaa14aa1cb1e6fef0162497dfa1be8aff632d Mon Sep 17 00:00:00 2001 From: Nahuel Date: Tue, 26 Jul 2022 15:42:40 +0200 Subject: [PATCH] Fix#5921: Implentation for retrieving auth provider config from Secret Manager (#6330) * Implentation for retrieving auth provider config from Secret Manager * Address PR comments * Address code smells from SonarCloud Co-authored-by: Pere Miquel Brull --- .../src/metadata/ingestion/ometa/ometa_api.py | 13 +- .../src/metadata/utils/secrets_manager.py | 150 ++++++++++++++---- .../tests/integration/ometa/test_ometa_api.py | 17 -- .../ometa/test_ometa_secrets_manager.py | 88 ++++++++++ .../metadata/utils/test_secrets_manager.py | 92 ++++++++--- .../workflows/ingestion/common.py | 4 +- 6 files changed, 291 insertions(+), 73 deletions(-) create mode 100644 ingestion/tests/integration/ometa/test_ometa_secrets_manager.py diff --git a/ingestion/src/metadata/ingestion/ometa/ometa_api.py b/ingestion/src/metadata/ingestion/ometa/ometa_api.py index a73b6490468..d43c1261bcf 100644 --- a/ingestion/src/metadata/ingestion/ometa/ometa_api.py +++ b/ingestion/src/metadata/ingestion/ometa/ometa_api.py @@ -160,6 +160,14 @@ class OpenMetadata( def __init__(self, config: OpenMetadataConnection, raw_data: bool = False): self.config = config + # Load the secrets' manager client + self.secrets_manager_client = get_secrets_manager( + config.secretsManagerProvider, config.secretsManagerCredentials + ) + + # Load auth provider config from Secret Manager if necessary + self.secrets_manager_client.add_auth_provider_security_config(self.config) + # Load the auth provider init from the registry auth_provider_fn = auth_provider_registry.registry.get( self.config.authProvider.value @@ -171,11 +179,6 @@ class OpenMetadata( self._auth_provider = auth_provider_fn(self.config) - # Load the secrets' manager client - self.secrets_manager_client = get_secrets_manager( - config.secretsManagerProvider, config.secretsManagerCredentials - ) - client_config: ClientConfig = ClientConfig( base_url=self.config.hostPort, api_version=self.config.apiVersion, diff --git a/ingestion/src/metadata/utils/secrets_manager.py b/ingestion/src/metadata/utils/secrets_manager.py index 8439a9107c6..c39e0b01af9 100644 --- a/ingestion/src/metadata/utils/secrets_manager.py +++ b/ingestion/src/metadata/utils/secrets_manager.py @@ -12,18 +12,27 @@ import inspect import json from abc import abstractmethod from pydoc import locate -from typing import NewType, Optional, Union +from typing import Dict, NewType, Optional, Union import boto3 from botocore.exceptions import ClientError -from pydantic.main import ModelMetaclass from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import ( + AuthProvider, + OpenMetadataConnection, SecretsManagerProvider, ) from metadata.generated.schema.entity.services.connections.serviceConnection import ( ServiceConnection, ) +from metadata.generated.schema.security.client import ( + auth0SSOClientConfig, + azureSSOClientConfig, + customOidcSSOClientConfig, + googleSSOClientConfig, + oktaSSOClientConfig, + openMetadataJWTClientConfig, +) from metadata.generated.schema.security.credentials.awsCredentials import AWSCredentials from metadata.utils.logger import ingestion_logger from metadata.utils.singleton import Singleton @@ -32,30 +41,82 @@ logger = ingestion_logger() SECRET_MANAGER_AIRFLOW_CONF = "openmetadata_secrets_manager" +# new typing type wrapping types from the '__root__' field of 'ServiceConnection' class ServiceConnectionType = NewType( "ServiceConnectionType", ServiceConnection.__fields__["__root__"].type_ ) +# new typing type wrapping types from the 'securityConfig' field of 'OpenMetadataConnection' class +AuthProviderClientType = NewType( + "AuthProviderClientType", OpenMetadataConnection.__fields__["securityConfig"].type_ +) + +AUTH_PROVIDER_MAPPING: Dict[AuthProvider, AuthProviderClientType] = { + AuthProvider.google: googleSSOClientConfig.GoogleSSOClientConfig, + AuthProvider.okta: oktaSSOClientConfig.OktaSSOClientConfig, + AuthProvider.auth0: auth0SSOClientConfig.Auth0SSOClientConfig, + AuthProvider.azure: azureSSOClientConfig.AzureSSOClientConfig, + AuthProvider.custom_oidc: customOidcSSOClientConfig.CustomOIDCSSOClientConfig, + AuthProvider.openmetadata: openMetadataJWTClientConfig.OpenMetadataJWTClientConfig, +} + class SecretsManager(metaclass=Singleton): + """ + Abstract class implemented by different secrets' manager providers. + + It contains a set of auxiliary methods for adding missing fields which have been encrypted in the secrets' manager + providers. + """ + @abstractmethod def add_service_config_connection( self, service: ServiceConnectionType, service_type: str, - ) -> ServiceConnectionType: + ) -> None: + """ + Add the service connection config from the secret manager to a given service connection object. + :param service: Service connection object e.g. DatabaseConnection + :param service_type: Service type e.g. databaseService + """ + pass + + @abstractmethod + def add_auth_provider_security_config(self, config: OpenMetadataConnection) -> None: + """ + Add the auth provider security config from the secret manager to a given OpenMetadata connection object. + :param config: OpenMetadataConnection object + """ pass @staticmethod def to_service_simple(service_type: str) -> str: + """ + Return the service simple name. + :param service_type: Service type e.g. databaseService + :return: + """ return service_type.replace("Service", "").lower() - def build_secret_id( - self, service_type: str, service_connection_type: str, service_name: str - ) -> str: - return f"openmetadata-{self.to_service_simple(service_type).lower()}-{service_connection_type.lower()}-{service_name.lower()}" + @staticmethod + def build_secret_id(*args: str) -> str: + """ + Returns a secret_id used by the secrets' manager providers for retrieving a secret. + For example: + If `args` are `Database`, `SERVICE` and `MySql` it will return `openmetadata-database-service-mysql` + :param args: sorted parameters for building the secret_id + :return: the secret_id + """ + secret_suffix = "-".join([arg.lower() for arg in args]) + return f"openmetadata-{secret_suffix}" - def get_service_connection_class(self, service_type) -> ModelMetaclass: + def get_service_connection_class(self, service_type: str) -> object: + """ + Returns the located service object by dotted path, importing as necessary. + :param service_type: Service type e.g. databaseService + :return: Located service object + """ service_conn_name = next( ( clazz[1] @@ -67,42 +128,69 @@ class SecretsManager(metaclass=Singleton): == f"{self.to_service_simple(service_type)}connection" ) ).__name__ - service_conn_class = locate( + return locate( f"metadata.generated.schema.entity.services.{service_type}.{service_conn_name}" ) - return service_conn_class def get_connection_class( - self, service_type: str, service_connection_type - ) -> ModelMetaclass: + self, service_type: str, service_connection_type: str + ) -> object: + """ + Returns the located connection object by dotted path, importing as necessary. + :param service_type: Service type e.g. databaseService + :param service_connection_type: Service connection type e.g. Mysql + :return: Located connection object + """ connection_py_file = ( service_connection_type[0].lower() + service_connection_type[1:] ) - conn_class = locate( + return locate( f"metadata.generated.schema.entity.services.connections.{self.to_service_simple(service_type)}.{connection_py_file}Connection.{service_connection_type}Connection" ) - return conn_class class LocalSecretsManager(SecretsManager): + """ + LocalSecretsManager is used when there is not a secrets' manager configured. + """ + + def add_auth_provider_security_config( + self, open_metadata_connection: OpenMetadataConnection + ) -> None: + """ + The LocalSecretsManager does not modify the OpenMetadataConnection object + """ + pass + def add_service_config_connection( self, service: ServiceConnectionType, service_type: str, - ) -> ServiceConnectionType: - return service + ) -> None: + """ + The LocalSecretsManager does not modify the ServiceConnection object + """ + pass class AWSSecretsManager(SecretsManager): + def __init__(self, credentials: AWSCredentials): + session = boto3.Session( + aws_access_key_id=credentials.awsAccessKeyId, + aws_secret_access_key=credentials.awsSecretAccessKey.get_secret_value(), + region_name=credentials.awsRegion, + ) + self.secretsmanager_client = session.client("secretsmanager") + def add_service_config_connection( self, service: ServiceConnectionType, service_type: str, - ) -> ServiceConnectionType: + ) -> None: service_connection_type = service.serviceType.value service_name = service.name.__root__ secret_id = self.build_secret_id( - service_type, service_connection_type, service_name + self.to_service_simple(service_type), service_connection_type, service_name ) connection_class = self.get_connection_class( service_type, service_connection_type @@ -114,22 +202,28 @@ class AWSSecretsManager(SecretsManager): ) ) - return service - - def __init__(self, credentials: AWSCredentials): - session = boto3.Session( - aws_access_key_id=credentials.awsAccessKeyId, - aws_secret_access_key=credentials.awsSecretAccessKey.get_secret_value(), - region_name=credentials.awsRegion, + def add_auth_provider_security_config(self, config: OpenMetadataConnection) -> None: + if config.authProvider == AuthProvider.no_auth: + return config + secret_id = self.build_secret_id( + "auth-provider", config.authProvider.value.lower() ) - self.secretsmanager_client = session.client("secretsmanager") + auth_config_json = self._get_string_value(secret_id) + try: + config.securityConfig = AUTH_PROVIDER_MAPPING.get( + config.authProvider + ).parse_obj(json.loads(auth_config_json)) + except KeyError: + raise NotImplementedError( + f"No client implemented for auth provider: [{config.authProvider}]" + ) def _get_string_value(self, name: str) -> str: """ :param name: The secret name to retrieve. Current stage is always retrieved. :return: The value of the secret. When the secret is a string, the value is - contained in the `SecretString` field. When the secret is bytes, - it is contained in the `SecretBinary` field. + contained in the `SecretString` field. When the secret is bytes or not present, + it throws a `ValueError` exception. """ if name is None: raise ValueError diff --git a/ingestion/tests/integration/ometa/test_ometa_api.py b/ingestion/tests/integration/ometa/test_ometa_api.py index b107db92863..a937d87be6e 100644 --- a/ingestion/tests/integration/ometa/test_ometa_api.py +++ b/ingestion/tests/integration/ometa/test_ometa_api.py @@ -14,11 +14,8 @@ OpenMetadata API initialization """ from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import ( OpenMetadataConnection, - SecretsManagerProvider, ) -from metadata.generated.schema.security.credentials.awsCredentials import AWSCredentials from metadata.ingestion.ometa.ometa_api import OpenMetadata -from metadata.utils.secrets_manager import AWSSecretsManager, LocalSecretsManager def test_init_ometa(): @@ -26,17 +23,3 @@ def test_init_ometa(): metadata = OpenMetadata(server_config) assert metadata.health_check() - assert type(metadata.secrets_manager_client) is LocalSecretsManager - - -def test_init_ometa_with_aws_secret_manager(): - server_config = OpenMetadataConnection( - hostPort="http://localhost:8585/api", - secretsManagerProvider=SecretsManagerProvider.aws, - secretsManagerCredentials=AWSCredentials( - awsRegion="test", awsSecretAccessKey="test" - ), - ) - metadata = OpenMetadata(server_config) - - assert type(metadata.secrets_manager_client) is AWSSecretsManager diff --git a/ingestion/tests/integration/ometa/test_ometa_secrets_manager.py b/ingestion/tests/integration/ometa/test_ometa_secrets_manager.py new file mode 100644 index 00000000000..6e55026b1db --- /dev/null +++ b/ingestion/tests/integration/ometa/test_ometa_secrets_manager.py @@ -0,0 +1,88 @@ +# Copyright 2022 Collate +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from copy import copy +from unittest import TestCase +from unittest.mock import Mock, patch + +from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import ( + AuthProvider, + OpenMetadataConnection, + SecretsManagerProvider, +) +from metadata.generated.schema.security.client.googleSSOClientConfig import ( + GoogleSSOClientConfig, +) +from metadata.generated.schema.security.credentials.awsCredentials import AWSCredentials +from metadata.ingestion.ometa.auth_provider import ( + GoogleAuthenticationProvider, + NoOpAuthenticationProvider, +) +from metadata.ingestion.ometa.ometa_api import OpenMetadata +from metadata.utils.secrets_manager import AWSSecretsManager, LocalSecretsManager + + +class OMetaSecretManagerTest(TestCase): + metadata: OpenMetadata + aws_server_config: OpenMetadataConnection + local_server_config: OpenMetadataConnection + + @classmethod + def setUp(cls) -> None: + cls.local_server_config = OpenMetadataConnection( + hostPort="http://localhost:8585/api", + enableVersionValidation=False, + ) + cls.aws_server_config = OpenMetadataConnection( + hostPort="http://localhost:8585/api", + secretsManagerProvider=SecretsManagerProvider.aws, + secretsManagerCredentials=AWSCredentials( + awsRegion="test", awsSecretAccessKey="test" + ), + enableVersionValidation=False, + ) + + def test_ometa_with_local_secret_manager(self): + self._init_local_secret_manager() + assert type(self.metadata.secrets_manager_client) is LocalSecretsManager + assert type(self.metadata._auth_provider) is NoOpAuthenticationProvider + + def test_ometa_with_local_secret_manager_with_google_auth(self): + self.local_server_config.authProvider = AuthProvider.google + self.local_server_config.securityConfig = GoogleSSOClientConfig( + secretKey="/fake/path" + ) + self._init_local_secret_manager() + assert type(self.metadata.secrets_manager_client) is LocalSecretsManager + assert type(self.metadata._auth_provider) is GoogleAuthenticationProvider + + def test_ometa_with_aws_secret_manager(self): + self._init_aws_secret_manager() + assert type(self.metadata.secrets_manager_client) is AWSSecretsManager + assert type(self.metadata._auth_provider) is NoOpAuthenticationProvider + + @patch("metadata.ingestion.ometa.ometa_api.get_secrets_manager") + def test_ometa_with_aws_secret_manager_with_google_auth(self, secrets_manager_mock): + security_config = copy(self.aws_server_config) + security_config.securityConfig = GoogleSSOClientConfig(secretKey="/fake/path") + secret_client_mock = Mock() + secret_client_mock.add_auth_provider_security_config.return_value = ( + security_config + ) + secrets_manager_mock.return_value = secret_client_mock + self.aws_server_config.authProvider = AuthProvider.google + self._init_aws_secret_manager() + assert type(self.metadata._auth_provider) is GoogleAuthenticationProvider + + def _init_local_secret_manager(self): + self.metadata = OpenMetadata(self.local_server_config) + + def _init_aws_secret_manager(self): + self.metadata = OpenMetadata(self.aws_server_config) diff --git a/ingestion/tests/unit/metadata/utils/test_secrets_manager.py b/ingestion/tests/unit/metadata/utils/test_secrets_manager.py index 81fa497c94a..14127c339c5 100644 --- a/ingestion/tests/unit/metadata/utils/test_secrets_manager.py +++ b/ingestion/tests/unit/metadata/utils/test_secrets_manager.py @@ -23,6 +23,8 @@ from metadata.generated.schema.entity.services.connections.database.mysqlConnect MysqlConnection, ) from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import ( + AuthProvider, + OpenMetadataConnection, SecretsManagerProvider, ) from metadata.generated.schema.entity.services.databaseService import ( @@ -30,8 +32,15 @@ from metadata.generated.schema.entity.services.databaseService import ( DatabaseService, DatabaseServiceType, ) +from metadata.generated.schema.security.client.googleSSOClientConfig import ( + GoogleSSOClientConfig, +) from metadata.generated.schema.security.credentials.awsCredentials import AWSCredentials -from metadata.utils.secrets_manager import Singleton, get_secrets_manager +from metadata.utils.secrets_manager import ( + AUTH_PROVIDER_MAPPING, + Singleton, + get_secrets_manager, +) DATABASE_CONNECTION = {"username": "test", "hostPort": "localhost:3306"} @@ -42,15 +51,23 @@ DATABASE_SERVICE = { "connection": DatabaseConnection(), } +AUTH_PROVIDER_CONFIG = {"secretKey": "/fake/path"} + class TestSecretsManager(TestCase): service_type: str = "databaseService" service: DatabaseService database_connection = MysqlConnection(**DATABASE_CONNECTION) + auth_provider_config = GoogleSSOClientConfig(**AUTH_PROVIDER_CONFIG) + om_connection: OpenMetadataConnection @classmethod def setUpClass(cls) -> None: cls.service = DatabaseService(**DATABASE_SERVICE) + cls.om_connection = OpenMetadataConnection( + authProvider=AuthProvider.google, + hostPort="http://localhost:8585/api", + ) @classmethod def setUp(cls) -> None: @@ -66,22 +83,23 @@ class TestSecretsManager(TestCase): self.assertEqual(expected_service, self.service) assert id(self.database_connection) == id(self.service.connection.config) + def test_local_manager_add_auth_provider_security_config(self): + local_manager = get_secrets_manager(SecretsManagerProvider.local, None) + actual_om_connection = deepcopy(self.om_connection) + actual_om_connection.securityConfig = self.auth_provider_config + + local_manager.add_auth_provider_security_config(actual_om_connection) + + self.assertEqual(self.auth_provider_config, actual_om_connection.securityConfig) + assert id(self.auth_provider_config) == id(actual_om_connection.securityConfig) + @patch("metadata.utils.secrets_manager.boto3") def test_aws_manager_add_service_config_connection(self, boto3_mock): - self._init_boto3_mock( + aws_manager = self._build_secret_manager( boto3_mock, {"SecretString": json.dumps(DATABASE_CONNECTION)} ) - aws_manager = get_secrets_manager( - SecretsManagerProvider.aws, - AWSCredentials( - awsAccessKeyId="fake_key", - awsSecretAccessKey="fake_access", - awsRegion="fake-region", - ), - ) expected_service = deepcopy(self.service) expected_service.connection.config = self.database_connection - self.service.connection = None aws_manager.add_service_config_connection(self.service, self.service_type) @@ -89,19 +107,34 @@ class TestSecretsManager(TestCase): self.assertEqual(expected_service, self.service) assert id(self.database_connection) != id(self.service.connection.config) + @patch("metadata.utils.secrets_manager.boto3") + def test_aws_manager_fails_add_auth_provider_security_config(self, mocked_boto3): + aws_manager = self._build_secret_manager(mocked_boto3, {}) + + with self.assertRaises(ValueError) as value_error: + aws_manager.add_service_config_connection(self.service, self.service_type) + self.assertEqual( + "[SecretString] not present in the response.", value_error.exception + ) + + @patch("metadata.utils.secrets_manager.boto3") + def test_aws_manager_add_auth_provider_security_config(self, boto3_mock): + aws_manager = self._build_secret_manager( + boto3_mock, {"SecretString": json.dumps(AUTH_PROVIDER_CONFIG)} + ) + actual_om_connection = deepcopy(self.om_connection) + actual_om_connection.securityConfig = None + + aws_manager.add_auth_provider_security_config(actual_om_connection) + + self.assertEqual(self.auth_provider_config, actual_om_connection.securityConfig) + assert id(self.auth_provider_config) != id(actual_om_connection.securityConfig) + @patch("metadata.utils.secrets_manager.boto3") def test_aws_manager_fails_add_service_config_connection_when_not_stored( self, mocked_boto3 ): - self._init_boto3_mock(mocked_boto3, {}) - aws_manager = get_secrets_manager( - SecretsManagerProvider.aws, - AWSCredentials( - awsAccessKeyId="fake_key", - awsSecretAccessKey="fake_access", - awsRegion="fake-region", - ), - ) + aws_manager = self._build_secret_manager(mocked_boto3, {}) with self.assertRaises(ValueError) as value_error: aws_manager.add_service_config_connection(self.service, self.service_type) @@ -116,6 +149,25 @@ class TestSecretsManager(TestCase): "[any] is not implemented.", not_implemented_error.exception ) + def test_all_auth_provider_has_auth_client(self): + auth_provider_with_client = [ + e for e in AuthProvider if e is not AuthProvider.no_auth + ] + for auth_provider in auth_provider_with_client: + assert AUTH_PROVIDER_MAPPING.get(auth_provider, None) is not None + + def _build_secret_manager(self, mocked_boto3: Mock, expected_json: Dict[str, Any]): + self._init_boto3_mock(mocked_boto3, expected_json) + aws_manager = get_secrets_manager( + SecretsManagerProvider.aws, + AWSCredentials( + awsAccessKeyId="fake_key", + awsSecretAccessKey="fake_access", + awsRegion="fake-region", + ), + ) + return aws_manager + @staticmethod def _init_boto3_mock(boto3_mock: Mock, client_return: Dict[str, Any]): mocked_client = Mock() diff --git a/openmetadata-airflow-apis/src/openmetadata/workflows/ingestion/common.py b/openmetadata-airflow-apis/src/openmetadata/workflows/ingestion/common.py index d34c853e0af..972827eec36 100644 --- a/openmetadata-airflow-apis/src/openmetadata/workflows/ingestion/common.py +++ b/openmetadata-airflow-apis/src/openmetadata/workflows/ingestion/common.py @@ -99,9 +99,7 @@ def build_source(ingestion_pipeline: IngestionPipeline) -> WorkflowSource: if not service: raise ValueError(f"Could not get service from type {service_type}") - service = metadata.secrets_manager_client.add_service_config_connection( - service, service_type - ) + metadata.secrets_manager_client.add_service_config_connection(service, service_type) return WorkflowSource( type=service.serviceType.value.lower(),