diff --git a/ingestion/src/metadata/ingestion/ometa/client.py b/ingestion/src/metadata/ingestion/ometa/client.py index e3fa1b8c84f..03417abb93f 100644 --- a/ingestion/src/metadata/ingestion/ometa/client.py +++ b/ingestion/src/metadata/ingestion/ometa/client.py @@ -14,7 +14,8 @@ Python API REST wrapper and helpers import datetime import time import traceback -from typing import Callable, Dict, List, Optional, Union +from copy import deepcopy +from typing import Any, Callable, Dict, List, Optional, Union import requests from requests.exceptions import HTTPError @@ -167,7 +168,10 @@ class REST: if self.config.extra_headers: extra_headers: Dict[str, str] = self.config.extra_headers extra_headers = {k: (v % headers) for k, v in extra_headers.items()} - logger.debug("Extra headers provided '%s'", extra_headers) + logger.debug( + "Extra headers provided '%s'", + self._mask_authorization_headers(extra_headers), + ) headers = {**headers, **extra_headers} opts = { @@ -180,6 +184,8 @@ class REST: "verify": self._verify, } + masked_opts = self._mask_authorization_headers(opts) + method_key = "params" if method.upper() == "GET" else "data" opts[method_key] = data @@ -188,7 +194,7 @@ class REST: while retry >= 0: try: logger.debug("URL %s, method %s", url, method) - logger.debug("Data %s", opts) + logger.debug("Data %s", masked_opts) return self._one_request(method, url, opts, retry) except RetryException: retry_wait = self._retry_wait * (total_retries - retry + 1) @@ -318,3 +324,12 @@ class REST: def __exit__(self, exc_type, exc_val, exc_tb): self.close() + + def _mask_authorization_headers(self, opts: Dict[str, Any]) -> Dict[str, Any]: + if opts and opts["headers"]: + if self.config.auth_header and opts["headers"][self.config.auth_header]: + masked_opts = deepcopy(opts) + if self.config.auth_header and opts["headers"][self.config.auth_header]: + masked_opts["headers"][self.config.auth_header] = "********" + return masked_opts + return opts diff --git a/ingestion/tests/integration/ometa/test_ometa_api.py b/ingestion/tests/integration/ometa/test_ometa_api.py index 6b45bececb9..f149bfb50d5 100644 --- a/ingestion/tests/integration/ometa/test_ometa_api.py +++ b/ingestion/tests/integration/ometa/test_ometa_api.py @@ -12,6 +12,8 @@ """ OpenMetadata API initialization """ +import pytest + from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import ( OpenMetadataConnection, ) @@ -20,15 +22,45 @@ from metadata.generated.schema.security.client.openMetadataJWTClientConfig impor ) from metadata.ingestion.ometa.ometa_api import OpenMetadata +server_config = OpenMetadataConnection( + hostPort="http://localhost:8585/api", + authProvider="openmetadata", + securityConfig=OpenMetadataJWTClientConfig( + jwtToken="eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg" + ), +) +metadata = OpenMetadata(server_config) + def test_init_ometa(): - server_config = OpenMetadataConnection( - hostPort="http://localhost:8585/api", - authProvider="openmetadata", - securityConfig=OpenMetadataJWTClientConfig( - jwtToken="eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg" - ), - ) - metadata = OpenMetadata(server_config) - assert metadata.health_check() + + +@pytest.mark.parametrize( + ("header", "value", "is_masked"), + [ + ("Authorization", "Bearer 1234", True), + ("Custom", "Bearer 1234", True), + ("Random", "any value", False), + (None, "", False), + ], +) +def test_mask_authorization_header(header: str, value: str, is_masked: bool): + if not header: + headers = None + elif header == "Custom": + metadata.client.config.auth_header = header + headers = {header: value} + else: + headers = {header: value} + opts = {"headers": headers} + + if header: + assert ( + metadata.client._mask_authorization_headers(opts)["headers"][header] + == "********" + if is_masked + else value + ) + else: + assert metadata.client._mask_authorization_headers(opts)["headers"] is None diff --git a/ingestion/tests/integration/workflow/mysql_test.yaml b/ingestion/tests/integration/workflow/mysql_test.yaml index c8918c94b9d..21aebeebdbb 100644 --- a/ingestion/tests/integration/workflow/mysql_test.yaml +++ b/ingestion/tests/integration/workflow/mysql_test.yaml @@ -6,13 +6,12 @@ source: type: Mysql username: openmetadata_user password: openmetadata_password - database: openmetadata_db hostPort: localhost:3306 + databaseSchema: openmetadata_db connectionOptions: {} connectionArguments: {} sourceConfig: config: - enableDataProfiler: false schemaFilterPattern: excludes: - mysql.* diff --git a/ingestion/tests/integration/workflow/workflow_test.py b/ingestion/tests/integration/workflow/test_workflow.py similarity index 70% rename from ingestion/tests/integration/workflow/workflow_test.py rename to ingestion/tests/integration/workflow/test_workflow.py index 4557ba2100f..da8d7e04e8b 100644 --- a/ingestion/tests/integration/workflow/workflow_test.py +++ b/ingestion/tests/integration/workflow/test_workflow.py @@ -11,11 +11,13 @@ import importlib import pathlib +import re from unittest import TestCase from metadata.config.common import ConfigurationError, load_config_file from metadata.ingestion.api.workflow import Workflow from metadata.ingestion.ometa.ometa_api import OpenMetadata +from metadata.utils.logger import Loggers class WorkflowTest(TestCase): @@ -67,15 +69,15 @@ class WorkflowTest(TestCase): config = workflow.config.workflowConfig.openMetadataServerConfig client = OpenMetadata(config).client + self.assertIsNotNone( + client.get("/services/databaseServices/name/local_mysql_test") + ) + client.delete( f"/services/databaseServices/" f"{client.get('/services/databaseServices/name/local_mysql_test')['id']}" f"?hardDelete=true&recursive=true" ) - file_path = "/tmp/mysql_test" - with open(file_path) as ingestionFile: - ingestionData = ingestionFile.read() - self.assertEqual(ingestionData is not None, True) def test_execute_4xx(self): config_file = pathlib.Path("/tmp/mysql_test123") @@ -83,3 +85,28 @@ class WorkflowTest(TestCase): load_config_file(config_file) except ConfigurationError: self.assertRaises(ConfigurationError) + + def test_debug_not_show_authorization_headers(self): + current_dir = pathlib.Path(__file__).resolve().parent + config_file = current_dir.joinpath("mysql_test.yaml") + workflow_config = load_config_file(config_file) + workflow = Workflow.create(workflow_config) + workflow_config["workflowConfig"]["loggerLevel"] = "DEBUG" + authorization_pattern = re.compile( + r".*['\"]?Authorization['\"]?: ?['\"]?[^*]*$" + ) + with self.assertLogs(Loggers.OMETA.value, level="DEBUG") as logger: + workflow.execute() + self.assertFalse( + any(authorization_pattern.match(log) for log in logger.output), + "Authorization headers are displayed in the logs", + ) + workflow.stop() + + config = workflow.config.workflowConfig.openMetadataServerConfig + client = OpenMetadata(config).client + client.delete( + f"/services/databaseServices/" + f"{client.get('/services/databaseServices/name/local_mysql_test')['id']}" + f"?hardDelete=true&recursive=true" + )