From fadefff3e158b1b9d3b8d1d08f281756c74c810a Mon Sep 17 00:00:00 2001 From: Milan Bariya <52292922+MilanBariya@users.noreply.github.com> Date: Thu, 16 Mar 2023 20:43:49 +0530 Subject: [PATCH] Fix databricks timeout issue (#10613) * Fix databricks timeout issue * Change based on comments * Change timeOut word to connectionTimeout --- .../examples/workflows/databricks.yaml | 1 + .../ingestion/connections/test_connections.py | 23 +++++++++++-------- .../source/database/databricks/connection.py | 6 +++-- .../connectors/database/databricks/airflow.md | 2 ++ .../connectors/database/databricks/cli.md | 2 ++ .../connectors/database/databricks/index.md | 1 + .../database/databricksConnection.json | 6 +++++ 7 files changed, 30 insertions(+), 11 deletions(-) diff --git a/ingestion/src/metadata/examples/workflows/databricks.yaml b/ingestion/src/metadata/examples/workflows/databricks.yaml index f4e334deea9..51fe49f6d94 100644 --- a/ingestion/src/metadata/examples/workflows/databricks.yaml +++ b/ingestion/src/metadata/examples/workflows/databricks.yaml @@ -7,6 +7,7 @@ source: databaseSchema: default token: hostPort: localhost:443 + connectionTimeout: 120 connectionArguments: http_path: sourceConfig: diff --git a/ingestion/src/metadata/ingestion/connections/test_connections.py b/ingestion/src/metadata/ingestion/connections/test_connections.py index c59e6d0f59e..9f27951d42f 100644 --- a/ingestion/src/metadata/ingestion/connections/test_connections.py +++ b/ingestion/src/metadata/ingestion/connections/test_connections.py @@ -92,15 +92,7 @@ def test_connection_steps(steps: List[TestConnectionStep]) -> TestConnectionResu return test_connection_result -@timeout(seconds=120) -def test_connection_db_common(connection: Engine, steps=None) -> TestConnectionResult: - """ - Default implementation is the engine to test. 
- - Test that we can connect to the source using the given engine - :param connection: Engine to test - :return: None or raise an exception if we cannot connect - """ +def test_connection_engine(connection: Engine, steps=None) -> TestConnectionResult: try: with connection.connect() as conn: conn.execute(ConnTestFn()) @@ -116,3 +108,16 @@ def test_connection_db_common(connection: Engine, steps=None) -> TestConnectionR raise SourceConnectionException(msg) from exc return None + + +def test_connection_db_common( + connection: Engine, steps=None, timeout_seconds: int = 120 +) -> TestConnectionResult: + """ + Default implementation is the engine to test. + + Test that we can connect to the source using the given engine + :param connection: Engine to test + :return: None or raise an exception if we cannot connect + """ + return timeout(timeout_seconds)(test_connection_engine)(connection, steps) diff --git a/ingestion/src/metadata/ingestion/source/database/databricks/connection.py b/ingestion/src/metadata/ingestion/source/database/databricks/connection.py index 1bb97e5219c..20b7c3c98a0 100644 --- a/ingestion/src/metadata/ingestion/source/database/databricks/connection.py +++ b/ingestion/src/metadata/ingestion/source/database/databricks/connection.py @@ -53,12 +53,13 @@ def get_connection(connection: DatabricksConnection) -> Engine: ) -def test_connection(engine: Engine, _) -> TestConnectionResult: +def test_connection(engine: Engine, service_connection) -> TestConnectionResult: """ Test connection """ def custom_executor(engine, statement): + cursor = engine.execute(statement) return [item[0] for item in list(cursor.all())] @@ -91,4 +92,5 @@ def test_connection(engine: Engine, _) -> TestConnectionResult: ), ] - return test_connection_db_common(engine, steps) + timeout_seconds = service_connection.connectionTimeout + return test_connection_db_common(engine, steps, timeout_seconds) diff --git a/openmetadata-docs/content/connectors/database/databricks/airflow.md 
b/openmetadata-docs/content/connectors/database/databricks/airflow.md index 5195277dfc5..c0d716e1e2a 100644 --- a/openmetadata-docs/content/connectors/database/databricks/airflow.md +++ b/openmetadata-docs/content/connectors/database/databricks/airflow.md @@ -76,6 +76,7 @@ source: token: hostPort: httpPath: + connectionTimeout: 120 sourceConfig: config: type: DatabaseMetadata @@ -120,6 +121,7 @@ workflowConfig: - **hostPort**: Enter the fully qualified hostname and port number for your Databricks deployment in the Host and Port field. - **token**: Generated Token to connect to Databricks. - **httpPath**: Databricks compute resources URL. +- **connectionTimeout**: The maximum amount of time (in seconds) to wait for a successful connection to the data source. If the connection attempt takes longer than this timeout period, an error will be returned. - **catalog**: Catalog of the data source(Example: hive_metastore). This is optional parameter, if you would like to restrict the metadata reading to a single catalog. When left blank, OpenMetadata Ingestion attempts to scan all the catalog. - **databaseSchema**: DatabaseSchema of the data source. This is optional parameter, if you would like to restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata Ingestion attempts to scan all the databaseSchema. - **Connection Options (Optional)**: Enter the details for any additional connection options that can be sent to Databricks during the connection. These details must be added as Key-Value pairs. 
diff --git a/openmetadata-docs/content/connectors/database/databricks/cli.md b/openmetadata-docs/content/connectors/database/databricks/cli.md index eb6f557fa5d..46073e297e7 100644 --- a/openmetadata-docs/content/connectors/database/databricks/cli.md +++ b/openmetadata-docs/content/connectors/database/databricks/cli.md @@ -76,6 +76,7 @@ source: token: hostPort: httpPath: + connectionTimeout: 120 sourceConfig: config: type: DatabaseMetadata @@ -120,6 +121,7 @@ workflowConfig: - **hostPort**: Enter the fully qualified hostname and port number for your Databricks deployment in the Host and Port field. - **token**: Generated Token to connect to Databricks. - **httpPath**: Databricks compute resources URL. +- **connectionTimeout**: The maximum amount of time (in seconds) to wait for a successful connection to the data source. If the connection attempt takes longer than this timeout period, an error will be returned. - **catalog**: Catalog of the data source(Example: hive_metastore). This is optional parameter, if you would like to restrict the metadata reading to a single catalog. When left blank, OpenMetadata Ingestion attempts to scan all the catalog. - **databaseSchema**: DatabaseSchema of the data source. This is optional parameter, if you would like to restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata Ingestion attempts to scan all the databaseSchema. - **Connection Options (Optional)**: Enter the details for any additional connection options that can be sent to Databricks during the connection. These details must be added as Key-Value pairs. diff --git a/openmetadata-docs/content/connectors/database/databricks/index.md b/openmetadata-docs/content/connectors/database/databricks/index.md index c835d61db1b..10b24136078 100644 --- a/openmetadata-docs/content/connectors/database/databricks/index.md +++ b/openmetadata-docs/content/connectors/database/databricks/index.md @@ -150,6 +150,7 @@ the changes. 
- **Host and Port**: Enter the fully qualified hostname and port number for your Databricks deployment in the Host and Port field. - **Token**: Generated Token to connect to Databricks. - **HTTP Path**: Databricks compute resources URL. +- **Connection Timeout**: The maximum amount of time (in seconds) to wait for a successful connection to the data source. If the connection attempt takes longer than this timeout period, an error will be returned. - **Catalog**: Catalog of the data source(Example: hive_metastore). This is optional parameter, if you would like to restrict the metadata reading to a single catalog. When left blank, OpenMetadata Ingestion attempts to scan all the catalog. - **DatabaseSchema**: databaseSchema of the data source. This is optional parameter, if you would like to restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata Ingestion attempts to scan all the databaseSchema. - **Connection Options (Optional)**: Enter the details for any additional connection options that can be sent to Databricks during the connection. These details must be added as Key-Value pairs. diff --git a/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/databricksConnection.json b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/databricksConnection.json index 1e1f999b0b1..b2f654fa344 100644 --- a/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/databricksConnection.json +++ b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/databricksConnection.json @@ -58,6 +58,12 @@ "description": "databaseSchema of the data source. This is optional parameter, if you would like to restrict the metadata reading to a single databaseSchema. 
When left blank, OpenMetadata Ingestion attempts to scan all the databaseSchema.", "type": "string" }, + "connectionTimeout": { + "title": "Connection Timeout", + "description": "The maximum amount of time (in seconds) to wait for a successful connection to the data source. If the connection attempt takes longer than this timeout period, an error will be returned.", + "type": "integer", + "default": 120 + }, "connectionOptions": { "title": "Connection Options", "$ref": "../connectionBasicType.json#/definitions/connectionOptions"