Fix databricks timeout issue (#10613)

* Fix databricks timeout issue

* Change based on comments

* Change timeOut word to connectionTimeout
Milan Bariya 2023-03-16 20:43:49 +05:30 committed by GitHub
parent 1389270cc8
commit fadefff3e1
7 changed files with 30 additions and 11 deletions

View File

@@ -7,6 +7,7 @@ source:
       databaseSchema: default
       token: <databricks token>
       hostPort: localhost:443
+      connectionTimeout: 120
       connectionArguments:
         http_path: <http path of databricks cluster>
   sourceConfig:
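
The added `connectionTimeout: 120` key caps how long the connection test may run for this workflow. As a rough, illustrative sketch (the file name and the `source.serviceConnection.config` nesting are assumptions here, not part of this commit), the value could be read from such a YAML with PyYAML, falling back to the schema default of 120 seconds:

```python
import yaml  # PyYAML

# Illustrative only: load a workflow YAML like the excerpt above and read the
# new connectionTimeout field, defaulting to 120 seconds when it is absent.
with open("databricks.yaml", encoding="utf-8") as fh:  # hypothetical file name
    workflow = yaml.safe_load(fh)

connection_cfg = workflow["source"]["serviceConnection"]["config"]
timeout_seconds = connection_cfg.get("connectionTimeout", 120)
print(f"Connection test will be capped at {timeout_seconds} seconds")
```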

View File

@@ -92,15 +92,7 @@ def test_connection_steps(steps: List[TestConnectionStep]) -> TestConnectionResult:
     return test_connection_result
-@timeout(seconds=120)
-def test_connection_db_common(connection: Engine, steps=None) -> TestConnectionResult:
-    """
-    Default implementation is the engine to test.
-    Test that we can connect to the source using the given engine
-    :param connection: Engine to test
-    :return: None or raise an exception if we cannot connect
-    """
+def test_connection_engine(connection: Engine, steps=None) -> TestConnectionResult:
     try:
         with connection.connect() as conn:
             conn.execute(ConnTestFn())
@@ -116,3 +108,16 @@ def test_connection_db_common(connection: Engine, steps=None) -> TestConnectionResult:
         raise SourceConnectionException(msg) from exc
     return None
+def test_connection_db_common(
+    connection: Engine, steps=None, timeout_seconds: int = 120
+) -> TestConnectionResult:
+    """
+    Default implementation is the engine to test.
+    Test that we can connect to the source using the given engine
+    :param connection: Engine to test
+    :return: None or raise an exception if we cannot connect
+    """
+    return timeout(timeout_seconds)(test_connection_engine)(connection, steps)
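
The timeout is now applied at call time, so each connector can choose its own limit instead of the hard-coded `@timeout(seconds=120)`. The sketch below only illustrates the `timeout(seconds)(fn)(*args)` call pattern; it is a simplified stand-in for the project's `timeout` helper, not its actual implementation (and `signal.alarm` only works on Unix, in the main thread):

```python
import signal
from functools import wraps


def timeout(seconds: int = 120):
    """Simplified, illustrative timeout decorator factory."""

    def decorator(fn):
        @wraps(fn)
        def wrapper(*args, **kwargs):
            def _raise(signum, frame):
                raise TimeoutError(f"{fn.__name__} timed out after {seconds}s")

            # Install an alarm handler, run the wrapped call, then restore state.
            old_handler = signal.signal(signal.SIGALRM, _raise)
            signal.alarm(seconds)
            try:
                return fn(*args, **kwargs)
            finally:
                signal.alarm(0)  # cancel the pending alarm
                signal.signal(signal.SIGALRM, old_handler)

        return wrapper

    return decorator
```

With this shape, `timeout(timeout_seconds)(test_connection_engine)(connection, steps)` wraps the engine test with whatever limit the caller passes in, while the 120-second default preserves the previous behaviour.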

View File

@@ -53,12 +53,13 @@ def get_connection(connection: DatabricksConnection) -> Engine:
     )
-def test_connection(engine: Engine, _) -> TestConnectionResult:
+def test_connection(engine: Engine, service_connection) -> TestConnectionResult:
     """
     Test connection
     """
     def custom_executor(engine, statement):
         cursor = engine.execute(statement)
         return [item[0] for item in list(cursor.all())]
@@ -91,4 +92,5 @@ def test_connection(engine: Engine, _) -> TestConnectionResult:
         ),
     ]
-    return test_connection_db_common(engine, steps)
+    timeout_seconds = service_connection.connectionTimeout
+    return test_connection_db_common(engine, steps, timeout_seconds)
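
Since `connectionTimeout` defaults to 120 in the JSON schema below, the attribute is expected to be populated for Databricks. A connector whose schema does not define the field could guard the lookup instead; this is an illustrative pattern (the wrapper name is hypothetical), not code from this commit:

```python
def test_connection_with_fallback(engine, service_connection, steps):
    # Illustrative only: honour a configured connectionTimeout when present,
    # otherwise keep the previous 120-second default.
    timeout_seconds = getattr(service_connection, "connectionTimeout", None) or 120
    return test_connection_db_common(engine, steps, timeout_seconds)
```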

View File

@@ -76,6 +76,7 @@ source:
       token: <databricks token>
       hostPort: <databricks connection host & port>
       httpPath: <http path of databricks cluster>
+      connectionTimeout: 120
   sourceConfig:
     config:
       type: DatabaseMetadata
@@ -120,6 +121,7 @@ workflowConfig:
 - **hostPort**: Enter the fully qualified hostname and port number for your Databricks deployment in the Host and Port field.
 - **token**: Generated Token to connect to Databricks.
 - **httpPath**: Databricks compute resources URL.
+- **connectionTimeout**: The maximum amount of time (in seconds) to wait for a successful connection to the data source. If the connection attempt takes longer than this timeout period, an error will be returned.
 - **catalog**: Catalog of the data source(Example: hive_metastore). This is optional parameter, if you would like to restrict the metadata reading to a single catalog. When left blank, OpenMetadata Ingestion attempts to scan all the catalog.
 - **databaseSchema**: DatabaseSchema of the data source. This is optional parameter, if you would like to restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata Ingestion attempts to scan all the databaseSchema.
 - **Connection Options (Optional)**: Enter the details for any additional connection options that can be sent to Databricks during the connection. These details must be added as Key-Value pairs.

View File

@@ -76,6 +76,7 @@ source:
       token: <databricks token>
       hostPort: <databricks connection host & port>
       httpPath: <http path of databricks cluster>
+      connectionTimeout: 120
   sourceConfig:
     config:
       type: DatabaseMetadata
@@ -120,6 +121,7 @@ workflowConfig:
 - **hostPort**: Enter the fully qualified hostname and port number for your Databricks deployment in the Host and Port field.
 - **token**: Generated Token to connect to Databricks.
 - **httpPath**: Databricks compute resources URL.
+- **connectionTimeout**: The maximum amount of time (in seconds) to wait for a successful connection to the data source. If the connection attempt takes longer than this timeout period, an error will be returned.
 - **catalog**: Catalog of the data source(Example: hive_metastore). This is optional parameter, if you would like to restrict the metadata reading to a single catalog. When left blank, OpenMetadata Ingestion attempts to scan all the catalog.
 - **databaseSchema**: DatabaseSchema of the data source. This is optional parameter, if you would like to restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata Ingestion attempts to scan all the databaseSchema.
 - **Connection Options (Optional)**: Enter the details for any additional connection options that can be sent to Databricks during the connection. These details must be added as Key-Value pairs.

View File

@@ -150,6 +150,7 @@ the changes.
 - **Host and Port**: Enter the fully qualified hostname and port number for your Databricks deployment in the Host and Port field.
 - **Token**: Generated Token to connect to Databricks.
 - **HTTP Path**: Databricks compute resources URL.
+- **connectionTimeout**: The maximum amount of time (in seconds) to wait for a successful connection to the data source. If the connection attempt takes longer than this timeout period, an error will be returned.
 - **Catalog**: Catalog of the data source(Example: hive_metastore). This is optional parameter, if you would like to restrict the metadata reading to a single catalog. When left blank, OpenMetadata Ingestion attempts to scan all the catalog.
 - **DatabaseSchema**: databaseSchema of the data source. This is optional parameter, if you would like to restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata Ingestion attempts to scan all the databaseSchema.
 - **Connection Options (Optional)**: Enter the details for any additional connection options that can be sent to Databricks during the connection. These details must be added as Key-Value pairs.

View File

@@ -58,6 +58,12 @@
       "description": "databaseSchema of the data source. This is optional parameter, if you would like to restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata Ingestion attempts to scan all the databaseSchema.",
       "type": "string"
     },
+    "connectionTimeout": {
+      "title": "Connection Timeout",
+      "description": "The maximum amount of time (in seconds) to wait for a successful connection to the data source. If the connection attempt takes longer than this timeout period, an error will be returned.",
+      "type": "integer",
+      "default": 120
+    },
     "connectionOptions": {
       "title": "Connection Options",
       "$ref": "../connectionBasicType.json#/definitions/connectionOptions"