mirror of
https://github.com/datahub-project/datahub.git
synced 2025-06-27 05:03:31 +00:00
fix(ingest/mode): add connection timeouts to avoid RemoteDisconnected errors (#11245)
This commit is contained in:
parent
3c1dcf99b0
commit
d696dbef10
@ -15,6 +15,8 @@ import tenacity
|
||||
import yaml
|
||||
from liquid import Template, Undefined
|
||||
from pydantic import Field, validator
|
||||
from requests.adapters import HTTPAdapter, Retry
|
||||
from requests.exceptions import ConnectionError
|
||||
from requests.models import HTTPBasicAuth, HTTPError
|
||||
from sqllineage.runner import LineageRunner
|
||||
from tenacity import retry_if_exception_type, stop_after_attempt, wait_exponential
|
||||
@ -127,6 +129,10 @@ class ModeAPIConfig(ConfigModel):
|
||||
max_attempts: int = Field(
|
||||
default=5, description="Maximum number of attempts to retry before failing"
|
||||
)
|
||||
timeout: int = Field(
|
||||
default=40,
|
||||
description="Timout setting, how long to wait for the Mode rest api to send data before giving up",
|
||||
)
|
||||
|
||||
|
||||
class ModeConfig(StatefulIngestionConfigBase, DatasetLineageProviderConfigBase):
|
||||
@ -299,7 +305,15 @@ class ModeSource(StatefulIngestionSourceBase):
|
||||
self.report = ModeSourceReport()
|
||||
self.ctx = ctx
|
||||
|
||||
self.session = requests.session()
|
||||
self.session = requests.Session()
|
||||
# Handling retry and backoff
|
||||
retries = 3
|
||||
backoff_factor = 10
|
||||
retry = Retry(total=retries, backoff_factor=backoff_factor)
|
||||
adapter = HTTPAdapter(max_retries=retry)
|
||||
self.session.mount("http://", adapter)
|
||||
self.session.mount("https://", adapter)
|
||||
|
||||
self.session.auth = HTTPBasicAuth(
|
||||
self.config.token,
|
||||
self.config.password.get_secret_value(),
|
||||
@ -1469,15 +1483,16 @@ class ModeSource(StatefulIngestionSourceBase):
|
||||
multiplier=self.config.api_options.retry_backoff_multiplier,
|
||||
max=self.config.api_options.max_retry_interval,
|
||||
),
|
||||
retry=retry_if_exception_type(HTTPError429),
|
||||
retry=retry_if_exception_type((HTTPError429, ConnectionError)),
|
||||
stop=stop_after_attempt(self.config.api_options.max_attempts),
|
||||
)
|
||||
|
||||
@r.wraps
|
||||
def get_request():
|
||||
try:
|
||||
response = self.session.get(url)
|
||||
response.raise_for_status()
|
||||
response = self.session.get(
|
||||
url, timeout=self.config.api_options.timeout
|
||||
)
|
||||
return response.json()
|
||||
except HTTPError as http_error:
|
||||
error_response = http_error.response
|
||||
|
@ -45,8 +45,12 @@ class MockResponse:
|
||||
def json(self):
|
||||
return self.json_data
|
||||
|
||||
def get(self, url):
|
||||
def mount(self, prefix, adaptor):
|
||||
return self
|
||||
|
||||
def get(self, url, timeout=40):
|
||||
self.url = url
|
||||
self.timeout = timeout
|
||||
response_json_path = f"{test_resources_dir}/setup/{JSON_RESPONSE_MAP.get(url)}"
|
||||
with open(response_json_path) as file:
|
||||
data = json.loads(file.read())
|
||||
@ -74,7 +78,7 @@ def mocked_requests_failure(*args, **kwargs):
|
||||
@freeze_time(FROZEN_TIME)
|
||||
def test_mode_ingest_success(pytestconfig, tmp_path):
|
||||
with patch(
|
||||
"datahub.ingestion.source.mode.requests.session",
|
||||
"datahub.ingestion.source.mode.requests.Session",
|
||||
side_effect=mocked_requests_sucess,
|
||||
):
|
||||
pipeline = Pipeline.create(
|
||||
@ -111,7 +115,7 @@ def test_mode_ingest_success(pytestconfig, tmp_path):
|
||||
@freeze_time(FROZEN_TIME)
|
||||
def test_mode_ingest_failure(pytestconfig, tmp_path):
|
||||
with patch(
|
||||
"datahub.ingestion.source.mode.requests.session",
|
||||
"datahub.ingestion.source.mode.requests.Session",
|
||||
side_effect=mocked_requests_failure,
|
||||
):
|
||||
global test_resources_dir
|
||||
|
Loading…
x
Reference in New Issue
Block a user