fix(ingest/mode): add connection timeouts to avoid RemoteDisconnected errors (#11245)

This commit is contained in:
sagar-salvi-apptware 2024-09-24 00:58:30 +05:30 committed by GitHub
parent 3c1dcf99b0
commit d696dbef10
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 26 additions and 7 deletions

View File

@ -15,6 +15,8 @@ import tenacity
import yaml
from liquid import Template, Undefined
from pydantic import Field, validator
from requests.adapters import HTTPAdapter, Retry
from requests.exceptions import ConnectionError
from requests.models import HTTPBasicAuth, HTTPError
from sqllineage.runner import LineageRunner
from tenacity import retry_if_exception_type, stop_after_attempt, wait_exponential
@ -127,6 +129,10 @@ class ModeAPIConfig(ConfigModel):
max_attempts: int = Field(
default=5, description="Maximum number of attempts to retry before failing"
)
# Per-request timeout handed to `session.get(..., timeout=...)` for every
# Mode REST API call; Requests interprets this value in seconds.
timeout: int = Field(
    default=40,
    description="Timeout setting in seconds, how long to wait for the Mode REST API to send data before giving up",
)
class ModeConfig(StatefulIngestionConfigBase, DatasetLineageProviderConfigBase):
@ -299,7 +305,15 @@ class ModeSource(StatefulIngestionSourceBase):
self.report = ModeSourceReport()
self.ctx = ctx
self.session = requests.session()
self.session = requests.Session()
# Handling retry and backoff
retries = 3
backoff_factor = 10
retry = Retry(total=retries, backoff_factor=backoff_factor)
adapter = HTTPAdapter(max_retries=retry)
self.session.mount("http://", adapter)
self.session.mount("https://", adapter)
self.session.auth = HTTPBasicAuth(
self.config.token,
self.config.password.get_secret_value(),
@ -1469,15 +1483,16 @@ class ModeSource(StatefulIngestionSourceBase):
multiplier=self.config.api_options.retry_backoff_multiplier,
max=self.config.api_options.max_retry_interval,
),
retry=retry_if_exception_type(HTTPError429),
retry=retry_if_exception_type((HTTPError429, ConnectionError)),
stop=stop_after_attempt(self.config.api_options.max_attempts),
)
@r.wraps
def get_request():
try:
response = self.session.get(url)
response.raise_for_status()
response = self.session.get(
url, timeout=self.config.api_options.timeout
)
return response.json()
except HTTPError as http_error:
error_response = http_error.response

View File

@ -45,8 +45,12 @@ class MockResponse:
def json(self):
return self.json_data
def get(self, url):
def mount(self, prefix, adaptor):
return self
def get(self, url, timeout=40):
self.url = url
self.timeout = timeout
response_json_path = f"{test_resources_dir}/setup/{JSON_RESPONSE_MAP.get(url)}"
with open(response_json_path) as file:
data = json.loads(file.read())
@ -74,7 +78,7 @@ def mocked_requests_failure(*args, **kwargs):
@freeze_time(FROZEN_TIME)
def test_mode_ingest_success(pytestconfig, tmp_path):
with patch(
"datahub.ingestion.source.mode.requests.session",
"datahub.ingestion.source.mode.requests.Session",
side_effect=mocked_requests_sucess,
):
pipeline = Pipeline.create(
@ -111,7 +115,7 @@ def test_mode_ingest_success(pytestconfig, tmp_path):
@freeze_time(FROZEN_TIME)
def test_mode_ingest_failure(pytestconfig, tmp_path):
with patch(
"datahub.ingestion.source.mode.requests.session",
"datahub.ingestion.source.mode.requests.Session",
side_effect=mocked_requests_failure,
):
global test_resources_dir