fix(ingest/mode): Additional 404 handling and caching update (#13508)

This commit is contained in:
Michael Minichino 2025-05-14 09:09:40 -05:00 committed by GitHub
parent 5749f6f970
commit 8b4217f7fa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1467,11 +1467,18 @@ class ModeSource(StatefulIngestionSourceBase):
) )
yield reports_page yield reports_page
except ModeRequestError as e: except ModeRequestError as e:
self.report.report_failure( if isinstance(e, HTTPError) and e.response.status_code == 404:
title="Failed to Retrieve Reports for Space", self.report.report_warning(
message="Unable to retrieve reports for space token.", title="No Reports Found in Space",
context=f"Space Token: {space_token}, Error: {str(e)}", message="No reports were found in the space. It may have been recently deleted.",
) context=f"Space Token: {space_token}, Error: {str(e)}",
)
else:
self.report.report_failure(
title="Failed to Retrieve Reports for Space",
message="Unable to retrieve reports for space token.",
context=f"Space Token: {space_token}, Error: {str(e)}",
)
def _get_datasets(self, space_token: str) -> Iterator[List[dict]]: def _get_datasets(self, space_token: str) -> Iterator[List[dict]]:
""" """
@ -1490,11 +1497,18 @@ class ModeSource(StatefulIngestionSourceBase):
) )
yield dataset_page yield dataset_page
except ModeRequestError as e: except ModeRequestError as e:
self.report.report_failure( if isinstance(e, HTTPError) and e.response.status_code == 404:
title="Failed to Retrieve Datasets for Space", self.report.report_warning(
message=f"Unable to retrieve datasets for space token {space_token}.", title="No Datasets Found in Space",
context=f"Error: {str(e)}", message="No datasets were found in the space. It may have been recently deleted.",
) context=f"Space Token: {space_token}, Error: {str(e)}",
)
else:
self.report.report_failure(
title="Failed to Retrieve Datasets for Space",
message=f"Unable to retrieve datasets for space token {space_token}.",
context=f"Space Token: {space_token}, Error: {str(e)}",
)
def _get_queries(self, report_token: str) -> List[dict]: def _get_queries(self, report_token: str) -> List[dict]:
try: try:
@ -1555,13 +1569,18 @@ class ModeSource(StatefulIngestionSourceBase):
) )
return charts.get("_embedded", {}).get("charts", []) return charts.get("_embedded", {}).get("charts", [])
except ModeRequestError as e: except ModeRequestError as e:
self.report.report_failure( if isinstance(e, HTTPError) and e.response.status_code == 404:
title="Failed to Retrieve Charts", self.report.report_warning(
message="Unable to retrieve charts from Mode.", title="No Charts Found for Query",
context=f"Report Token: {report_token}, " message="No charts were found for the query. The query may have been recently deleted.",
f"Query token: {query_token}, " context=f"Report Token: {report_token}, Query Token: {query_token}, Error: {str(e)}",
f"Error: {str(e)}", )
) else:
self.report.report_failure(
title="Failed to Retrieve Charts",
message="Unable to retrieve charts from Mode.",
context=f"Report Token: {report_token}, Query Token: {query_token}, Error: {str(e)}",
)
return [] return []
def _get_paged_request_json( def _get_paged_request_json(
@ -1577,7 +1596,7 @@ class ModeSource(StatefulIngestionSourceBase):
yield data yield data
page += 1 page += 1
@lru_cache(maxsize=20480) @lru_cache(maxsize=None)
def _get_request_json(self, url: str) -> Dict: def _get_request_json(self, url: str) -> Dict:
r = tenacity.Retrying( r = tenacity.Retrying(
wait=wait_exponential( wait=wait_exponential(