diff --git a/haystack/preview/components/audio/whisper_remote.py b/haystack/preview/components/audio/whisper_remote.py index 3b515e867..6752ce419 100644 --- a/haystack/preview/components/audio/whisper_remote.py +++ b/haystack/preview/components/audio/whisper_remote.py @@ -5,7 +5,7 @@ import json import logging from pathlib import Path -from haystack.utils import request_with_retry +from haystack.preview.utils import request_with_retry from haystack.preview import component, Document logger = logging.getLogger(__name__) diff --git a/haystack/preview/document_stores/memory/document_store.py b/haystack/preview/document_stores/memory/document_store.py index 50949ff7e..87a928674 100644 --- a/haystack/preview/document_stores/memory/document_store.py +++ b/haystack/preview/document_stores/memory/document_store.py @@ -16,7 +16,7 @@ from haystack.preview.dataclasses import Document from haystack.preview.document_stores.protocols import DuplicatePolicy, DocumentStore from haystack.preview.document_stores.memory._filters import match from haystack.preview.document_stores.errors import DuplicateDocumentError, MissingDocumentError -from haystack.utils.scipy_utils import expit +from haystack.preview.utils import expit logger = logging.getLogger(__name__) @@ -250,7 +250,7 @@ class MemoryDocumentStore: # get scores for the query against the corpus docs_scores = bm25_scorer.get_scores(tokenized_query) if scale_score: - docs_scores = [float(expit(np.asarray(score / SCALING_FACTOR))) for score in docs_scores] + docs_scores = [expit(float(score / SCALING_FACTOR)) for score in docs_scores] # get the last top_k indexes and reverse them top_docs_positions = np.argsort(docs_scores)[-top_k:][::-1] diff --git a/haystack/preview/utils/__init__.py b/haystack/preview/utils/__init__.py new file mode 100644 index 000000000..adf41e033 --- /dev/null +++ b/haystack/preview/utils/__init__.py @@ -0,0 +1,2 @@ +from haystack.preview.utils.expit import expit +from haystack.preview.utils.requests_utils 
import request_with_retry diff --git a/haystack/preview/utils/expit.py b/haystack/preview/utils/expit.py new file mode 100644 index 000000000..0aaaa563c --- /dev/null +++ b/haystack/preview/utils/expit.py @@ -0,0 +1,6 @@ +import numpy as np + + +def expit(x: float) -> float: + """Return the logistic sigmoid of ``x``, i.e. ``1 / (1 + exp(-x))``.""" + return 1 / (1 + np.exp(-x)) diff --git a/haystack/preview/utils/requests_utils.py b/haystack/preview/utils/requests_utils.py new file mode 100644 index 000000000..245d7737f --- /dev/null +++ b/haystack/preview/utils/requests_utils.py @@ -0,0 +1,96 @@ +from typing import Optional, List + +import logging + +from tenacity import retry, wait_exponential, retry_if_exception_type, stop_after_attempt, before_log, after_log +import requests + +logger = logging.getLogger(__name__) + + +def request_with_retry( + attempts: int = 3, status_codes_to_retry: Optional[List[int]] = None, **kwargs +) -> requests.Response: + """ + request_with_retry is a simple wrapper function that executes an HTTP request + with a configurable exponential backoff retry on failures. + + All kwargs will be passed to ``requests.request``, so it accepts the same arguments. 
+ + Example Usage: + -------------- + + # Sending an HTTP request with default retry configs + res = request_with_retry(method="GET", url="https://example.com") + + # Sending an HTTP request with custom number of attempts + res = request_with_retry(method="GET", url="https://example.com", attempts=10) + + # Sending an HTTP request with custom HTTP codes to retry + res = request_with_retry(method="GET", url="https://example.com", status_codes_to_retry=[408, 503]) + + # Sending an HTTP request with custom timeout in seconds + res = request_with_retry(method="GET", url="https://example.com", timeout=5) + + # Sending an HTTP request with custom authorization handling + class CustomAuth(requests.auth.AuthBase): + def __call__(self, r): + r.headers["authorization"] = "Basic <my_token_here>" + return r + + res = request_with_retry(method="GET", url="https://example.com", auth=CustomAuth()) + + # All of the above combined + res = request_with_retry( + method="GET", + url="https://example.com", + auth=CustomAuth(), + attempts=10, + status_codes_to_retry=[408, 503], + timeout=5 + ) + + # Sending a POST request + res = request_with_retry(method="POST", url="https://example.com", data={"key": "value"}, attempts=10) + + # Retry all 5xx status codes + res = request_with_retry(method="GET", url="https://example.com", status_codes_to_retry=list(range(500, 600))) + + :param attempts: Maximum number of attempts to retry the request, defaults to 3 + :param status_codes_to_retry: List of HTTP status codes that will trigger a retry, defaults to [408, 418, 429, 503]: + - `408: Request Timeout` + - `418` + - `429: Too Many Requests` + - `503: Service Unavailable` + :param **kwargs: Optional arguments that ``request`` takes. ``timeout`` defaults to 10 seconds when omitted. 
+ :return: :class:`Response <Response>` object + """ + + if status_codes_to_retry is None: + status_codes_to_retry = [408, 418, 429, 503] + + @retry( + reraise=True, + wait=wait_exponential(), + retry=retry_if_exception_type((requests.HTTPError, TimeoutError)), + stop=stop_after_attempt(attempts), + before=before_log(logger, logging.DEBUG), + after=after_log(logger, logging.DEBUG), + ) + def run(): + # Use setdefault rather than pop: kwargs is shared by every retry attempt, so popping + # would drop a caller-supplied timeout after the first try and fall back to 10 seconds + kwargs.setdefault("timeout", 10) + res = requests.request(**kwargs) + + if res.status_code in status_codes_to_retry: + # We raise only for the status codes that must trigger a retry + res.raise_for_status() + + return res + + res = run() + # We raise here too in case the request failed with a status code that + # won't trigger a retry, this way the call will still cause an explicit exception + res.raise_for_status() + return res diff --git a/test/preview/components/audio/test_whisper_remote.py b/test/preview/components/audio/test_whisper_remote.py index c7eaa8daf..c5b251745 100644 --- a/test/preview/components/audio/test_whisper_remote.py +++ b/test/preview/components/audio/test_whisper_remote.py @@ -43,7 +43,7 @@ class TestRemoteWhisperTranscriber(BaseTestComponent): mock_response.content = '{"text": "test transcription", "other_metadata": ["other", "meta", "data"]}' comp = RemoteWhisperTranscriber(api_key="whatever") - with patch("haystack.utils.requests_utils.requests") as mocked_requests: + with patch("haystack.preview.utils.requests_utils.requests") as mocked_requests: mocked_requests.request.return_value = mock_response result = comp.run(audio_files=[preview_samples_path / "audio" / "this is the content of the document.wav"]) @@ -63,7 +63,7 @@ class TestRemoteWhisperTranscriber(BaseTestComponent): mock_response.content = '{"text": "test transcription", "other_metadata": ["other", "meta", "data"]}' comp = RemoteWhisperTranscriber(api_key="whatever") - with patch("haystack.utils.requests_utils.requests") as mocked_requests: + with 
patch("haystack.preview.utils.requests_utils.requests") as mocked_requests: mocked_requests.request.return_value = mock_response result = comp.run( @@ -89,7 +89,7 @@ class TestRemoteWhisperTranscriber(BaseTestComponent): mock_response.content = '{"text": "test transcription", "other_metadata": ["other", "meta", "data"]}' comp = RemoteWhisperTranscriber(api_key="whatever") - with patch("haystack.utils.requests_utils.requests") as mocked_requests: + with patch("haystack.preview.utils.requests_utils.requests") as mocked_requests: mocked_requests.request.return_value = mock_response with open(preview_samples_path / "audio" / "this is the content of the document.wav", "rb") as audio_stream: @@ -107,7 +107,7 @@ class TestRemoteWhisperTranscriber(BaseTestComponent): mock_response.content = '{"text": "test transcription", "other_metadata": ["other", "meta", "data"]}' comp = RemoteWhisperTranscriber(api_key="whatever") - with patch("haystack.utils.requests_utils.requests") as mocked_requests: + with patch("haystack.preview.utils.requests_utils.requests") as mocked_requests: mocked_requests.request.return_value = mock_response comp.run(audio_files=[preview_samples_path / "audio" / "this is the content of the document.wav"]) @@ -128,7 +128,7 @@ class TestRemoteWhisperTranscriber(BaseTestComponent): mock_response.content = '{"text": "test transcription", "other_metadata": ["other", "meta", "data"]}' comp = RemoteWhisperTranscriber(api_key="whatever") - with patch("haystack.utils.requests_utils.requests") as mocked_requests: + with patch("haystack.preview.utils.requests_utils.requests") as mocked_requests: mocked_requests.request.return_value = mock_response comp.run(