chore: remove haystack dependencies from preview (#5569)

* provide preview's own implementation of expit * copy the requests utility over into preview * remove unnecessary type conversions * fix mocking paths
2026-01-08 13:06:29 +00:00 · 2023-08-16 12:45:28 +02:00 · 2023-08-16 12:45:28 +02:00 · d4c1a0508a
commit d4c1a0508a
parent 93b3400440
6 changed files with 109 additions and 8 deletions
--- a/haystack/preview/components/audio/whisper_remote.py
+++ b/haystack/preview/components/audio/whisper_remote.py
@ -5,7 +5,7 @@ import json
 import logging
 from pathlib import Path

-from haystack.utils import request_with_retry
+from haystack.preview.utils import request_with_retry
 from haystack.preview import component, Document

 logger = logging.getLogger(__name__)
--- a/haystack/preview/document_stores/memory/document_store.py
+++ b/haystack/preview/document_stores/memory/document_store.py
@ -16,7 +16,7 @@ from haystack.preview.dataclasses import Document
 from haystack.preview.document_stores.protocols import DuplicatePolicy, DocumentStore
 from haystack.preview.document_stores.memory._filters import match
 from haystack.preview.document_stores.errors import DuplicateDocumentError, MissingDocumentError
-from haystack.utils.scipy_utils import expit
+from haystack.preview.utils import expit

 logger = logging.getLogger(__name__)

@ -250,7 +250,7 @@ class MemoryDocumentStore:
        # get scores for the query against the corpus
        docs_scores = bm25_scorer.get_scores(tokenized_query)
        if scale_score:
-            docs_scores = [float(expit(np.asarray(score / SCALING_FACTOR))) for score in docs_scores]
+            docs_scores = [expit(float(score / SCALING_FACTOR)) for score in docs_scores]
        # get the last top_k indexes and reverse them
        top_docs_positions = np.argsort(docs_scores)[-top_k:][::-1]

--- a/haystack/preview/utils/__init__.py
+++ b/haystack/preview/utils/__init__.py
@ -0,0 +1,2 @@
+from haystack.preview.utils.expit import expit
+from haystack.preview.utils.requests_utils import request_with_retry
--- a/haystack/preview/utils/expit.py
+++ b/haystack/preview/utils/expit.py
@ -0,0 +1,5 @@
+import numpy as np
+
+
+def expit(x: float) -> float:
+    """
+    Compute the logistic sigmoid ``1 / (1 + exp(-x))`` of a scalar.
+
+    The result is evaluated in a numerically stable way: ``exp`` is only ever
+    called on a non-positive argument, so very negative inputs return 0.0
+    instead of overflowing and emitting a NumPy ``RuntimeWarning``.
+
+    :param x: The scalar value to squash.
+    :return: The sigmoid of ``x``, a value between 0 and 1.
+    """
+    if x >= 0:
+        # -x <= 0 here, so exp(-x) lies in (0, 1] and cannot overflow.
+        return 1 / (1 + np.exp(-x))
+    # For x < 0 use the algebraically equivalent form exp(x) / (1 + exp(x)):
+    # exp(x) lies in [0, 1), whereas exp(-x) would overflow for large |x|.
+    exp_x = np.exp(x)
+    return exp_x / (1 + exp_x)
--- a/haystack/preview/utils/requests_utils.py
+++ b/haystack/preview/utils/requests_utils.py
@ -0,0 +1,94 @@
+from typing import Optional, List
+
+import logging
+
+from tenacity import retry, wait_exponential, retry_if_exception_type, stop_after_attempt, before_log, after_log
+import requests
+
+# Name the logger after the module's dotted path (not its file path) so it
+# takes part in the package's logger hierarchy like the other preview modules.
+logger = logging.getLogger(__name__)
+
+
+def request_with_retry(
+    attempts: int = 3, status_codes_to_retry: Optional[List[int]] = None, **kwargs
+) -> requests.Response:
+    """
+    Execute an HTTP request, retrying with exponential backoff on failures.
+
+    All kwargs are passed to ``requests.request``, so it accepts the same arguments.
+    If no ``timeout`` is given, a default of 10 seconds is used; the same timeout
+    applies to every attempt.
+
+    Example Usage:
+    --------------
+
+    # Sending an HTTP request with default retry configs
+    res = request_with_retry(method="GET", url="https://example.com")
+
+    # Sending an HTTP request with custom number of attempts
+    res = request_with_retry(method="GET", url="https://example.com", attempts=10)
+
+    # Sending an HTTP request with custom HTTP codes to retry
+    res = request_with_retry(method="GET", url="https://example.com", status_codes_to_retry=[408, 503])
+
+    # Sending an HTTP request with custom timeout in seconds
+    res = request_with_retry(method="GET", url="https://example.com", timeout=5)
+
+    # Sending an HTTP request with custom authorization handling
+    class CustomAuth(requests.auth.AuthBase):
+        def __call__(self, r):
+            r.headers["authorization"] = "Basic <my_token_here>"
+            return r
+
+    res = request_with_retry(method="GET", url="https://example.com", auth=CustomAuth())
+
+    # All of the above combined
+    res = request_with_retry(
+        method="GET",
+        url="https://example.com",
+        auth=CustomAuth(),
+        attempts=10,
+        status_codes_to_retry=[408, 503],
+        timeout=5
+    )
+
+    # Sending a POST request
+    res = request_with_retry(method="POST", url="https://example.com", data={"key": "value"}, attempts=10)
+
+    # Retry all 5xx status codes
+    res = request_with_retry(method="GET", url="https://example.com", status_codes_to_retry=list(range(500, 600)))
+
+    :param attempts: Maximum number of attempts to retry the request, defaults to 3
+    :param status_codes_to_retry: List of HTTP status codes that will trigger a retry, defaults to [408, 418, 429, 503]:
+        - `408: Request Timeout`
+        - `418`
+        - `429: Too Many Requests`
+        - `503: Service Unavailable`
+    :param **kwargs: Optional arguments that ``request`` takes.
+    :return: :class:`Response <Response>` object
+    :raises requests.HTTPError: If the final response carries an error status code,
+        whether or not that code was eligible for a retry.
+    """
+
+    if status_codes_to_retry is None:
+        status_codes_to_retry = [408, 418, 429, 503]
+
+    # Resolve the timeout once, outside the retried closure. Popping it inside
+    # `run` would consume the caller's value on the first attempt and make every
+    # retry silently fall back to the default.
+    timeout = kwargs.pop("timeout", 10)
+
+    # NOTE(review): `requests` reports timeouts with its own exception classes,
+    # which may not match the builtin TimeoutError listed below - confirm
+    # whether request timeouts are meant to trigger a retry.
+    @retry(
+        reraise=True,
+        wait=wait_exponential(),
+        retry=retry_if_exception_type((requests.HTTPError, TimeoutError)),
+        stop=stop_after_attempt(attempts),
+        before=before_log(logger, logging.DEBUG),
+        after=after_log(logger, logging.DEBUG),
+    )
+    def run():
+        res = requests.request(**kwargs, timeout=timeout)
+
+        if res.status_code in status_codes_to_retry:
+            # We raise only for the status codes that must trigger a retry
+            res.raise_for_status()
+
+        return res
+
+    res = run()
+    # We raise here too in case the request failed with a status code that
+    # won't trigger a retry, this way the call will still cause an explicit exception
+    res.raise_for_status()
+    return res
--- a/test/preview/components/audio/test_whisper_remote.py
+++ b/test/preview/components/audio/test_whisper_remote.py
@ -43,7 +43,7 @@ class TestRemoteWhisperTranscriber(BaseTestComponent):
        mock_response.content = '{"text": "test transcription", "other_metadata": ["other", "meta", "data"]}'
        comp = RemoteWhisperTranscriber(api_key="whatever")

-        with patch("haystack.utils.requests_utils.requests") as mocked_requests:
+        with patch("haystack.preview.utils.requests_utils.requests") as mocked_requests:
            mocked_requests.request.return_value = mock_response

            result = comp.run(audio_files=[preview_samples_path / "audio" / "this is the content of the document.wav"])
@ -63,7 +63,7 @@ class TestRemoteWhisperTranscriber(BaseTestComponent):
        mock_response.content = '{"text": "test transcription", "other_metadata": ["other", "meta", "data"]}'
        comp = RemoteWhisperTranscriber(api_key="whatever")

-        with patch("haystack.utils.requests_utils.requests") as mocked_requests:
+        with patch("haystack.preview.utils.requests_utils.requests") as mocked_requests:
            mocked_requests.request.return_value = mock_response

            result = comp.run(
@ -89,7 +89,7 @@ class TestRemoteWhisperTranscriber(BaseTestComponent):
        mock_response.content = '{"text": "test transcription", "other_metadata": ["other", "meta", "data"]}'
        comp = RemoteWhisperTranscriber(api_key="whatever")

-        with patch("haystack.utils.requests_utils.requests") as mocked_requests:
+        with patch("haystack.preview.utils.requests_utils.requests") as mocked_requests:
            mocked_requests.request.return_value = mock_response

            with open(preview_samples_path / "audio" / "this is the content of the document.wav", "rb") as audio_stream:
@ -107,7 +107,7 @@ class TestRemoteWhisperTranscriber(BaseTestComponent):
        mock_response.content = '{"text": "test transcription", "other_metadata": ["other", "meta", "data"]}'
        comp = RemoteWhisperTranscriber(api_key="whatever")

-        with patch("haystack.utils.requests_utils.requests") as mocked_requests:
+        with patch("haystack.preview.utils.requests_utils.requests") as mocked_requests:
            mocked_requests.request.return_value = mock_response

            comp.run(audio_files=[preview_samples_path / "audio" / "this is the content of the document.wav"])
@ -128,7 +128,7 @@ class TestRemoteWhisperTranscriber(BaseTestComponent):
        mock_response.content = '{"text": "test transcription", "other_metadata": ["other", "meta", "data"]}'
        comp = RemoteWhisperTranscriber(api_key="whatever")

-        with patch("haystack.utils.requests_utils.requests") as mocked_requests:
+        with patch("haystack.preview.utils.requests_utils.requests") as mocked_requests:
            mocked_requests.request.return_value = mock_response

            comp.run(