chore: remove haystack dependencies from preview (#5569)

* provides preview's own implementation of expit

* copy the requests utility over into preview

* remove unnecessary type conversions

* fix mocking paths
This commit is contained in:
Massimiliano Pippi 2023-08-16 12:45:28 +02:00 committed by GitHub
parent 93b3400440
commit d4c1a0508a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 109 additions and 8 deletions

View File

@ -5,7 +5,7 @@ import json
import logging
from pathlib import Path
from haystack.utils import request_with_retry
from haystack.preview.utils import request_with_retry
from haystack.preview import component, Document
logger = logging.getLogger(__name__)

View File

@ -16,7 +16,7 @@ from haystack.preview.dataclasses import Document
from haystack.preview.document_stores.protocols import DuplicatePolicy, DocumentStore
from haystack.preview.document_stores.memory._filters import match
from haystack.preview.document_stores.errors import DuplicateDocumentError, MissingDocumentError
from haystack.utils.scipy_utils import expit
from haystack.preview.utils import expit
logger = logging.getLogger(__name__)
@ -250,7 +250,7 @@ class MemoryDocumentStore:
# get scores for the query against the corpus
docs_scores = bm25_scorer.get_scores(tokenized_query)
if scale_score:
docs_scores = [float(expit(np.asarray(score / SCALING_FACTOR))) for score in docs_scores]
docs_scores = [expit(float(score / SCALING_FACTOR)) for score in docs_scores]
# get the last top_k indexes and reverse them
top_docs_positions = np.argsort(docs_scores)[-top_k:][::-1]

View File

@ -0,0 +1,2 @@
from haystack.preview.utils.expit import expit
from haystack.preview.utils.requests_utils import request_with_retry

View File

@ -0,0 +1,5 @@
import numpy as np
def expit(x: float) -> float:
    """
    Compute the logistic sigmoid ``1 / (1 + exp(-x))`` in a numerically stable way.

    The textbook formula evaluates ``exp(-x)``, which overflows (and makes NumPy emit a
    ``RuntimeWarning``) for large negative ``x``. Here the exponential is only ever taken
    of a non-positive number, so it cannot overflow.

    :param x: The value to squash. NumPy arrays are also accepted and handled element-wise.
    :return: The sigmoid of ``x``, a value in the closed interval [0, 1].
    """
    # exp(-|x|) always lies in [0, 1], so this call can underflow to 0 but never overflow.
    z = np.exp(-np.abs(x))
    # For x >= 0 the sigmoid is 1 / (1 + z); for x < 0 it is the equivalent z / (1 + z).
    return np.where(x >= 0, 1.0, z) / (1.0 + z)

View File

@ -0,0 +1,94 @@
from typing import Optional, List
import logging
from tenacity import retry, wait_exponential, retry_if_exception_type, stop_after_attempt, before_log, after_log
import requests
# Name the logger after the module's import path (not its filesystem path) so that it
# takes part in the package's logger hierarchy like the other preview modules do.
logger = logging.getLogger(__name__)
def request_with_retry(
    attempts: int = 3, status_codes_to_retry: Optional[List[int]] = None, **kwargs
) -> requests.Response:
    """
    request_with_retry is a simple wrapper function that executes an HTTP request
    with a configurable exponential backoff retry on failures.

    All kwargs will be passed to ``requests.request``, so it accepts the same arguments.
    If no ``timeout`` is given, a default of 10 seconds is used for every attempt.

    Example Usage:
    --------------

    # Sending an HTTP request with default retry configs
    res = request_with_retry(method="GET", url="https://example.com")

    # Sending an HTTP request with custom number of attempts
    res = request_with_retry(method="GET", url="https://example.com", attempts=10)

    # Sending an HTTP request with custom HTTP codes to retry
    res = request_with_retry(method="GET", url="https://example.com", status_codes_to_retry=[408, 503])

    # Sending an HTTP request with custom timeout in seconds
    res = request_with_retry(method="GET", url="https://example.com", timeout=5)

    # Sending an HTTP request with custom authorization handling
    class CustomAuth(requests.auth.AuthBase):
        def __call__(self, r):
            r.headers["authorization"] = "Basic <my_token_here>"
            return r

    res = request_with_retry(method="GET", url="https://example.com", auth=CustomAuth())

    # All of the above combined
    res = request_with_retry(
        method="GET",
        url="https://example.com",
        auth=CustomAuth(),
        attempts=10,
        status_codes_to_retry=[408, 503],
        timeout=5
    )

    # Sending a POST request
    res = request_with_retry(method="POST", url="https://example.com", data={"key": "value"}, attempts=10)

    # Retry all 5xx status codes
    res = request_with_retry(method="GET", url="https://example.com", status_codes_to_retry=list(range(500, 600)))

    :param attempts: Maximum number of attempts to retry the request, defaults to 3
    :param status_codes_to_retry: List of HTTP status codes that will trigger a retry, defaults to [408, 418, 429, 503]:
        - `408: Request Timeout`
        - `418`
        - `429: Too Many Requests`
        - `503: Service Unavailable`
    :param **kwargs: Optional arguments that ``request`` takes.
    :raises requests.HTTPError: If the final response carries an error status code, whether or not
        that code was eligible for a retry.
    :return: :class:`Response <Response>` object
    """
    if status_codes_to_retry is None:
        status_codes_to_retry = [408, 418, 429, 503]

    # Resolve the timeout once, outside the retried closure. Popping it inside `run` would
    # consume the caller's value on the first attempt, and every retry would then silently
    # fall back to the default.
    timeout = kwargs.pop("timeout", 10)

    @retry(
        reraise=True,
        wait=wait_exponential(),
        # `requests` signals timeouts with its own `requests.Timeout`, which is not a subclass
        # of the builtin `TimeoutError`; list both so that timeouts are actually retried.
        retry=retry_if_exception_type((requests.HTTPError, requests.Timeout, TimeoutError)),
        stop=stop_after_attempt(attempts),
        before=before_log(logger, logging.DEBUG),
        after=after_log(logger, logging.DEBUG),
    )
    def run():
        res = requests.request(**kwargs, timeout=timeout)

        if res.status_code in status_codes_to_retry:
            # We raise only for the status codes that must trigger a retry
            res.raise_for_status()

        return res

    res = run()
    # We raise here too in case the request failed with a status code that
    # won't trigger a retry, this way the call will still cause an explicit exception
    res.raise_for_status()
    return res

View File

@ -43,7 +43,7 @@ class TestRemoteWhisperTranscriber(BaseTestComponent):
mock_response.content = '{"text": "test transcription", "other_metadata": ["other", "meta", "data"]}'
comp = RemoteWhisperTranscriber(api_key="whatever")
with patch("haystack.utils.requests_utils.requests") as mocked_requests:
with patch("haystack.preview.utils.requests_utils.requests") as mocked_requests:
mocked_requests.request.return_value = mock_response
result = comp.run(audio_files=[preview_samples_path / "audio" / "this is the content of the document.wav"])
@ -63,7 +63,7 @@ class TestRemoteWhisperTranscriber(BaseTestComponent):
mock_response.content = '{"text": "test transcription", "other_metadata": ["other", "meta", "data"]}'
comp = RemoteWhisperTranscriber(api_key="whatever")
with patch("haystack.utils.requests_utils.requests") as mocked_requests:
with patch("haystack.preview.utils.requests_utils.requests") as mocked_requests:
mocked_requests.request.return_value = mock_response
result = comp.run(
@ -89,7 +89,7 @@ class TestRemoteWhisperTranscriber(BaseTestComponent):
mock_response.content = '{"text": "test transcription", "other_metadata": ["other", "meta", "data"]}'
comp = RemoteWhisperTranscriber(api_key="whatever")
with patch("haystack.utils.requests_utils.requests") as mocked_requests:
with patch("haystack.preview.utils.requests_utils.requests") as mocked_requests:
mocked_requests.request.return_value = mock_response
with open(preview_samples_path / "audio" / "this is the content of the document.wav", "rb") as audio_stream:
@ -107,7 +107,7 @@ class TestRemoteWhisperTranscriber(BaseTestComponent):
mock_response.content = '{"text": "test transcription", "other_metadata": ["other", "meta", "data"]}'
comp = RemoteWhisperTranscriber(api_key="whatever")
with patch("haystack.utils.requests_utils.requests") as mocked_requests:
with patch("haystack.preview.utils.requests_utils.requests") as mocked_requests:
mocked_requests.request.return_value = mock_response
comp.run(audio_files=[preview_samples_path / "audio" / "this is the content of the document.wav"])
@ -128,7 +128,7 @@ class TestRemoteWhisperTranscriber(BaseTestComponent):
mock_response.content = '{"text": "test transcription", "other_metadata": ["other", "meta", "data"]}'
comp = RemoteWhisperTranscriber(api_key="whatever")
with patch("haystack.utils.requests_utils.requests") as mocked_requests:
with patch("haystack.preview.utils.requests_utils.requests") as mocked_requests:
mocked_requests.request.return_value = mock_response
comp.run(