diff --git a/haystack/components/websearch/searchapi.py b/haystack/components/websearch/searchapi.py index dad31f0ff..4f3042743 100644 --- a/haystack/components/websearch/searchapi.py +++ b/haystack/components/websearch/searchapi.py @@ -1,4 +1,3 @@ -import json from typing import Any, Dict, List, Optional, Union import requests @@ -21,8 +20,6 @@ class SearchApiWebSearch: """ Uses [SearchApi](https://www.searchapi.io/) to search the web for relevant documents. - See the [SearchApi website](https://www.searchapi.io/) for more details. - Usage example: ```python from haystack.components.websearch import SearchApiWebSearch @@ -50,12 +47,17 @@ class SearchApiWebSearch: :param search_params: Additional parameters passed to the SearchApi API. For example, you can set 'num' to 100 to increase the number of search results. See the [SearchApi website](https://www.searchapi.io/) for more details. + + The default search engine is Google; however, users can change it by setting the `engine` + parameter in `search_params`. """ self.api_key = api_key self.top_k = top_k self.allowed_domains = allowed_domains self.search_params = search_params or {} + if "engine" not in self.search_params: + self.search_params["engine"] = "google" # Ensure that the API key is resolved. _ = self.api_key.resolve_value() @@ -101,10 +103,8 @@ class SearchApiWebSearch: :raises SearchApiError: If an error occurs while querying the SearchApi API. 
""" query_prepend = "OR ".join(f"site:{domain} " for domain in self.allowed_domains) if self.allowed_domains else "" - - payload = json.dumps({"q": query_prepend + " " + query, **self.search_params}) + payload = {"q": query_prepend + " " + query, **self.search_params} headers = {"Authorization": f"Bearer {self.api_key.resolve_value()}", "X-SearchApi-Source": "Haystack"} - try: response = requests.get(SEARCHAPI_BASE_URL, headers=headers, params=payload, timeout=90) response.raise_for_status() # Will raise an HTTPError for bad responses diff --git a/releasenotes/notes/update-searchapi-new-format-74d8794a8a6f5581.yaml b/releasenotes/notes/update-searchapi-new-format-74d8794a8a6f5581.yaml new file mode 100644 index 000000000..7c6a40d72 --- /dev/null +++ b/releasenotes/notes/update-searchapi-new-format-74d8794a8a6f5581.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + Updated the SearchApiWebSearch component to send the search as plain query parameters (the new SearchApi request format) and to let users choose the search engine via the `engine` + parameter in `search_params`, making it easier to tailor web searches. The default search engine is Google. 
diff --git a/test/components/websearch/test_searchapi.py b/test/components/websearch/test_searchapi.py index 48ee677b6..75de66bde 100644 --- a/test/components/websearch/test_searchapi.py +++ b/test/components/websearch/test_searchapi.py @@ -1,13 +1,12 @@ import os from unittest.mock import Mock, patch -from haystack.utils.auth import Secret import pytest -from requests import Timeout, RequestException, HTTPError +from requests import HTTPError, RequestException, Timeout from haystack import Document from haystack.components.websearch.searchapi import SearchApiError, SearchApiWebSearch - +from haystack.utils.auth import Secret EXAMPLE_SEARCHAPI_RESPONSE = { "search_metadata": { @@ -385,7 +384,7 @@ class TestSearchApiSearchAPI: "api_key": {"env_vars": ["SEARCHAPI_API_KEY"], "strict": True, "type": "env_var"}, "top_k": 10, "allowed_domains": ["testdomain.com"], - "search_params": {"param": "test params"}, + "search_params": {"param": "test params", "engine": "google"}, }, }