feat: Update searchapi format, default to Google, allow search engine selection (#7453)

* Update searchapi payload

* Add release note

* PR feedback - Stefano

* Adjust unit test for mandatory engine search_param field
This commit is contained in:
Vladimir Blagojevic 2024-04-03 10:48:50 +02:00 committed by GitHub
parent 42c5b7af32
commit d83af92270
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 14 additions and 10 deletions

View File

@ -1,4 +1,3 @@
import json
from typing import Any, Dict, List, Optional, Union
import requests
@ -21,8 +20,6 @@ class SearchApiWebSearch:
"""
Uses [SearchApi](https://www.searchapi.io/) to search the web for relevant documents.
See the [SearchApi website](https://www.searchapi.io/) for more details.
Usage example:
```python
from haystack.components.websearch import SearchApiWebSearch
@ -50,12 +47,17 @@ class SearchApiWebSearch:
:param search_params: Additional parameters passed to the SearchApi API.
For example, you can set 'num' to 100 to increase the number of search results.
See the [SearchApi website](https://www.searchapi.io/) for more details.
The default search engine is Google; users can change it by setting the `engine`
parameter in `search_params`.
"""
self.api_key = api_key
self.top_k = top_k
self.allowed_domains = allowed_domains
self.search_params = search_params or {}
if "engine" not in self.search_params:
self.search_params["engine"] = "google"
# Ensure that the API key is resolved.
_ = self.api_key.resolve_value()
@ -101,10 +103,8 @@ class SearchApiWebSearch:
:raises SearchApiError: If an error occurs while querying the SearchApi API.
"""
query_prepend = "OR ".join(f"site:{domain} " for domain in self.allowed_domains) if self.allowed_domains else ""
payload = json.dumps({"q": query_prepend + " " + query, **self.search_params})
payload = {"q": query_prepend + " " + query, **self.search_params}
headers = {"Authorization": f"Bearer {self.api_key.resolve_value()}", "X-SearchApi-Source": "Haystack"}
try:
response = requests.get(SEARCHAPI_BASE_URL, headers=headers, params=payload, timeout=90)
response.raise_for_status() # Will raise an HTTPError for bad responses

View File

@ -0,0 +1,5 @@
---
fixes:
- |
Updated the SearchApiWebSearch component to send the search parameters in the format the SearchApi API expects and to let users choose the search engine via the `engine`
parameter in `search_params`. If `engine` is not set, the component defaults to Google.

View File

@ -1,13 +1,12 @@
import os
from unittest.mock import Mock, patch
from haystack.utils.auth import Secret
import pytest
from requests import Timeout, RequestException, HTTPError
from requests import HTTPError, RequestException, Timeout
from haystack import Document
from haystack.components.websearch.searchapi import SearchApiError, SearchApiWebSearch
from haystack.utils.auth import Secret
EXAMPLE_SEARCHAPI_RESPONSE = {
"search_metadata": {
@ -385,7 +384,7 @@ class TestSearchApiSearchAPI:
"api_key": {"env_vars": ["SEARCHAPI_API_KEY"], "strict": True, "type": "env_var"},
"top_k": 10,
"allowed_domains": ["testdomain.com"],
"search_params": {"param": "test params"},
"search_params": {"param": "test params", "engine": "google"},
},
}