mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-08-17 21:17:43 +00:00
feat: Update searchapi format, default to Google, allow search engine selection (#7453)
* Update searchapi payload * Add release note * PR feedback - Stefano * Adjust unit test for mandatory engine search_param field
This commit is contained in:
parent
42c5b7af32
commit
d83af92270
@ -1,4 +1,3 @@
|
||||
import json
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
import requests
|
||||
@ -21,8 +20,6 @@ class SearchApiWebSearch:
|
||||
"""
|
||||
Uses [SearchApi](https://www.searchapi.io/) to search the web for relevant documents.
|
||||
|
||||
See the [SearchApi website](https://www.searchapi.io/) for more details.
|
||||
|
||||
Usage example:
|
||||
```python
|
||||
from haystack.components.websearch import SearchApiWebSearch
|
||||
@ -50,12 +47,17 @@ class SearchApiWebSearch:
|
||||
:param search_params: Additional parameters passed to the SearchApi API.
|
||||
For example, you can set 'num' to 100 to increase the number of search results.
|
||||
See the [SearchApi website](https://www.searchapi.io/) for more details.
|
||||
|
||||
The default search engine is Google; however, users can change it by setting the `engine`
|
||||
parameter in the `search_params`.
|
||||
"""
|
||||
|
||||
self.api_key = api_key
|
||||
self.top_k = top_k
|
||||
self.allowed_domains = allowed_domains
|
||||
self.search_params = search_params or {}
|
||||
if "engine" not in self.search_params:
|
||||
self.search_params["engine"] = "google"
|
||||
|
||||
# Ensure that the API key is resolved.
|
||||
_ = self.api_key.resolve_value()
|
||||
@ -101,10 +103,8 @@ class SearchApiWebSearch:
|
||||
:raises SearchApiError: If an error occurs while querying the SearchApi API.
|
||||
"""
|
||||
query_prepend = "OR ".join(f"site:{domain} " for domain in self.allowed_domains) if self.allowed_domains else ""
|
||||
|
||||
payload = json.dumps({"q": query_prepend + " " + query, **self.search_params})
|
||||
payload = {"q": query_prepend + " " + query, **self.search_params}
|
||||
headers = {"Authorization": f"Bearer {self.api_key.resolve_value()}", "X-SearchApi-Source": "Haystack"}
|
||||
|
||||
try:
|
||||
response = requests.get(SEARCHAPI_BASE_URL, headers=headers, params=payload, timeout=90)
|
||||
response.raise_for_status() # Will raise an HTTPError for bad responses
|
||||
|
@ -0,0 +1,5 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
Updated the SearchApiWebSearch component to use the new search format and to let users specify the search engine via the `engine`
|
||||
parameter in `search_params`, making it easier for users to tailor their web searches. The default search engine is Google.
|
@ -1,13 +1,12 @@
|
||||
import os
|
||||
from unittest.mock import Mock, patch
|
||||
from haystack.utils.auth import Secret
|
||||
|
||||
import pytest
|
||||
from requests import Timeout, RequestException, HTTPError
|
||||
from requests import HTTPError, RequestException, Timeout
|
||||
|
||||
from haystack import Document
|
||||
from haystack.components.websearch.searchapi import SearchApiError, SearchApiWebSearch
|
||||
|
||||
from haystack.utils.auth import Secret
|
||||
|
||||
EXAMPLE_SEARCHAPI_RESPONSE = {
|
||||
"search_metadata": {
|
||||
@ -385,7 +384,7 @@ class TestSearchApiSearchAPI:
|
||||
"api_key": {"env_vars": ["SEARCHAPI_API_KEY"], "strict": True, "type": "env_var"},
|
||||
"top_k": 10,
|
||||
"allowed_domains": ["testdomain.com"],
|
||||
"search_params": {"param": "test params"},
|
||||
"search_params": {"param": "test params", "engine": "google"},
|
||||
},
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user