Move WebRetriever's new init parameters to the end of the parameter list (#5673)

2025-11-03 11:19:57 +00:00 · 2023-08-29 17:46:12 +02:00 · 2023-08-29 17:46:12 +02:00 · a9b8fd9658
commit a9b8fd9658
parent fbc1951e74
1 changed file with 6 additions and 5 deletions
--- a/haystack/nodes/retriever/web.py
+++ b/haystack/nodes/retriever/web.py
@@ -50,8 +50,6 @@ class WebRetriever(BaseRetriever):
        self,
        api_key: str,
        search_engine_provider: Union[str, SearchEngine] = "SerperDev",
-        allowed_domains: Optional[List[str]] = None,
-        link_content_fetcher: Optional[LinkContentFetcher] = None,
        top_search_results: Optional[int] = 10,
        top_k: Optional[int] = 5,
        mode: Literal["snippets", "raw_documents", "preprocessed_documents"] = "snippets",
@@ -60,13 +58,12 @@ class WebRetriever(BaseRetriever):
        cache_index: Optional[str] = None,
        cache_headers: Optional[Dict[str, str]] = None,
        cache_time: int = 1 * 24 * 60 * 60,
+        allowed_domains: Optional[List[str]] = None,
+        link_content_fetcher: Optional[LinkContentFetcher] = None,
    ):
        """
        :param api_key: API key for the search engine provider.
        :param search_engine_provider: Name of the search engine provider class, see `providers.py` for a list of supported providers.
-        :param allowed_domains: List of domains to restrict the search to. If not provided, the search is unrestricted.
-        :param link_content_fetcher: LinkContentFetcher to be used to fetch the content from the links. If not provided,
-        the default LinkContentFetcher is used.
        :param top_search_results: Number of top search results to be retrieved.
        :param top_k: Top k documents to be returned by the retriever.
        :param mode: Whether to return snippets, raw documents, or preprocessed documents. Snippets are the default.
@@ -75,6 +72,10 @@ class WebRetriever(BaseRetriever):
        :param cache_index: Index name to be used to cache search results.
        :param cache_headers: Headers to be used to cache search results.
        :param cache_time: Time in seconds to cache search results. Defaults to 24 hours.
+        :param allowed_domains: List of domains to restrict the search to. If not provided, the search is unrestricted.
+        :param link_content_fetcher: LinkContentFetcher to be used to fetch the content from the links. If not provided,
+        the default LinkContentFetcher is used.
+
        """
        super().__init__()
        self.web_search = WebSearch(