fix: WebRetriever top_k is ignored in a pipeline (#5106)

* Initial changes
* Add WebSearch, WebRetriever top_k unit tests
* Add exact integration test that failed for Tuana
* PR review
2026-01-08 04:56:45 +00:00 · 2023-06-09 10:42:37 +02:00 · 2023-06-09 10:42:37 +02:00 · 0cc9ce7522
commit 0cc9ce7522
parent d8a4f20379
6 changed files with 320 additions and 155 deletions
--- a/haystack/nodes/retriever/web.py
+++ b/haystack/nodes/retriever/web.py
@@ -198,7 +198,7 @@ class WebRetriever(BaseRetriever):
            search_results, _ = self.web_search.run(query=query)
            search_results = search_results["documents"]
            if self.mode == "snippets":
-                return search_results  # type: ignore
+                return search_results[:top_k]  # type: ignore

            links: List[SearchResult] = [
                SearchResult(r.meta["link"], r.meta.get("score", None), r.meta.get("position", None))
--- a/haystack/nodes/search_engine/web.py
+++ b/haystack/nodes/search_engine/web.py
@@ -63,17 +63,25 @@ class WebSearch(BaseComponent):
        labels: Optional[MultiLabel] = None,
        documents: Optional[List[Document]] = None,
        meta: Optional[dict] = None,
+        top_k: Optional[int] = None,
    ) -> Tuple[Dict, str]:
        """
-        Search the search engine for the given query and return the results. Only the query parameter is used.
+        Search the search engine for the given query and return the results. Only the query parameter and the top_k
+        parameter are used.
        :param query: The query to search for.
+        :param file_paths: Not used.
+        :param labels: Not used.
+        :param documents: Not used.
+        :param meta: Not used.
+        :param top_k: return only the top_k results. If None, the top_k value passed to the constructor is used.
+

        :return: List of search results as documents.
        """
        # query is a required parameter for search, we need to keep the signature of run() the same as in other nodes
        if not query:
            raise ValueError("WebSearch run requires the `query` parameter")
-        return {"documents": self.search_engine.search(query)}, "output_1"
+        return {"documents": self.search_engine.search(query, top_k=top_k)}, "output_1"

    def run_batch(
        self,
--- a/test/nodes/conftest.py
+++ b/test/nodes/conftest.py
@@ -1,4 +1,5 @@
 from typing import List
+from unittest.mock import patch, Mock
 from uuid import UUID

 from numpy import loadtxt
@@ -91,3 +92,102 @@ def indexing_document_classifier():
        batch_size=16,
        classification_field="class_field",
    )
+
+
+example_serperdev_response = {
+    "searchParameters": {
+        "q": "Who is the boyfriend of Olivia Wilde?",
+        "gl": "us",
+        "hl": "en",
+        "autocorrect": True,
+        "type": "search",
+    },
+    "organic": [
+        {
+            "title": "Olivia Wilde embraces Jason Sudeikis amid custody battle, Harry Styles split - Page Six",
+            "link": "https://pagesix.com/2023/01/29/olivia-wilde-hugs-it-out-with-jason-sudeikis-after-harry-styles-split/",
+            "snippet": "Looks like Olivia Wilde and Jason Sudeikis are starting 2023 on good terms. Amid their highly publicized custody battle – and the actress' ...",
+            "date": "Jan 29, 2023",
+            "position": 1,
+        },
+        {
+            "title": "Olivia Wilde Is 'Quietly Dating' Again Following Harry Styles Split: 'He Makes Her Happy'",
+            "link": "https://www.yahoo.com/now/olivia-wilde-quietly-dating-again-183844364.html",
+            "snippet": "Olivia Wilde is “quietly dating again” following her November 2022 split from Harry Styles, a source exclusively tells Life & Style.",
+            "date": "Feb 10, 2023",
+            "position": 2,
+        },
+        {
+            "title": "Olivia Wilde and Harry Styles' Relationship Timeline: The Way They Were - Us Weekly",
+            "link": "https://www.usmagazine.com/celebrity-news/pictures/olivia-wilde-and-harry-styles-relationship-timeline/",
+            "snippet": "Olivia Wilde started dating Harry Styles after ending her years-long engagement to Jason Sudeikis — see their relationship timeline.",
+            "date": "Mar 10, 2023",
+            "imageUrl": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSgTcalNFvptTbYBiDXX55s8yCGfn6F1qbed9DAN16LvynTr9GayK5SPmY&s",
+            "position": 3,
+        },
+        {
+            "title": "Olivia Wilde Is 'Ready to Date Again' After Harry Styles Split - Us Weekly",
+            "link": "https://www.usmagazine.com/celebrity-news/news/olivia-wilde-is-ready-to-date-again-after-harry-styles-split/",
+            "snippet": "Ready for love! Olivia Wilde is officially back on the dating scene following her split from her ex-boyfriend, Harry Styles.",
+            "date": "Mar 1, 2023",
+            "imageUrl": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRCRAeRy5sVE631ZctzbzuOF70xkIOHaTvh2K7dYvdiVBwALiKrIjpscok&s",
+            "position": 4,
+        },
+        {
+            "title": "Harry Styles and Olivia Wilde's Definitive Relationship Timeline - Harper's Bazaar",
+            "link": "https://www.harpersbazaar.com/celebrity/latest/a35172115/harry-styles-olivia-wilde-relationship-timeline/",
+            "snippet": "November 2020: News breaks about Olivia splitting from fiancé Jason Sudeikis. ... In mid-November, news breaks of Olivia Wilde's split from Jason ...",
+            "date": "Feb 23, 2023",
+            "imageUrl": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRRqw3fvZOIGHEepxCc7yFAWYsS_v_1H6X-4nxyFJxdfRuFQw_BrI6JVzI&s",
+            "position": 5,
+        },
+        {
+            "title": "Harry Styles and Olivia Wilde's Relationship Timeline - People",
+            "link": "https://people.com/music/harry-styles-olivia-wilde-relationship-timeline/",
+            "snippet": "Harry Styles and Olivia Wilde first met on the set of Don't Worry Darling and stepped out as a couple in January 2021. Relive all their biggest relationship ...",
+            "position": 6,
+        },
+        {
+            "title": "Jason Sudeikis and Olivia Wilde's Relationship Timeline - People",
+            "link": "https://people.com/movies/jason-sudeikis-olivia-wilde-relationship-timeline/",
+            "snippet": "Jason Sudeikis and Olivia Wilde ended their engagement of seven years in 2020. Here's a complete timeline of their relationship.",
+            "date": "Mar 24, 2023",
+            "imageUrl": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSleZoXusQyJJe2WMgIuck_cVaJ8AE0_hU2QxsXzYvKANi55UQlv82yAVI&s",
+            "position": 7,
+        },
+        {
+            "title": "Olivia Wilde's anger at ex-boyfriend Harry Styles: She resents him and thinks he was using her | Marca",
+            "link": "https://www.marca.com/en/lifestyle/celebrities/2023/02/23/63f779a4e2704e8d988b4624.html",
+            "snippet": "The two started dating after Wilde split up with actor Jason Sudeikisin 2020. However, their relationship came to an end last November.",
+            "date": "Feb 23, 2023",
+            "imageUrl": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQBgJF2mSnIWCvPrqUqM4WTI9xPNWPyLvHuune85swpB1yE_G8cy_7KRh0&s",
+            "position": 8,
+        },
+        {
+            "title": "Olivia Wilde's dating history: Who has the actress dated? | The US Sun",
+            "link": "https://www.the-sun.com/entertainment/5221040/olivia-wildes-dating-history/",
+            "snippet": "AMERICAN actress Olivia Wilde started dating Harry Styles in January 2021 after breaking off her engagement the year prior.",
+            "date": "Nov 19, 2022",
+            "imageUrl": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTpm8BToVFHJoH6yRggg0fLocLT9mt6lwsnRxFFDNdDGhDydzQiSKZ9__g&s",
+            "position": 9,
+        },
+    ],
+    "relatedSearches": [
+        {"query": "Harry Styles girlfriends in order"},
+        {"query": "Harry Styles and Olivia Wilde engaged"},
+        {"query": "Harry Styles and Olivia Wilde wedding"},
+        {"query": "Who is Harry Styles married to"},
+        {"query": "Jason Sudeikis Olivia Wilde relationship"},
+        {"query": "Olivia Wilde and Jason Sudeikis kids"},
+        {"query": "Olivia Wilde children"},
+        {"query": "Harry Styles and Olivia Wilde age difference"},
+        {"query": "Jason Sudeikis Olivia Wilde, Harry Styles"},
+    ],
+}
+
+
+@pytest.fixture
+def mock_web_search():
+    with patch("haystack.nodes.search_engine.providers.requests") as mock_run:
+        mock_run.request.return_value = Mock(status_code=200, json=lambda: example_serperdev_response)
+        yield mock_run
--- a/test/nodes/test_retriever.py
+++ b/test/nodes/test_retriever.py
@@ -1124,156 +1124,6 @@ def test_multimodal_text_image_retrieval(text_docs: List[Document], image_docs:
    assert text_results[0].content == "My name is Christelle and I live in Paris"


-@pytest.mark.unit
-def test_web_retriever_mode_raw_documents(monkeypatch):
-    expected_search_results = {
-        "documents": [
-            Document(
-                content="Eddard Stark",
-                score=0.9090909090909091,
-                meta={"title": "Eddard Stark", "link": "", "score": 0.9090909090909091},
-                id_hash_keys=["content"],
-                id="f408db6de8de0ffad0cb47cf8830dbb8",
-            ),
-            Document(
-                content="The most likely answer for the clue is NED. How many solutions does Arya Stark's Father have? With crossword-solver.io you will find 1 solutions. We use ...",
-                score=0.09090909090909091,
-                meta={
-                    "title": "Arya Stark's Father - Crossword Clue Answers",
-                    "link": "https://crossword-solver.io/clue/arya-stark%27s-father/",
-                    "position": 1,
-                    "score": 0.09090909090909091,
-                },
-                id_hash_keys=["content"],
-                id="51779277acf94cf90e7663db137c0732",
-            ),
-        ]
-    }
-
-    def mock_web_search_run(self, query: str) -> Tuple[Dict, str]:
-        return expected_search_results, "output_1"
-
-    class MockResponse:
-        def __init__(self, text, status_code):
-            self.text = text
-            self.status_code = status_code
-
-    def get(url, headers, timeout):
-        return MockResponse("mocked", 200)
-
-    def get_content(self, text: str) -> str:
-        return "What are the top solutions for\nArya Stark's Father\nWe found 1 solutions for\nArya Stark's Father\n.The top solutions is determined by popularity, ratings and frequency of searches. The most likely answer for the clue is NED..."
-
-    monkeypatch.setattr(WebSearch, "run", mock_web_search_run)
-    monkeypatch.setattr(ArticleExtractor, "get_content", get_content)
-    monkeypatch.setattr(requests, "get", get)
-
-    web_retriever = WebRetriever(api_key="", top_search_results=2, mode="raw_documents")
-    result = web_retriever.retrieve(query="Who is the father of Arya Stark?")
-    assert len(result) == 1
-    assert isinstance(result[0], Document)
-    assert (
-        result[0].content
-        == "What are the top solutions for\nArya Stark's Father\nWe found 1 solutions for\nArya Stark's Father\n.The top solutions is determined by popularity, ratings and frequency of searches. The most likely answer for the clue is NED..."
-    )
-    assert result[0].score == None
-    assert result[0].meta["url"] == "https://crossword-solver.io/clue/arya-stark%27s-father/"
-    # Only preprocessed docs but not raw docs should have the _split_id field
-    assert "_split_id" not in result[0].meta
-
-
-@pytest.mark.unit
-def test_web_retriever_mode_preprocessed_documents(monkeypatch):
-    expected_search_results = {
-        "documents": [
-            Document(
-                content="Eddard Stark",
-                score=0.9090909090909091,
-                meta={"title": "Eddard Stark", "link": "", "score": 0.9090909090909091},
-                id_hash_keys=["content"],
-                id="f408db6de8de0ffad0cb47cf8830dbb8",
-            ),
-            Document(
-                content="The most likely answer for the clue is NED. How many solutions does Arya Stark's Father have? With crossword-solver.io you will find 1 solutions. We use ...",
-                score=0.09090909090909091,
-                meta={
-                    "title": "Arya Stark's Father - Crossword Clue Answers",
-                    "link": "https://crossword-solver.io/clue/arya-stark%27s-father/",
-                    "position": 1,
-                    "score": 0.09090909090909091,
-                },
-                id_hash_keys=["content"],
-                id="51779277acf94cf90e7663db137c0732",
-            ),
-        ]
-    }
-
-    def mock_web_search_run(self, query: str) -> Tuple[Dict, str]:
-        return expected_search_results, "output_1"
-
-    class MockResponse:
-        def __init__(self, text, status_code):
-            self.text = text
-            self.status_code = status_code
-
-    def get(url, headers, timeout):
-        return MockResponse("mocked", 200)
-
-    def get_content(self, text: str) -> str:
-        return "What are the top solutions for\nArya Stark's Father\nWe found 1 solutions for\nArya Stark's Father\n.The top solutions is determined by popularity, ratings and frequency of searches. The most likely answer for the clue is NED..."
-
-    monkeypatch.setattr(WebSearch, "run", mock_web_search_run)
-    monkeypatch.setattr(ArticleExtractor, "get_content", get_content)
-    monkeypatch.setattr(requests, "get", get)
-
-    web_retriever = WebRetriever(api_key="", top_search_results=2, mode="preprocessed_documents")
-    result = web_retriever.retrieve(query="Who is the father of Arya Stark?")
-    assert len(result) == 1
-    assert isinstance(result[0], Document)
-    assert (
-        result[0].content
-        == "What are the top solutions for\nArya Stark's Father\nWe found 1 solutions for\nArya Stark's Father\n.The top solutions is determined by popularity, ratings and frequency of searches. The most likely answer for the clue is NED..."
-    )
-    assert result[0].score == None
-    assert result[0].meta["url"] == "https://crossword-solver.io/clue/arya-stark%27s-father/"
-    assert result[0].meta["_split_id"] == 0
-
-
-@pytest.mark.unit
-def test_web_retriever_mode_snippets(monkeypatch):
-    expected_search_results = {
-        "documents": [
-            Document(
-                content="Eddard Stark",
-                score=0.9090909090909091,
-                meta={"title": "Eddard Stark", "link": "", "score": 0.9090909090909091},
-                id_hash_keys=["content"],
-                id="f408db6de8de0ffad0cb47cf8830dbb8",
-            ),
-            Document(
-                content="The most likely answer for the clue is NED. How many solutions does Arya Stark's Father have? With crossword-solver.io you will find 1 solutions. We use ...",
-                score=0.09090909090909091,
-                meta={
-                    "title": "Arya Stark's Father - Crossword Clue Answers",
-                    "link": "https://crossword-solver.io/clue/arya-stark%27s-father/",
-                    "position": 1,
-                    "score": 0.09090909090909091,
-                },
-                id_hash_keys=["content"],
-                id="51779277acf94cf90e7663db137c0732",
-            ),
-        ]
-    }
-
-    def mock_web_search_run(self, query: str) -> Tuple[Dict, str]:
-        return expected_search_results, "output_1"
-
-    monkeypatch.setattr(WebSearch, "run", mock_web_search_run)
-    web_retriever = WebRetriever(api_key="", top_search_results=2)
-    result = web_retriever.retrieve(query="Who is the father of Arya Stark?")
-    assert result == expected_search_results["documents"]
-
-
 @pytest.mark.unit
 @patch("haystack.nodes.retriever._openai_encoder.openai_request")
 def test_openai_default_api_base(mock_request):
--- a/test/nodes/test_web_retriever.py
+++ b/test/nodes/test_web_retriever.py
@@ -0,0 +1,198 @@
+import os
+from typing import Dict, Tuple
+
+
+import pytest
+import requests
+from boilerpy3.extractors import ArticleExtractor
+
+from haystack import Document, Pipeline
+from haystack.nodes import WebSearch, WebRetriever, PromptNode
+
+
+@pytest.mark.unit
+def test_web_retriever_mode_raw_documents(monkeypatch):
+    expected_search_results = {
+        "documents": [
+            Document(
+                content="Eddard Stark",
+                score=0.9090909090909091,
+                meta={"title": "Eddard Stark", "link": "", "score": 0.9090909090909091},
+                id_hash_keys=["content"],
+                id="f408db6de8de0ffad0cb47cf8830dbb8",
+            ),
+            Document(
+                content="The most likely answer for the clue is NED. How many solutions does Arya Stark's Father have? With crossword-solver.io you will find 1 solutions. We use ...",
+                score=0.09090909090909091,
+                meta={
+                    "title": "Arya Stark's Father - Crossword Clue Answers",
+                    "link": "https://crossword-solver.io/clue/arya-stark%27s-father/",
+                    "position": 1,
+                    "score": 0.09090909090909091,
+                },
+                id_hash_keys=["content"],
+                id="51779277acf94cf90e7663db137c0732",
+            ),
+        ]
+    }
+
+    def mock_web_search_run(self, query: str) -> Tuple[Dict, str]:
+        return expected_search_results, "output_1"
+
+    class MockResponse:
+        def __init__(self, text, status_code):
+            self.text = text
+            self.status_code = status_code
+
+    def get(url, headers, timeout):
+        return MockResponse("mocked", 200)
+
+    def get_content(self, text: str) -> str:
+        return "What are the top solutions for\nArya Stark's Father\nWe found 1 solutions for\nArya Stark's Father\n.The top solutions is determined by popularity, ratings and frequency of searches. The most likely answer for the clue is NED..."
+
+    monkeypatch.setattr(WebSearch, "run", mock_web_search_run)
+    monkeypatch.setattr(ArticleExtractor, "get_content", get_content)
+    monkeypatch.setattr(requests, "get", get)
+
+    web_retriever = WebRetriever(api_key="", top_search_results=2, mode="raw_documents")
+    result = web_retriever.retrieve(query="Who is the father of Arya Stark?")
+    assert len(result) == 1
+    assert isinstance(result[0], Document)
+    assert (
+        result[0].content
+        == "What are the top solutions for\nArya Stark's Father\nWe found 1 solutions for\nArya Stark's Father\n.The top solutions is determined by popularity, ratings and frequency of searches. The most likely answer for the clue is NED..."
+    )
+    assert result[0].score == None
+    assert result[0].meta["url"] == "https://crossword-solver.io/clue/arya-stark%27s-father/"
+    # Only preprocessed docs but not raw docs should have the _split_id field
+    assert "_split_id" not in result[0].meta
+
+
+@pytest.mark.unit
+def test_web_retriever_mode_preprocessed_documents(monkeypatch):
+    expected_search_results = {
+        "documents": [
+            Document(
+                content="Eddard Stark",
+                score=0.9090909090909091,
+                meta={"title": "Eddard Stark", "link": "", "score": 0.9090909090909091},
+                id_hash_keys=["content"],
+                id="f408db6de8de0ffad0cb47cf8830dbb8",
+            ),
+            Document(
+                content="The most likely answer for the clue is NED. How many solutions does Arya Stark's Father have? With crossword-solver.io you will find 1 solutions. We use ...",
+                score=0.09090909090909091,
+                meta={
+                    "title": "Arya Stark's Father - Crossword Clue Answers",
+                    "link": "https://crossword-solver.io/clue/arya-stark%27s-father/",
+                    "position": 1,
+                    "score": 0.09090909090909091,
+                },
+                id_hash_keys=["content"],
+                id="51779277acf94cf90e7663db137c0732",
+            ),
+        ]
+    }
+
+    def mock_web_search_run(self, query: str) -> Tuple[Dict, str]:
+        return expected_search_results, "output_1"
+
+    class MockResponse:
+        def __init__(self, text, status_code):
+            self.text = text
+            self.status_code = status_code
+
+    def get(url, headers, timeout):
+        return MockResponse("mocked", 200)
+
+    def get_content(self, text: str) -> str:
+        return "What are the top solutions for\nArya Stark's Father\nWe found 1 solutions for\nArya Stark's Father\n.The top solutions is determined by popularity, ratings and frequency of searches. The most likely answer for the clue is NED..."
+
+    monkeypatch.setattr(WebSearch, "run", mock_web_search_run)
+    monkeypatch.setattr(ArticleExtractor, "get_content", get_content)
+    monkeypatch.setattr(requests, "get", get)
+
+    web_retriever = WebRetriever(api_key="", top_search_results=2, mode="preprocessed_documents")
+    result = web_retriever.retrieve(query="Who is the father of Arya Stark?")
+    assert len(result) == 1
+    assert isinstance(result[0], Document)
+    assert (
+        result[0].content
+        == "What are the top solutions for\nArya Stark's Father\nWe found 1 solutions for\nArya Stark's Father\n.The top solutions is determined by popularity, ratings and frequency of searches. The most likely answer for the clue is NED..."
+    )
+    assert result[0].score == None
+    assert result[0].meta["url"] == "https://crossword-solver.io/clue/arya-stark%27s-father/"
+    assert result[0].meta["_split_id"] == 0
+
+
+@pytest.mark.unit
+def test_web_retriever_mode_snippets(monkeypatch):
+    expected_search_results = {
+        "documents": [
+            Document(
+                content="Eddard Stark",
+                score=0.9090909090909091,
+                meta={"title": "Eddard Stark", "link": "", "score": 0.9090909090909091},
+                id_hash_keys=["content"],
+                id="f408db6de8de0ffad0cb47cf8830dbb8",
+            ),
+            Document(
+                content="The most likely answer for the clue is NED. How many solutions does Arya Stark's Father have? With crossword-solver.io you will find 1 solutions. We use ...",
+                score=0.09090909090909091,
+                meta={
+                    "title": "Arya Stark's Father - Crossword Clue Answers",
+                    "link": "https://crossword-solver.io/clue/arya-stark%27s-father/",
+                    "position": 1,
+                    "score": 0.09090909090909091,
+                },
+                id_hash_keys=["content"],
+                id="51779277acf94cf90e7663db137c0732",
+            ),
+        ]
+    }
+
+    def mock_web_search_run(self, query: str) -> Tuple[Dict, str]:
+        return expected_search_results, "output_1"
+
+    monkeypatch.setattr(WebSearch, "run", mock_web_search_run)
+    web_retriever = WebRetriever(api_key="", top_search_results=2)
+    result = web_retriever.retrieve(query="Who is the father of Arya Stark?")
+    assert result == expected_search_results["documents"]
+
+
+@pytest.mark.unit
+@pytest.mark.parametrize("top_k", [1, 3, 6])
+def test_top_k_parameter(mock_web_search, top_k):
+    web_retriever = WebRetriever(api_key="some_invalid_key", mode="snippets")
+    result = web_retriever.retrieve(query="Who is the boyfriend of Olivia Wilde?", top_k=top_k)
+    assert len(result) == top_k
+    assert all(isinstance(doc, Document) for doc in result)
+
+
+@pytest.mark.integration
+@pytest.mark.skipif(
+    not os.environ.get("SERPERDEV_API_KEY", None),
+    reason="Please export an env var called SERPERDEV_API_KEY containing the serper.dev API key to run this test.",
+)
+@pytest.mark.skipif(
+    not os.environ.get("OPENAI_API_KEY", None),
+    reason="Please export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",
+)
+@pytest.mark.parametrize("top_k", [2, 4])
+def test_top_k_parameter_in_pipeline(top_k):
+    # test that WebRetriever top_k param is NOT ignored in a pipeline
+    prompt_node = PromptNode(
+        "gpt-3.5-turbo",
+        api_key=os.environ.get("OPENAI_API_KEY"),
+        max_length=256,
+        default_prompt_template="question-answering-with-document-scores",
+    )
+
+    retriever = WebRetriever(api_key=os.environ.get("SERPERDEV_API_KEY"))
+
+    pipe = Pipeline()
+
+    pipe.add_node(component=retriever, name="WebRetriever", inputs=["Query"])
+    pipe.add_node(component=prompt_node, name="QAwithScoresPrompt", inputs=["WebRetriever"])
+    result = pipe.run(query="What year was Obama president", params={"WebRetriever": {"top_k": top_k}})
+    assert len(result["results"]) == top_k
--- a/test/nodes/test_web_search.py
+++ b/test/nodes/test_web_search.py
@@ -1,5 +1,4 @@
 import os
-import unittest
 from unittest.mock import MagicMock, patch

 import pytest
@@ -99,9 +98,19 @@ def test_web_search_with_google_api_client():
            search_engine_provider="GoogleAPI",
            search_engine_kwargs={"engine_id": SEARCH_ENGINE_ID},
        )
-        result, _ = ws.run(query=query)
+        _, _ = ws.run(query=query)

        mock_build.assert_called_once_with("customsearch", "v1", developerKey=GOOGLE_API_KEY)
        mock_service.cse.assert_called_once()
        mock_cse.list.assert_called_once_with(q=query, cx=SEARCH_ENGINE_ID, num=10)
        mock_list.execute.assert_called_once()
+
+
+@pytest.mark.unit
+@pytest.mark.parametrize("top_k", [1, 3, 6])
+def test_web_search_top_k(mock_web_search, top_k):
+    ws = WebSearch(api_key="some_invalid_key")
+    result, _ = ws.run(query="Who is the boyfriend of Olivia Wilde?", top_k=top_k)
+    assert "documents" in result
+    assert len(result["documents"]) == top_k
+    assert all(isinstance(doc, Document) for doc in result["documents"])