mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-08-31 11:56:35 +00:00
fix: WebRetriever top_k is ignored in a pipeline (#5106)
* Initial changes * Add WebSearch, WebRetriever top_k unit tests * Add exact integration test that failed Tuana * PR review
This commit is contained in:
parent
d8a4f20379
commit
0cc9ce7522
@ -198,7 +198,7 @@ class WebRetriever(BaseRetriever):
|
||||
search_results, _ = self.web_search.run(query=query)
|
||||
search_results = search_results["documents"]
|
||||
if self.mode == "snippets":
|
||||
return search_results # type: ignore
|
||||
return search_results[:top_k] # type: ignore
|
||||
|
||||
links: List[SearchResult] = [
|
||||
SearchResult(r.meta["link"], r.meta.get("score", None), r.meta.get("position", None))
|
||||
|
@ -63,17 +63,25 @@ class WebSearch(BaseComponent):
|
||||
labels: Optional[MultiLabel] = None,
|
||||
documents: Optional[List[Document]] = None,
|
||||
meta: Optional[dict] = None,
|
||||
top_k: Optional[int] = None,
|
||||
) -> Tuple[Dict, str]:
|
||||
"""
|
||||
Search the search engine for the given query and return the results. Only the query parameter is used.
|
||||
Search the search engine for the given query and return the results. Only the query parameter and the top_k
|
||||
parameter are used.
|
||||
:param query: The query to search for.
|
||||
:param file_paths: Not used.
|
||||
:param labels: Not used.
|
||||
:param documents: Not used.
|
||||
:param meta: Not used.
|
||||
:param top_k: return only the top_k results. If None, the top_k value passed to the constructor is used.
|
||||
|
||||
|
||||
:return: List of search results as documents.
|
||||
"""
|
||||
# query is a required parameter for search, we need to keep the signature of run() the same as in other nodes
|
||||
if not query:
|
||||
raise ValueError("WebSearch run requires the `query` parameter")
|
||||
return {"documents": self.search_engine.search(query)}, "output_1"
|
||||
return {"documents": self.search_engine.search(query, top_k=top_k)}, "output_1"
|
||||
|
||||
def run_batch(
|
||||
self,
|
||||
|
@ -1,4 +1,5 @@
|
||||
from typing import List
|
||||
from unittest.mock import patch, Mock
|
||||
from uuid import UUID
|
||||
|
||||
from numpy import loadtxt
|
||||
@ -91,3 +92,102 @@ def indexing_document_classifier():
|
||||
batch_size=16,
|
||||
classification_field="class_field",
|
||||
)
|
||||
|
||||
|
||||
# Canned SerperDev search response used by the mocked web-search fixture.
# It contains nine organic hits (positions 1-9); note that not every hit
# carries a "date" or an "imageUrl" key.
example_serperdev_response = {
    "searchParameters": {
        "q": "Who is the boyfriend of Olivia Wilde?",
        "gl": "us",
        "hl": "en",
        "autocorrect": True,
        "type": "search",
    },
    "organic": [
        {
            "title": "Olivia Wilde embraces Jason Sudeikis amid custody battle, Harry Styles split - Page Six",
            "link": "https://pagesix.com/2023/01/29/olivia-wilde-hugs-it-out-with-jason-sudeikis-after-harry-styles-split/",
            "snippet": "Looks like Olivia Wilde and Jason Sudeikis are starting 2023 on good terms. Amid their highly publicized custody battle – and the actress' ...",
            "date": "Jan 29, 2023",
            "position": 1,
        },
        {
            "title": "Olivia Wilde Is 'Quietly Dating' Again Following Harry Styles Split: 'He Makes Her Happy'",
            "link": "https://www.yahoo.com/now/olivia-wilde-quietly-dating-again-183844364.html",
            "snippet": "Olivia Wilde is “quietly dating again” following her November 2022 split from Harry Styles, a source exclusively tells Life & Style.",
            "date": "Feb 10, 2023",
            "position": 2,
        },
        {
            "title": "Olivia Wilde and Harry Styles' Relationship Timeline: The Way They Were - Us Weekly",
            "link": "https://www.usmagazine.com/celebrity-news/pictures/olivia-wilde-and-harry-styles-relationship-timeline/",
            "snippet": "Olivia Wilde started dating Harry Styles after ending her years-long engagement to Jason Sudeikis — see their relationship timeline.",
            "date": "Mar 10, 2023",
            "imageUrl": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSgTcalNFvptTbYBiDXX55s8yCGfn6F1qbed9DAN16LvynTr9GayK5SPmY&s",
            "position": 3,
        },
        {
            "title": "Olivia Wilde Is 'Ready to Date Again' After Harry Styles Split - Us Weekly",
            "link": "https://www.usmagazine.com/celebrity-news/news/olivia-wilde-is-ready-to-date-again-after-harry-styles-split/",
            "snippet": "Ready for love! Olivia Wilde is officially back on the dating scene following her split from her ex-boyfriend, Harry Styles.",
            "date": "Mar 1, 2023",
            "imageUrl": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRCRAeRy5sVE631ZctzbzuOF70xkIOHaTvh2K7dYvdiVBwALiKrIjpscok&s",
            "position": 4,
        },
        {
            "title": "Harry Styles and Olivia Wilde's Definitive Relationship Timeline - Harper's Bazaar",
            "link": "https://www.harpersbazaar.com/celebrity/latest/a35172115/harry-styles-olivia-wilde-relationship-timeline/",
            "snippet": "November 2020: News breaks about Olivia splitting from fiancé Jason Sudeikis. ... In mid-November, news breaks of Olivia Wilde's split from Jason ...",
            "date": "Feb 23, 2023",
            "imageUrl": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRRqw3fvZOIGHEepxCc7yFAWYsS_v_1H6X-4nxyFJxdfRuFQw_BrI6JVzI&s",
            "position": 5,
        },
        {
            # This hit has neither "date" nor "imageUrl".
            "title": "Harry Styles and Olivia Wilde's Relationship Timeline - People",
            "link": "https://people.com/music/harry-styles-olivia-wilde-relationship-timeline/",
            "snippet": "Harry Styles and Olivia Wilde first met on the set of Don't Worry Darling and stepped out as a couple in January 2021. Relive all their biggest relationship ...",
            "position": 6,
        },
        {
            "title": "Jason Sudeikis and Olivia Wilde's Relationship Timeline - People",
            "link": "https://people.com/movies/jason-sudeikis-olivia-wilde-relationship-timeline/",
            "snippet": "Jason Sudeikis and Olivia Wilde ended their engagement of seven years in 2020. Here's a complete timeline of their relationship.",
            "date": "Mar 24, 2023",
            "imageUrl": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSleZoXusQyJJe2WMgIuck_cVaJ8AE0_hU2QxsXzYvKANi55UQlv82yAVI&s",
            "position": 7,
        },
        {
            "title": "Olivia Wilde's anger at ex-boyfriend Harry Styles: She resents him and thinks he was using her | Marca",
            "link": "https://www.marca.com/en/lifestyle/celebrities/2023/02/23/63f779a4e2704e8d988b4624.html",
            "snippet": "The two started dating after Wilde split up with actor Jason Sudeikisin 2020. However, their relationship came to an end last November.",
            "date": "Feb 23, 2023",
            "imageUrl": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQBgJF2mSnIWCvPrqUqM4WTI9xPNWPyLvHuune85swpB1yE_G8cy_7KRh0&s",
            "position": 8,
        },
        {
            "title": "Olivia Wilde's dating history: Who has the actress dated? | The US Sun",
            "link": "https://www.the-sun.com/entertainment/5221040/olivia-wildes-dating-history/",
            "snippet": "AMERICAN actress Olivia Wilde started dating Harry Styles in January 2021 after breaking off her engagement the year prior.",
            "date": "Nov 19, 2022",
            "imageUrl": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTpm8BToVFHJoH6yRggg0fLocLT9mt6lwsnRxFFDNdDGhDydzQiSKZ9__g&s",
            "position": 9,
        },
    ],
    # Each related search is a single-key {"query": ...} mapping.
    "relatedSearches": [
        {"query": related_query}
        for related_query in (
            "Harry Styles girlfriends in order",
            "Harry Styles and Olivia Wilde engaged",
            "Harry Styles and Olivia Wilde wedding",
            "Who is Harry Styles married to",
            "Jason Sudeikis Olivia Wilde relationship",
            "Olivia Wilde and Jason Sudeikis kids",
            "Olivia Wilde children",
            "Harry Styles and Olivia Wilde age difference",
            "Jason Sudeikis Olivia Wilde, Harry Styles",
        )
    ],
}
|
||||
|
||||
|
||||
@pytest.fixture
def mock_web_search():
    """
    Replace ``requests`` inside ``haystack.nodes.search_engine.providers`` with a mock.

    While the fixture is active, ``requests.request(...)`` in that module returns an object
    with ``status_code == 200`` whose ``json()`` yields ``example_serperdev_response``.

    :return: Yields the mock that stands in for the patched ``requests`` name.
    """
    canned_response = Mock(status_code=200, json=lambda: example_serperdev_response)
    with patch("haystack.nodes.search_engine.providers.requests") as patched_requests:
        patched_requests.request.return_value = canned_response
        yield patched_requests
|
||||
|
@ -1124,156 +1124,6 @@ def test_multimodal_text_image_retrieval(text_docs: List[Document], image_docs:
|
||||
assert text_results[0].content == "My name is Christelle and I live in Paris"
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_web_retriever_mode_raw_documents(monkeypatch):
    """
    Check ``WebRetriever`` in ``raw_documents`` mode against fully stubbed collaborators.

    ``WebSearch.run``, ``ArticleExtractor.get_content`` and ``requests.get`` are monkeypatched,
    so no network access happens. The test expects a single ``Document`` whose content is the
    stubbed article text, whose score is ``None`` and whose meta has no ``_split_id``.
    """
    # Text handed back by the stubbed ArticleExtractor; the retrieved document must carry it verbatim.
    article_text = "What are the top solutions for\nArya Stark's Father\nWe found 1 solutions for\nArya Stark's Father\n.The top solutions is determined by popularity, ratings and frequency of searches. The most likely answer for the clue is NED..."

    expected_search_results = {
        "documents": [
            Document(
                content="Eddard Stark",
                score=0.9090909090909091,
                meta={"title": "Eddard Stark", "link": "", "score": 0.9090909090909091},
                id_hash_keys=["content"],
                id="f408db6de8de0ffad0cb47cf8830dbb8",
            ),
            Document(
                content="The most likely answer for the clue is NED. How many solutions does Arya Stark's Father have? With crossword-solver.io you will find 1 solutions. We use ...",
                score=0.09090909090909091,
                meta={
                    "title": "Arya Stark's Father - Crossword Clue Answers",
                    "link": "https://crossword-solver.io/clue/arya-stark%27s-father/",
                    "position": 1,
                    "score": 0.09090909090909091,
                },
                id_hash_keys=["content"],
                id="51779277acf94cf90e7663db137c0732",
            ),
        ]
    }

    def mock_web_search_run(self, query: str) -> Tuple[Dict, str]:
        return expected_search_results, "output_1"

    class MockResponse:
        def __init__(self, text, status_code):
            self.text = text
            self.status_code = status_code

    def get(url, headers, timeout):
        return MockResponse("mocked", 200)

    def get_content(self, text: str) -> str:
        return article_text

    monkeypatch.setattr(WebSearch, "run", mock_web_search_run)
    monkeypatch.setattr(ArticleExtractor, "get_content", get_content)
    monkeypatch.setattr(requests, "get", get)

    web_retriever = WebRetriever(api_key="", top_search_results=2, mode="raw_documents")
    result = web_retriever.retrieve(query="Who is the father of Arya Stark?")

    # NOTE(review): only one of the two search hits is expected back - presumably the hit
    # with the empty link is dropped by the retriever; confirm against WebRetriever.retrieve.
    assert len(result) == 1
    assert isinstance(result[0], Document)
    assert result[0].content == article_text
    # Identity comparison is the correct check for the None singleton (PEP 8).
    assert result[0].score is None
    assert result[0].meta["url"] == "https://crossword-solver.io/clue/arya-stark%27s-father/"
    # Only preprocessed docs but not raw docs should have the _split_id field
    assert "_split_id" not in result[0].meta
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_web_retriever_mode_preprocessed_documents(monkeypatch):
    """
    Check ``WebRetriever`` in ``preprocessed_documents`` mode against fully stubbed collaborators.

    ``WebSearch.run``, ``ArticleExtractor.get_content`` and ``requests.get`` are monkeypatched,
    so no network access happens. The test expects a single ``Document`` whose content is the
    stubbed article text, whose score is ``None`` and whose meta carries ``_split_id == 0``.
    """
    # Text handed back by the stubbed ArticleExtractor; the retrieved document must carry it verbatim.
    article_text = "What are the top solutions for\nArya Stark's Father\nWe found 1 solutions for\nArya Stark's Father\n.The top solutions is determined by popularity, ratings and frequency of searches. The most likely answer for the clue is NED..."

    expected_search_results = {
        "documents": [
            Document(
                content="Eddard Stark",
                score=0.9090909090909091,
                meta={"title": "Eddard Stark", "link": "", "score": 0.9090909090909091},
                id_hash_keys=["content"],
                id="f408db6de8de0ffad0cb47cf8830dbb8",
            ),
            Document(
                content="The most likely answer for the clue is NED. How many solutions does Arya Stark's Father have? With crossword-solver.io you will find 1 solutions. We use ...",
                score=0.09090909090909091,
                meta={
                    "title": "Arya Stark's Father - Crossword Clue Answers",
                    "link": "https://crossword-solver.io/clue/arya-stark%27s-father/",
                    "position": 1,
                    "score": 0.09090909090909091,
                },
                id_hash_keys=["content"],
                id="51779277acf94cf90e7663db137c0732",
            ),
        ]
    }

    def mock_web_search_run(self, query: str) -> Tuple[Dict, str]:
        return expected_search_results, "output_1"

    class MockResponse:
        def __init__(self, text, status_code):
            self.text = text
            self.status_code = status_code

    def get(url, headers, timeout):
        return MockResponse("mocked", 200)

    def get_content(self, text: str) -> str:
        return article_text

    monkeypatch.setattr(WebSearch, "run", mock_web_search_run)
    monkeypatch.setattr(ArticleExtractor, "get_content", get_content)
    monkeypatch.setattr(requests, "get", get)

    web_retriever = WebRetriever(api_key="", top_search_results=2, mode="preprocessed_documents")
    result = web_retriever.retrieve(query="Who is the father of Arya Stark?")

    # NOTE(review): only one of the two search hits is expected back - presumably the hit
    # with the empty link is dropped by the retriever; confirm against WebRetriever.retrieve.
    assert len(result) == 1
    assert isinstance(result[0], Document)
    assert result[0].content == article_text
    # Identity comparison is the correct check for the None singleton (PEP 8).
    assert result[0].score is None
    assert result[0].meta["url"] == "https://crossword-solver.io/clue/arya-stark%27s-father/"
    assert result[0].meta["_split_id"] == 0
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_web_retriever_mode_snippets(monkeypatch):
    """
    Check that a ``WebRetriever`` built without an explicit ``mode`` returns the search hits as-is.

    ``WebSearch.run`` is monkeypatched to return two fixed documents; ``retrieve`` is expected
    to hand back exactly those documents.
    """
    eddard_doc = Document(
        content="Eddard Stark",
        score=0.9090909090909091,
        meta={"title": "Eddard Stark", "link": "", "score": 0.9090909090909091},
        id_hash_keys=["content"],
        id="f408db6de8de0ffad0cb47cf8830dbb8",
    )
    crossword_doc = Document(
        content="The most likely answer for the clue is NED. How many solutions does Arya Stark's Father have? With crossword-solver.io you will find 1 solutions. We use ...",
        score=0.09090909090909091,
        meta={
            "title": "Arya Stark's Father - Crossword Clue Answers",
            "link": "https://crossword-solver.io/clue/arya-stark%27s-father/",
            "position": 1,
            "score": 0.09090909090909091,
        },
        id_hash_keys=["content"],
        id="51779277acf94cf90e7663db137c0732",
    )
    expected_search_results = {"documents": [eddard_doc, crossword_doc]}

    def mock_web_search_run(self, query: str) -> Tuple[Dict, str]:
        return expected_search_results, "output_1"

    monkeypatch.setattr(WebSearch, "run", mock_web_search_run)

    retriever = WebRetriever(api_key="", top_search_results=2)
    snippets = retriever.retrieve(query="Who is the father of Arya Stark?")
    assert snippets == expected_search_results["documents"]
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
@patch("haystack.nodes.retriever._openai_encoder.openai_request")
|
||||
def test_openai_default_api_base(mock_request):
|
||||
|
198
test/nodes/test_web_retriever.py
Normal file
198
test/nodes/test_web_retriever.py
Normal file
@ -0,0 +1,198 @@
|
||||
import os
|
||||
from typing import Dict, Tuple
|
||||
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
from boilerpy3.extractors import ArticleExtractor
|
||||
|
||||
from haystack import Document, Pipeline
|
||||
from haystack.nodes import WebSearch, WebRetriever, PromptNode
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_web_retriever_mode_raw_documents(monkeypatch):
    """
    Check ``WebRetriever`` in ``raw_documents`` mode against fully stubbed collaborators.

    ``WebSearch.run``, ``ArticleExtractor.get_content`` and ``requests.get`` are monkeypatched,
    so no network access happens. The test expects a single ``Document`` whose content is the
    stubbed article text, whose score is ``None`` and whose meta has no ``_split_id``.
    """
    # Text handed back by the stubbed ArticleExtractor; the retrieved document must carry it verbatim.
    article_text = "What are the top solutions for\nArya Stark's Father\nWe found 1 solutions for\nArya Stark's Father\n.The top solutions is determined by popularity, ratings and frequency of searches. The most likely answer for the clue is NED..."

    expected_search_results = {
        "documents": [
            Document(
                content="Eddard Stark",
                score=0.9090909090909091,
                meta={"title": "Eddard Stark", "link": "", "score": 0.9090909090909091},
                id_hash_keys=["content"],
                id="f408db6de8de0ffad0cb47cf8830dbb8",
            ),
            Document(
                content="The most likely answer for the clue is NED. How many solutions does Arya Stark's Father have? With crossword-solver.io you will find 1 solutions. We use ...",
                score=0.09090909090909091,
                meta={
                    "title": "Arya Stark's Father - Crossword Clue Answers",
                    "link": "https://crossword-solver.io/clue/arya-stark%27s-father/",
                    "position": 1,
                    "score": 0.09090909090909091,
                },
                id_hash_keys=["content"],
                id="51779277acf94cf90e7663db137c0732",
            ),
        ]
    }

    def mock_web_search_run(self, query: str) -> Tuple[Dict, str]:
        return expected_search_results, "output_1"

    class MockResponse:
        def __init__(self, text, status_code):
            self.text = text
            self.status_code = status_code

    def get(url, headers, timeout):
        return MockResponse("mocked", 200)

    def get_content(self, text: str) -> str:
        return article_text

    monkeypatch.setattr(WebSearch, "run", mock_web_search_run)
    monkeypatch.setattr(ArticleExtractor, "get_content", get_content)
    monkeypatch.setattr(requests, "get", get)

    web_retriever = WebRetriever(api_key="", top_search_results=2, mode="raw_documents")
    result = web_retriever.retrieve(query="Who is the father of Arya Stark?")

    # NOTE(review): only one of the two search hits is expected back - presumably the hit
    # with the empty link is dropped by the retriever; confirm against WebRetriever.retrieve.
    assert len(result) == 1
    assert isinstance(result[0], Document)
    assert result[0].content == article_text
    # Identity comparison is the correct check for the None singleton (PEP 8).
    assert result[0].score is None
    assert result[0].meta["url"] == "https://crossword-solver.io/clue/arya-stark%27s-father/"
    # Only preprocessed docs but not raw docs should have the _split_id field
    assert "_split_id" not in result[0].meta
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_web_retriever_mode_preprocessed_documents(monkeypatch):
    """
    Check ``WebRetriever`` in ``preprocessed_documents`` mode against fully stubbed collaborators.

    ``WebSearch.run``, ``ArticleExtractor.get_content`` and ``requests.get`` are monkeypatched,
    so no network access happens. The test expects a single ``Document`` whose content is the
    stubbed article text, whose score is ``None`` and whose meta carries ``_split_id == 0``.
    """
    # Text handed back by the stubbed ArticleExtractor; the retrieved document must carry it verbatim.
    article_text = "What are the top solutions for\nArya Stark's Father\nWe found 1 solutions for\nArya Stark's Father\n.The top solutions is determined by popularity, ratings and frequency of searches. The most likely answer for the clue is NED..."

    expected_search_results = {
        "documents": [
            Document(
                content="Eddard Stark",
                score=0.9090909090909091,
                meta={"title": "Eddard Stark", "link": "", "score": 0.9090909090909091},
                id_hash_keys=["content"],
                id="f408db6de8de0ffad0cb47cf8830dbb8",
            ),
            Document(
                content="The most likely answer for the clue is NED. How many solutions does Arya Stark's Father have? With crossword-solver.io you will find 1 solutions. We use ...",
                score=0.09090909090909091,
                meta={
                    "title": "Arya Stark's Father - Crossword Clue Answers",
                    "link": "https://crossword-solver.io/clue/arya-stark%27s-father/",
                    "position": 1,
                    "score": 0.09090909090909091,
                },
                id_hash_keys=["content"],
                id="51779277acf94cf90e7663db137c0732",
            ),
        ]
    }

    def mock_web_search_run(self, query: str) -> Tuple[Dict, str]:
        return expected_search_results, "output_1"

    class MockResponse:
        def __init__(self, text, status_code):
            self.text = text
            self.status_code = status_code

    def get(url, headers, timeout):
        return MockResponse("mocked", 200)

    def get_content(self, text: str) -> str:
        return article_text

    monkeypatch.setattr(WebSearch, "run", mock_web_search_run)
    monkeypatch.setattr(ArticleExtractor, "get_content", get_content)
    monkeypatch.setattr(requests, "get", get)

    web_retriever = WebRetriever(api_key="", top_search_results=2, mode="preprocessed_documents")
    result = web_retriever.retrieve(query="Who is the father of Arya Stark?")

    # NOTE(review): only one of the two search hits is expected back - presumably the hit
    # with the empty link is dropped by the retriever; confirm against WebRetriever.retrieve.
    assert len(result) == 1
    assert isinstance(result[0], Document)
    assert result[0].content == article_text
    # Identity comparison is the correct check for the None singleton (PEP 8).
    assert result[0].score is None
    assert result[0].meta["url"] == "https://crossword-solver.io/clue/arya-stark%27s-father/"
    assert result[0].meta["_split_id"] == 0
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_web_retriever_mode_snippets(monkeypatch):
    """
    Check that a ``WebRetriever`` built without an explicit ``mode`` returns the search hits as-is.

    ``WebSearch.run`` is monkeypatched to return two fixed documents; ``retrieve`` is expected
    to hand back exactly those documents.
    """
    eddard_doc = Document(
        content="Eddard Stark",
        score=0.9090909090909091,
        meta={"title": "Eddard Stark", "link": "", "score": 0.9090909090909091},
        id_hash_keys=["content"],
        id="f408db6de8de0ffad0cb47cf8830dbb8",
    )
    crossword_doc = Document(
        content="The most likely answer for the clue is NED. How many solutions does Arya Stark's Father have? With crossword-solver.io you will find 1 solutions. We use ...",
        score=0.09090909090909091,
        meta={
            "title": "Arya Stark's Father - Crossword Clue Answers",
            "link": "https://crossword-solver.io/clue/arya-stark%27s-father/",
            "position": 1,
            "score": 0.09090909090909091,
        },
        id_hash_keys=["content"],
        id="51779277acf94cf90e7663db137c0732",
    )
    expected_search_results = {"documents": [eddard_doc, crossword_doc]}

    def mock_web_search_run(self, query: str) -> Tuple[Dict, str]:
        return expected_search_results, "output_1"

    monkeypatch.setattr(WebSearch, "run", mock_web_search_run)

    retriever = WebRetriever(api_key="", top_search_results=2)
    snippets = retriever.retrieve(query="Who is the father of Arya Stark?")
    assert snippets == expected_search_results["documents"]
|
||||
|
||||
|
||||
@pytest.mark.unit
@pytest.mark.parametrize("top_k", [1, 3, 6])
def test_top_k_parameter(mock_web_search, top_k):
    """
    Check that ``WebRetriever.retrieve`` in ``snippets`` mode returns exactly ``top_k`` documents.

    Relies on the ``mock_web_search`` fixture, so the API key does not need to be valid.
    """
    retriever = WebRetriever(api_key="some_invalid_key", mode="snippets")
    docs = retriever.retrieve(query="Who is the boyfriend of Olivia Wilde?", top_k=top_k)

    assert len(docs) == top_k
    for doc in docs:
        assert isinstance(doc, Document)
|
||||
|
||||
|
||||
@pytest.mark.integration
@pytest.mark.skipif(
    not os.environ.get("SERPERDEV_API_KEY"),
    reason="Please export an env var called SERPERDEV_API_KEY containing the serper.dev API key to run this test.",
)
@pytest.mark.skipif(
    not os.environ.get("OPENAI_API_KEY"),
    reason="Please export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",
)
@pytest.mark.parametrize("top_k", [2, 4])
def test_top_k_parameter_in_pipeline(top_k):
    """
    Check that a ``top_k`` passed to ``WebRetriever`` through pipeline params is honoured.

    Builds a two-node pipeline (``WebRetriever`` feeding a ``PromptNode``) and asserts that the
    number of entries under ``"results"`` equals ``top_k``. Skipped unless both the
    ``SERPERDEV_API_KEY`` and ``OPENAI_API_KEY`` environment variables are set.
    """
    qa_prompt_node = PromptNode(
        "gpt-3.5-turbo",
        api_key=os.environ.get("OPENAI_API_KEY"),
        max_length=256,
        default_prompt_template="question-answering-with-document-scores",
    )
    web_retriever = WebRetriever(api_key=os.environ.get("SERPERDEV_API_KEY"))

    pipeline = Pipeline()
    pipeline.add_node(component=web_retriever, name="WebRetriever", inputs=["Query"])
    pipeline.add_node(component=qa_prompt_node, name="QAwithScoresPrompt", inputs=["WebRetriever"])

    # The per-node params are how top_k reaches the retriever when it runs inside a pipeline.
    retriever_params = {"WebRetriever": {"top_k": top_k}}
    output = pipeline.run(query="What year was Obama president", params=retriever_params)
    assert len(output["results"]) == top_k
|
@ -1,5 +1,4 @@
|
||||
import os
|
||||
import unittest
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
@ -99,9 +98,19 @@ def test_web_search_with_google_api_client():
|
||||
search_engine_provider="GoogleAPI",
|
||||
search_engine_kwargs={"engine_id": SEARCH_ENGINE_ID},
|
||||
)
|
||||
result, _ = ws.run(query=query)
|
||||
_, _ = ws.run(query=query)
|
||||
|
||||
mock_build.assert_called_once_with("customsearch", "v1", developerKey=GOOGLE_API_KEY)
|
||||
mock_service.cse.assert_called_once()
|
||||
mock_cse.list.assert_called_once_with(q=query, cx=SEARCH_ENGINE_ID, num=10)
|
||||
mock_list.execute.assert_called_once()
|
||||
|
||||
|
||||
@pytest.mark.unit
@pytest.mark.parametrize("top_k", [1, 3, 6])
def test_web_search_top_k(mock_web_search, top_k):
    """
    Check that ``WebSearch.run`` returns exactly ``top_k`` documents under the ``"documents"`` key.

    Relies on the ``mock_web_search`` fixture, so the API key does not need to be valid.
    """
    search_node = WebSearch(api_key="some_invalid_key")
    output, _ = search_node.run(query="Who is the boyfriend of Olivia Wilde?", top_k=top_k)

    assert "documents" in output
    documents = output["documents"]
    assert len(documents) == top_k
    assert all(isinstance(doc, Document) for doc in documents)
|
||||
|
Loading…
x
Reference in New Issue
Block a user