mirror of
				https://github.com/deepset-ai/haystack.git
				synced 2025-10-25 06:48:43 +00:00 
			
		
		
		
	fix: WebRetriever top_k is ignored in a pipeline (#5106)
* Initial changes * Add WebSearch and WebRetriever top_k unit tests * Add the exact integration test that failed for Tuana * PR review
This commit is contained in:
		
							parent
							
								
									d8a4f20379
								
							
						
					
					
						commit
						0cc9ce7522
					
				| @ -198,7 +198,7 @@ class WebRetriever(BaseRetriever): | ||||
|             search_results, _ = self.web_search.run(query=query) | ||||
|             search_results = search_results["documents"] | ||||
|             if self.mode == "snippets": | ||||
|                 return search_results  # type: ignore | ||||
|                 return search_results[:top_k]  # type: ignore | ||||
| 
 | ||||
|             links: List[SearchResult] = [ | ||||
|                 SearchResult(r.meta["link"], r.meta.get("score", None), r.meta.get("position", None)) | ||||
|  | ||||
| @ -63,17 +63,25 @@ class WebSearch(BaseComponent): | ||||
|         labels: Optional[MultiLabel] = None, | ||||
|         documents: Optional[List[Document]] = None, | ||||
|         meta: Optional[dict] = None, | ||||
|         top_k: Optional[int] = None, | ||||
|     ) -> Tuple[Dict, str]: | ||||
|         """ | ||||
|         Search the search engine for the given query and return the results. Only the query parameter is used. | ||||
|         Search the search engine for the given query and return the results. Only the query parameter and the top_k | ||||
|         parameter are used. | ||||
|         :param query: The query to search for. | ||||
|         :param file_paths: Not used. | ||||
|         :param labels: Not used. | ||||
|         :param documents: Not used. | ||||
|         :param meta: Not used. | ||||
|         :param top_k: return only the top_k results. If None, the top_k value passed to the constructor is used. | ||||
| 
 | ||||
| 
 | ||||
|         :return: List of search results as documents. | ||||
|         """ | ||||
|         # query is a required parameter for search, we need to keep the signature of run() the same as in other nodes | ||||
|         if not query: | ||||
|             raise ValueError("WebSearch run requires the `query` parameter") | ||||
|         return {"documents": self.search_engine.search(query)}, "output_1" | ||||
|         return {"documents": self.search_engine.search(query, top_k=top_k)}, "output_1" | ||||
| 
 | ||||
|     def run_batch( | ||||
|         self, | ||||
|  | ||||
| @ -1,4 +1,5 @@ | ||||
| from typing import List | ||||
| from unittest.mock import patch, Mock | ||||
| from uuid import UUID | ||||
| 
 | ||||
| from numpy import loadtxt | ||||
| @ -91,3 +92,102 @@ def indexing_document_classifier(): | ||||
|         batch_size=16, | ||||
|         classification_field="class_field", | ||||
|     ) | ||||
| 
 | ||||
| 
 | ||||
| # Canned search-engine JSON payload: nine "organic" hits (positions 1-9) plus related searches. | ||||
| # The mock_web_search fixture serves it as the json() result of the mocked HTTP response. | ||||
| example_serperdev_response = { | ||||
|     "searchParameters": { | ||||
|         "q": "Who is the boyfriend of Olivia Wilde?", | ||||
|         "gl": "us", | ||||
|         "hl": "en", | ||||
|         "autocorrect": True, | ||||
|         "type": "search", | ||||
|     }, | ||||
|     "organic": [ | ||||
|         { | ||||
|             "title": "Olivia Wilde embraces Jason Sudeikis amid custody battle, Harry Styles split - Page Six", | ||||
|             "link": "https://pagesix.com/2023/01/29/olivia-wilde-hugs-it-out-with-jason-sudeikis-after-harry-styles-split/", | ||||
|             "snippet": "Looks like Olivia Wilde and Jason Sudeikis are starting 2023 on good terms. Amid their highly publicized custody battle – and the actress' ...", | ||||
|             "date": "Jan 29, 2023", | ||||
|             "position": 1, | ||||
|         }, | ||||
|         { | ||||
|             "title": "Olivia Wilde Is 'Quietly Dating' Again Following Harry Styles Split: 'He Makes Her Happy'", | ||||
|             "link": "https://www.yahoo.com/now/olivia-wilde-quietly-dating-again-183844364.html", | ||||
|             "snippet": "Olivia Wilde is “quietly dating again” following her November 2022 split from Harry Styles, a source exclusively tells Life & Style.", | ||||
|             "date": "Feb 10, 2023", | ||||
|             "position": 2, | ||||
|         }, | ||||
|         { | ||||
|             "title": "Olivia Wilde and Harry Styles' Relationship Timeline: The Way They Were - Us Weekly", | ||||
|             "link": "https://www.usmagazine.com/celebrity-news/pictures/olivia-wilde-and-harry-styles-relationship-timeline/", | ||||
|             "snippet": "Olivia Wilde started dating Harry Styles after ending her years-long engagement to Jason Sudeikis — see their relationship timeline.", | ||||
|             "date": "Mar 10, 2023", | ||||
|             "imageUrl": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSgTcalNFvptTbYBiDXX55s8yCGfn6F1qbed9DAN16LvynTr9GayK5SPmY&s", | ||||
|             "position": 3, | ||||
|         }, | ||||
|         { | ||||
|             "title": "Olivia Wilde Is 'Ready to Date Again' After Harry Styles Split - Us Weekly", | ||||
|             "link": "https://www.usmagazine.com/celebrity-news/news/olivia-wilde-is-ready-to-date-again-after-harry-styles-split/", | ||||
|             "snippet": "Ready for love! Olivia Wilde is officially back on the dating scene following her split from her ex-boyfriend, Harry Styles.", | ||||
|             "date": "Mar 1, 2023", | ||||
|             "imageUrl": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRCRAeRy5sVE631ZctzbzuOF70xkIOHaTvh2K7dYvdiVBwALiKrIjpscok&s", | ||||
|             "position": 4, | ||||
|         }, | ||||
|         { | ||||
|             "title": "Harry Styles and Olivia Wilde's Definitive Relationship Timeline - Harper's Bazaar", | ||||
|             "link": "https://www.harpersbazaar.com/celebrity/latest/a35172115/harry-styles-olivia-wilde-relationship-timeline/", | ||||
|             "snippet": "November 2020: News breaks about Olivia splitting from fiancé Jason Sudeikis. ... In mid-November, news breaks of Olivia Wilde's split from Jason ...", | ||||
|             "date": "Feb 23, 2023", | ||||
|             "imageUrl": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRRqw3fvZOIGHEepxCc7yFAWYsS_v_1H6X-4nxyFJxdfRuFQw_BrI6JVzI&s", | ||||
|             "position": 5, | ||||
|         }, | ||||
|         { | ||||
|             "title": "Harry Styles and Olivia Wilde's Relationship Timeline - People", | ||||
|             "link": "https://people.com/music/harry-styles-olivia-wilde-relationship-timeline/", | ||||
|             "snippet": "Harry Styles and Olivia Wilde first met on the set of Don't Worry Darling and stepped out as a couple in January 2021. Relive all their biggest relationship ...", | ||||
|             "position": 6, | ||||
|         }, | ||||
|         { | ||||
|             "title": "Jason Sudeikis and Olivia Wilde's Relationship Timeline - People", | ||||
|             "link": "https://people.com/movies/jason-sudeikis-olivia-wilde-relationship-timeline/", | ||||
|             "snippet": "Jason Sudeikis and Olivia Wilde ended their engagement of seven years in 2020. Here's a complete timeline of their relationship.", | ||||
|             "date": "Mar 24, 2023", | ||||
|             "imageUrl": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSleZoXusQyJJe2WMgIuck_cVaJ8AE0_hU2QxsXzYvKANi55UQlv82yAVI&s", | ||||
|             "position": 7, | ||||
|         }, | ||||
|         { | ||||
|             "title": "Olivia Wilde's anger at ex-boyfriend Harry Styles: She resents him and thinks he was using her | Marca", | ||||
|             "link": "https://www.marca.com/en/lifestyle/celebrities/2023/02/23/63f779a4e2704e8d988b4624.html", | ||||
|             "snippet": "The two started dating after Wilde split up with actor Jason Sudeikisin 2020. However, their relationship came to an end last November.", | ||||
|             "date": "Feb 23, 2023", | ||||
|             "imageUrl": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQBgJF2mSnIWCvPrqUqM4WTI9xPNWPyLvHuune85swpB1yE_G8cy_7KRh0&s", | ||||
|             "position": 8, | ||||
|         }, | ||||
|         { | ||||
|             "title": "Olivia Wilde's dating history: Who has the actress dated? | The US Sun", | ||||
|             "link": "https://www.the-sun.com/entertainment/5221040/olivia-wildes-dating-history/", | ||||
|             "snippet": "AMERICAN actress Olivia Wilde started dating Harry Styles in January 2021 after breaking off her engagement the year prior.", | ||||
|             "date": "Nov 19, 2022", | ||||
|             "imageUrl": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTpm8BToVFHJoH6yRggg0fLocLT9mt6lwsnRxFFDNdDGhDydzQiSKZ9__g&s", | ||||
|             "position": 9, | ||||
|         }, | ||||
|     ], | ||||
|     "relatedSearches": [ | ||||
|         {"query": "Harry Styles girlfriends in order"}, | ||||
|         {"query": "Harry Styles and Olivia Wilde engaged"}, | ||||
|         {"query": "Harry Styles and Olivia Wilde wedding"}, | ||||
|         {"query": "Who is Harry Styles married to"}, | ||||
|         {"query": "Jason Sudeikis Olivia Wilde relationship"}, | ||||
|         {"query": "Olivia Wilde and Jason Sudeikis kids"}, | ||||
|         {"query": "Olivia Wilde children"}, | ||||
|         {"query": "Harry Styles and Olivia Wilde age difference"}, | ||||
|         {"query": "Jason Sudeikis Olivia Wilde, Harry Styles"}, | ||||
|     ], | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| @pytest.fixture | ||||
| def mock_web_search(): | ||||
|     with patch("haystack.nodes.search_engine.providers.requests") as mock_run: | ||||
|         mock_run.request.return_value = Mock(status_code=200, json=lambda: example_serperdev_response) | ||||
|         yield mock_run | ||||
|  | ||||
| @ -1124,156 +1124,6 @@ def test_multimodal_text_image_retrieval(text_docs: List[Document], image_docs: | ||||
|     assert text_results[0].content == "My name is Christelle and I live in Paris" | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.unit | ||||
| def test_web_retriever_mode_raw_documents(monkeypatch): | ||||
|     expected_search_results = { | ||||
|         "documents": [ | ||||
|             Document( | ||||
|                 content="Eddard Stark", | ||||
|                 score=0.9090909090909091, | ||||
|                 meta={"title": "Eddard Stark", "link": "", "score": 0.9090909090909091}, | ||||
|                 id_hash_keys=["content"], | ||||
|                 id="f408db6de8de0ffad0cb47cf8830dbb8", | ||||
|             ), | ||||
|             Document( | ||||
|                 content="The most likely answer for the clue is NED. How many solutions does Arya Stark's Father have? With crossword-solver.io you will find 1 solutions. We use ...", | ||||
|                 score=0.09090909090909091, | ||||
|                 meta={ | ||||
|                     "title": "Arya Stark's Father - Crossword Clue Answers", | ||||
|                     "link": "https://crossword-solver.io/clue/arya-stark%27s-father/", | ||||
|                     "position": 1, | ||||
|                     "score": 0.09090909090909091, | ||||
|                 }, | ||||
|                 id_hash_keys=["content"], | ||||
|                 id="51779277acf94cf90e7663db137c0732", | ||||
|             ), | ||||
|         ] | ||||
|     } | ||||
| 
 | ||||
|     def mock_web_search_run(self, query: str) -> Tuple[Dict, str]: | ||||
|         return expected_search_results, "output_1" | ||||
| 
 | ||||
|     class MockResponse: | ||||
|         def __init__(self, text, status_code): | ||||
|             self.text = text | ||||
|             self.status_code = status_code | ||||
| 
 | ||||
|     def get(url, headers, timeout): | ||||
|         return MockResponse("mocked", 200) | ||||
| 
 | ||||
|     def get_content(self, text: str) -> str: | ||||
|         return "What are the top solutions for\nArya Stark's Father\nWe found 1 solutions for\nArya Stark's Father\n.The top solutions is determined by popularity, ratings and frequency of searches. The most likely answer for the clue is NED..." | ||||
| 
 | ||||
|     monkeypatch.setattr(WebSearch, "run", mock_web_search_run) | ||||
|     monkeypatch.setattr(ArticleExtractor, "get_content", get_content) | ||||
|     monkeypatch.setattr(requests, "get", get) | ||||
| 
 | ||||
|     web_retriever = WebRetriever(api_key="", top_search_results=2, mode="raw_documents") | ||||
|     result = web_retriever.retrieve(query="Who is the father of Arya Stark?") | ||||
|     assert len(result) == 1 | ||||
|     assert isinstance(result[0], Document) | ||||
|     assert ( | ||||
|         result[0].content | ||||
|         == "What are the top solutions for\nArya Stark's Father\nWe found 1 solutions for\nArya Stark's Father\n.The top solutions is determined by popularity, ratings and frequency of searches. The most likely answer for the clue is NED..." | ||||
|     ) | ||||
|     assert result[0].score == None | ||||
|     assert result[0].meta["url"] == "https://crossword-solver.io/clue/arya-stark%27s-father/" | ||||
|     # Only preprocessed docs but not raw docs should have the _split_id field | ||||
|     assert "_split_id" not in result[0].meta | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.unit | ||||
| def test_web_retriever_mode_preprocessed_documents(monkeypatch): | ||||
|     expected_search_results = { | ||||
|         "documents": [ | ||||
|             Document( | ||||
|                 content="Eddard Stark", | ||||
|                 score=0.9090909090909091, | ||||
|                 meta={"title": "Eddard Stark", "link": "", "score": 0.9090909090909091}, | ||||
|                 id_hash_keys=["content"], | ||||
|                 id="f408db6de8de0ffad0cb47cf8830dbb8", | ||||
|             ), | ||||
|             Document( | ||||
|                 content="The most likely answer for the clue is NED. How many solutions does Arya Stark's Father have? With crossword-solver.io you will find 1 solutions. We use ...", | ||||
|                 score=0.09090909090909091, | ||||
|                 meta={ | ||||
|                     "title": "Arya Stark's Father - Crossword Clue Answers", | ||||
|                     "link": "https://crossword-solver.io/clue/arya-stark%27s-father/", | ||||
|                     "position": 1, | ||||
|                     "score": 0.09090909090909091, | ||||
|                 }, | ||||
|                 id_hash_keys=["content"], | ||||
|                 id="51779277acf94cf90e7663db137c0732", | ||||
|             ), | ||||
|         ] | ||||
|     } | ||||
| 
 | ||||
|     def mock_web_search_run(self, query: str) -> Tuple[Dict, str]: | ||||
|         return expected_search_results, "output_1" | ||||
| 
 | ||||
|     class MockResponse: | ||||
|         def __init__(self, text, status_code): | ||||
|             self.text = text | ||||
|             self.status_code = status_code | ||||
| 
 | ||||
|     def get(url, headers, timeout): | ||||
|         return MockResponse("mocked", 200) | ||||
| 
 | ||||
|     def get_content(self, text: str) -> str: | ||||
|         return "What are the top solutions for\nArya Stark's Father\nWe found 1 solutions for\nArya Stark's Father\n.The top solutions is determined by popularity, ratings and frequency of searches. The most likely answer for the clue is NED..." | ||||
| 
 | ||||
|     monkeypatch.setattr(WebSearch, "run", mock_web_search_run) | ||||
|     monkeypatch.setattr(ArticleExtractor, "get_content", get_content) | ||||
|     monkeypatch.setattr(requests, "get", get) | ||||
| 
 | ||||
|     web_retriever = WebRetriever(api_key="", top_search_results=2, mode="preprocessed_documents") | ||||
|     result = web_retriever.retrieve(query="Who is the father of Arya Stark?") | ||||
|     assert len(result) == 1 | ||||
|     assert isinstance(result[0], Document) | ||||
|     assert ( | ||||
|         result[0].content | ||||
|         == "What are the top solutions for\nArya Stark's Father\nWe found 1 solutions for\nArya Stark's Father\n.The top solutions is determined by popularity, ratings and frequency of searches. The most likely answer for the clue is NED..." | ||||
|     ) | ||||
|     assert result[0].score == None | ||||
|     assert result[0].meta["url"] == "https://crossword-solver.io/clue/arya-stark%27s-father/" | ||||
|     assert result[0].meta["_split_id"] == 0 | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.unit | ||||
| def test_web_retriever_mode_snippets(monkeypatch): | ||||
|     expected_search_results = { | ||||
|         "documents": [ | ||||
|             Document( | ||||
|                 content="Eddard Stark", | ||||
|                 score=0.9090909090909091, | ||||
|                 meta={"title": "Eddard Stark", "link": "", "score": 0.9090909090909091}, | ||||
|                 id_hash_keys=["content"], | ||||
|                 id="f408db6de8de0ffad0cb47cf8830dbb8", | ||||
|             ), | ||||
|             Document( | ||||
|                 content="The most likely answer for the clue is NED. How many solutions does Arya Stark's Father have? With crossword-solver.io you will find 1 solutions. We use ...", | ||||
|                 score=0.09090909090909091, | ||||
|                 meta={ | ||||
|                     "title": "Arya Stark's Father - Crossword Clue Answers", | ||||
|                     "link": "https://crossword-solver.io/clue/arya-stark%27s-father/", | ||||
|                     "position": 1, | ||||
|                     "score": 0.09090909090909091, | ||||
|                 }, | ||||
|                 id_hash_keys=["content"], | ||||
|                 id="51779277acf94cf90e7663db137c0732", | ||||
|             ), | ||||
|         ] | ||||
|     } | ||||
| 
 | ||||
|     def mock_web_search_run(self, query: str) -> Tuple[Dict, str]: | ||||
|         return expected_search_results, "output_1" | ||||
| 
 | ||||
|     monkeypatch.setattr(WebSearch, "run", mock_web_search_run) | ||||
|     web_retriever = WebRetriever(api_key="", top_search_results=2) | ||||
|     result = web_retriever.retrieve(query="Who is the father of Arya Stark?") | ||||
|     assert result == expected_search_results["documents"] | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.unit | ||||
| @patch("haystack.nodes.retriever._openai_encoder.openai_request") | ||||
| def test_openai_default_api_base(mock_request): | ||||
|  | ||||
							
								
								
									
										198
									
								
								test/nodes/test_web_retriever.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										198
									
								
								test/nodes/test_web_retriever.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,198 @@ | ||||
| import os | ||||
| from typing import Dict, Tuple | ||||
| 
 | ||||
| 
 | ||||
| import pytest | ||||
| import requests | ||||
| from boilerpy3.extractors import ArticleExtractor | ||||
| 
 | ||||
| from haystack import Document, Pipeline | ||||
| from haystack.nodes import WebSearch, WebRetriever, PromptNode | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.unit | ||||
| def test_web_retriever_mode_raw_documents(monkeypatch): | ||||
|     expected_search_results = { | ||||
|         "documents": [ | ||||
|             Document( | ||||
|                 content="Eddard Stark", | ||||
|                 score=0.9090909090909091, | ||||
|                 meta={"title": "Eddard Stark", "link": "", "score": 0.9090909090909091}, | ||||
|                 id_hash_keys=["content"], | ||||
|                 id="f408db6de8de0ffad0cb47cf8830dbb8", | ||||
|             ), | ||||
|             Document( | ||||
|                 content="The most likely answer for the clue is NED. How many solutions does Arya Stark's Father have? With crossword-solver.io you will find 1 solutions. We use ...", | ||||
|                 score=0.09090909090909091, | ||||
|                 meta={ | ||||
|                     "title": "Arya Stark's Father - Crossword Clue Answers", | ||||
|                     "link": "https://crossword-solver.io/clue/arya-stark%27s-father/", | ||||
|                     "position": 1, | ||||
|                     "score": 0.09090909090909091, | ||||
|                 }, | ||||
|                 id_hash_keys=["content"], | ||||
|                 id="51779277acf94cf90e7663db137c0732", | ||||
|             ), | ||||
|         ] | ||||
|     } | ||||
| 
 | ||||
|     def mock_web_search_run(self, query: str) -> Tuple[Dict, str]: | ||||
|         return expected_search_results, "output_1" | ||||
| 
 | ||||
|     class MockResponse: | ||||
|         def __init__(self, text, status_code): | ||||
|             self.text = text | ||||
|             self.status_code = status_code | ||||
| 
 | ||||
|     def get(url, headers, timeout): | ||||
|         return MockResponse("mocked", 200) | ||||
| 
 | ||||
|     def get_content(self, text: str) -> str: | ||||
|         return "What are the top solutions for\nArya Stark's Father\nWe found 1 solutions for\nArya Stark's Father\n.The top solutions is determined by popularity, ratings and frequency of searches. The most likely answer for the clue is NED..." | ||||
| 
 | ||||
|     monkeypatch.setattr(WebSearch, "run", mock_web_search_run) | ||||
|     monkeypatch.setattr(ArticleExtractor, "get_content", get_content) | ||||
|     monkeypatch.setattr(requests, "get", get) | ||||
| 
 | ||||
|     web_retriever = WebRetriever(api_key="", top_search_results=2, mode="raw_documents") | ||||
|     result = web_retriever.retrieve(query="Who is the father of Arya Stark?") | ||||
|     assert len(result) == 1 | ||||
|     assert isinstance(result[0], Document) | ||||
|     assert ( | ||||
|         result[0].content | ||||
|         == "What are the top solutions for\nArya Stark's Father\nWe found 1 solutions for\nArya Stark's Father\n.The top solutions is determined by popularity, ratings and frequency of searches. The most likely answer for the clue is NED..." | ||||
|     ) | ||||
|     assert result[0].score == None | ||||
|     assert result[0].meta["url"] == "https://crossword-solver.io/clue/arya-stark%27s-father/" | ||||
|     # Only preprocessed docs but not raw docs should have the _split_id field | ||||
|     assert "_split_id" not in result[0].meta | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.unit | ||||
| def test_web_retriever_mode_preprocessed_documents(monkeypatch): | ||||
|     expected_search_results = { | ||||
|         "documents": [ | ||||
|             Document( | ||||
|                 content="Eddard Stark", | ||||
|                 score=0.9090909090909091, | ||||
|                 meta={"title": "Eddard Stark", "link": "", "score": 0.9090909090909091}, | ||||
|                 id_hash_keys=["content"], | ||||
|                 id="f408db6de8de0ffad0cb47cf8830dbb8", | ||||
|             ), | ||||
|             Document( | ||||
|                 content="The most likely answer for the clue is NED. How many solutions does Arya Stark's Father have? With crossword-solver.io you will find 1 solutions. We use ...", | ||||
|                 score=0.09090909090909091, | ||||
|                 meta={ | ||||
|                     "title": "Arya Stark's Father - Crossword Clue Answers", | ||||
|                     "link": "https://crossword-solver.io/clue/arya-stark%27s-father/", | ||||
|                     "position": 1, | ||||
|                     "score": 0.09090909090909091, | ||||
|                 }, | ||||
|                 id_hash_keys=["content"], | ||||
|                 id="51779277acf94cf90e7663db137c0732", | ||||
|             ), | ||||
|         ] | ||||
|     } | ||||
| 
 | ||||
|     def mock_web_search_run(self, query: str) -> Tuple[Dict, str]: | ||||
|         return expected_search_results, "output_1" | ||||
| 
 | ||||
|     class MockResponse: | ||||
|         def __init__(self, text, status_code): | ||||
|             self.text = text | ||||
|             self.status_code = status_code | ||||
| 
 | ||||
|     def get(url, headers, timeout): | ||||
|         return MockResponse("mocked", 200) | ||||
| 
 | ||||
|     def get_content(self, text: str) -> str: | ||||
|         return "What are the top solutions for\nArya Stark's Father\nWe found 1 solutions for\nArya Stark's Father\n.The top solutions is determined by popularity, ratings and frequency of searches. The most likely answer for the clue is NED..." | ||||
| 
 | ||||
|     monkeypatch.setattr(WebSearch, "run", mock_web_search_run) | ||||
|     monkeypatch.setattr(ArticleExtractor, "get_content", get_content) | ||||
|     monkeypatch.setattr(requests, "get", get) | ||||
| 
 | ||||
|     web_retriever = WebRetriever(api_key="", top_search_results=2, mode="preprocessed_documents") | ||||
|     result = web_retriever.retrieve(query="Who is the father of Arya Stark?") | ||||
|     assert len(result) == 1 | ||||
|     assert isinstance(result[0], Document) | ||||
|     assert ( | ||||
|         result[0].content | ||||
|         == "What are the top solutions for\nArya Stark's Father\nWe found 1 solutions for\nArya Stark's Father\n.The top solutions is determined by popularity, ratings and frequency of searches. The most likely answer for the clue is NED..." | ||||
|     ) | ||||
|     assert result[0].score == None | ||||
|     assert result[0].meta["url"] == "https://crossword-solver.io/clue/arya-stark%27s-father/" | ||||
|     assert result[0].meta["_split_id"] == 0 | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.unit | ||||
| def test_web_retriever_mode_snippets(monkeypatch): | ||||
|     expected_search_results = { | ||||
|         "documents": [ | ||||
|             Document( | ||||
|                 content="Eddard Stark", | ||||
|                 score=0.9090909090909091, | ||||
|                 meta={"title": "Eddard Stark", "link": "", "score": 0.9090909090909091}, | ||||
|                 id_hash_keys=["content"], | ||||
|                 id="f408db6de8de0ffad0cb47cf8830dbb8", | ||||
|             ), | ||||
|             Document( | ||||
|                 content="The most likely answer for the clue is NED. How many solutions does Arya Stark's Father have? With crossword-solver.io you will find 1 solutions. We use ...", | ||||
|                 score=0.09090909090909091, | ||||
|                 meta={ | ||||
|                     "title": "Arya Stark's Father - Crossword Clue Answers", | ||||
|                     "link": "https://crossword-solver.io/clue/arya-stark%27s-father/", | ||||
|                     "position": 1, | ||||
|                     "score": 0.09090909090909091, | ||||
|                 }, | ||||
|                 id_hash_keys=["content"], | ||||
|                 id="51779277acf94cf90e7663db137c0732", | ||||
|             ), | ||||
|         ] | ||||
|     } | ||||
| 
 | ||||
|     def mock_web_search_run(self, query: str) -> Tuple[Dict, str]: | ||||
|         return expected_search_results, "output_1" | ||||
| 
 | ||||
|     monkeypatch.setattr(WebSearch, "run", mock_web_search_run) | ||||
|     web_retriever = WebRetriever(api_key="", top_search_results=2) | ||||
|     result = web_retriever.retrieve(query="Who is the father of Arya Stark?") | ||||
|     assert result == expected_search_results["documents"] | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.unit | ||||
| @pytest.mark.parametrize("top_k", [1, 3, 6]) | ||||
| def test_top_k_parameter(mock_web_search, top_k): | ||||
|     web_retriever = WebRetriever(api_key="some_invalid_key", mode="snippets") | ||||
|     result = web_retriever.retrieve(query="Who is the boyfriend of Olivia Wilde?", top_k=top_k) | ||||
|     assert len(result) == top_k | ||||
|     assert all(isinstance(doc, Document) for doc in result) | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.integration | ||||
| @pytest.mark.skipif( | ||||
|     not os.environ.get("SERPERDEV_API_KEY", None), | ||||
|     reason="Please export an env var called SERPERDEV_API_KEY containing the serper.dev API key to run this test.", | ||||
| ) | ||||
| @pytest.mark.skipif( | ||||
|     not os.environ.get("OPENAI_API_KEY", None), | ||||
|     reason="Please export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", | ||||
| ) | ||||
| @pytest.mark.parametrize("top_k", [2, 4]) | ||||
| def test_top_k_parameter_in_pipeline(top_k): | ||||
|     # test that WebRetriever top_k param is NOT ignored in a pipeline | ||||
|     prompt_node = PromptNode( | ||||
|         "gpt-3.5-turbo", | ||||
|         api_key=os.environ.get("OPENAI_API_KEY"), | ||||
|         max_length=256, | ||||
|         default_prompt_template="question-answering-with-document-scores", | ||||
|     ) | ||||
| 
 | ||||
|     retriever = WebRetriever(api_key=os.environ.get("SERPERDEV_API_KEY")) | ||||
| 
 | ||||
|     pipe = Pipeline() | ||||
| 
 | ||||
|     pipe.add_node(component=retriever, name="WebRetriever", inputs=["Query"]) | ||||
|     pipe.add_node(component=prompt_node, name="QAwithScoresPrompt", inputs=["WebRetriever"]) | ||||
|     result = pipe.run(query="What year was Obama president", params={"WebRetriever": {"top_k": top_k}}) | ||||
|     assert len(result["results"]) == top_k | ||||
| @ -1,5 +1,4 @@ | ||||
| import os | ||||
| import unittest | ||||
| from unittest.mock import MagicMock, patch | ||||
| 
 | ||||
| import pytest | ||||
| @ -99,9 +98,19 @@ def test_web_search_with_google_api_client(): | ||||
|             search_engine_provider="GoogleAPI", | ||||
|             search_engine_kwargs={"engine_id": SEARCH_ENGINE_ID}, | ||||
|         ) | ||||
|         result, _ = ws.run(query=query) | ||||
|         _, _ = ws.run(query=query) | ||||
| 
 | ||||
|         mock_build.assert_called_once_with("customsearch", "v1", developerKey=GOOGLE_API_KEY) | ||||
|         mock_service.cse.assert_called_once() | ||||
|         mock_cse.list.assert_called_once_with(q=query, cx=SEARCH_ENGINE_ID, num=10) | ||||
|         mock_list.execute.assert_called_once() | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.unit | ||||
| @pytest.mark.parametrize("top_k", [1, 3, 6]) | ||||
| def test_web_search_top_k(mock_web_search, top_k): | ||||
|     ws = WebSearch(api_key="some_invalid_key") | ||||
|     result, _ = ws.run(query="Who is the boyfriend of Olivia Wilde?", top_k=top_k) | ||||
|     assert "documents" in result | ||||
|     assert len(result["documents"]) == top_k | ||||
|     assert all(isinstance(doc, Document) for doc in result["documents"]) | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Vladimir Blagojevic
						Vladimir Blagojevic