diff --git a/haystack/components/readers/extractive.py b/haystack/components/readers/extractive.py index 62e1ce002..29bee01d3 100644 --- a/haystack/components/readers/extractive.py +++ b/haystack/components/readers/extractive.py @@ -210,6 +210,7 @@ class ExtractiveReader: """ texts = [] document_ids = [] + document_contents = [] for i, doc in enumerate(documents): if doc.content is None: warnings.warn( @@ -219,9 +220,11 @@ class ExtractiveReader: continue texts.append(doc.content) document_ids.append(i) + document_contents.append(doc.content) + encodings_pt = self.tokenizer( # type: ignore queries, - [document.content for document in documents], + document_contents, padding=True, truncation=True, max_length=max_seq_length, @@ -571,6 +574,9 @@ class ExtractiveReader: :raises ComponentError: If the component was not warmed up by calling 'warm_up()' before. """ + if not documents: + return {"answers": []} + queries = [query] # Temporary solution until we have decided what batching should look like in v2 nested_documents = [documents] if self.model is None: diff --git a/releasenotes/notes/reader-crash-no-docs-53085ce48baaae81.yaml b/releasenotes/notes/reader-crash-no-docs-53085ce48baaae81.yaml new file mode 100644 index 000000000..cf0cd02a8 --- /dev/null +++ b/releasenotes/notes/reader-crash-no-docs-53085ce48baaae81.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + Return an empty list of answers when `ExtractiveReader` receives an empty list of documents instead of raising an exception. diff --git a/test/components/readers/test_extractive.py b/test/components/readers/test_extractive.py index e6a0fca83..f9a161e80 100644 --- a/test/components/readers/test_extractive.py +++ b/test/components/readers/test_extractive.py @@ -266,13 +266,17 @@ def test_from_dict_no_token(): assert component.token is None +def test_run_no_docs(mock_reader: ExtractiveReader): + mock_reader.warm_up() + assert mock_reader.run(query="hello", documents=[]) == {"answers": []} + + def test_output(mock_reader: ExtractiveReader): - answers = mock_reader.run(example_queries[0], example_documents[0], top_k=3)[ - "answers" - ] # [0] Uncomment and remove first two indices when batching support is reintroduced + answers = mock_reader.run(example_queries[0], example_documents[0], top_k=3)["answers"] doc_ids = set() no_answer_prob = 1 for doc, answer in zip(example_documents[0], answers[:3]): + assert answer.document_offset is not None assert answer.document_offset.start == 11 assert answer.document_offset.end == 16 assert doc.content is not None