fix: exit early if the component receives no documents (#7732)

* exit early if the component receives no documents
* add release note
2026-01-07 04:27:15 +00:00 · 2024-05-23 09:35:10 +02:00 · 2024-05-23 09:35:10 +02:00 · 482f60ec99
commit 482f60ec99
parent a4fc2b66e6
3 changed files with 18 additions and 4 deletions
--- a/haystack/components/readers/extractive.py
+++ b/haystack/components/readers/extractive.py
@ -210,6 +210,7 @@ class ExtractiveReader:
        """
        texts = []
        document_ids = []
+        document_contents = []
        for i, doc in enumerate(documents):
            if doc.content is None:
                warnings.warn(
@ -219,9 +220,11 @@ class ExtractiveReader:
                continue
            texts.append(doc.content)
            document_ids.append(i)
+            document_contents.append(doc.content)
+
        encodings_pt = self.tokenizer(  # type: ignore
            queries,
-            [document.content for document in documents],
+            document_contents,
            padding=True,
            truncation=True,
            max_length=max_seq_length,
@ -571,6 +574,9 @@ class ExtractiveReader:
        :raises ComponentError:
            If the component was not warmed up by calling 'warm_up()' before.
        """
+        if not documents:
+            return {"answers": []}
+
        queries = [query]  # Temporary solution until we have decided what batching should look like in v2
        nested_documents = [documents]
        if self.model is None:
--- a/releasenotes/notes/reader-crash-no-docs-53085ce48baaae81.yaml
+++ b/releasenotes/notes/reader-crash-no-docs-53085ce48baaae81.yaml
@ -0,0 +1,4 @@
+---
+fixes:
+  - |
+    `ExtractiveReader` now returns an empty list of answers instead of raising an exception when it receives an empty list of documents.
--- a/test/components/readers/test_extractive.py
+++ b/test/components/readers/test_extractive.py
@ -266,13 +266,17 @@ def test_from_dict_no_token():
    assert component.token is None


+def test_run_no_docs(mock_reader: ExtractiveReader):
+    mock_reader.warm_up()
+    assert mock_reader.run(query="hello", documents=[]) == {"answers": []}
+
+
 def test_output(mock_reader: ExtractiveReader):
-    answers = mock_reader.run(example_queries[0], example_documents[0], top_k=3)[
-        "answers"
-    ]  # [0] Uncomment and remove first two indices when batching support is reintroduced
+    answers = mock_reader.run(example_queries[0], example_documents[0], top_k=3)["answers"]
    doc_ids = set()
    no_answer_prob = 1
    for doc, answer in zip(example_documents[0], answers[:3]):
+        assert answer.document_offset is not None
        assert answer.document_offset.start == 11
        assert answer.document_offset.end == 16
        assert doc.content is not None