fix: exit early if the component receives no documents (#7732)

* exit early if the component receives no documents

* add release note
This commit is contained in:
Massimiliano Pippi 2024-05-23 09:35:10 +02:00 committed by GitHub
parent a4fc2b66e6
commit 482f60ec99
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 18 additions and 4 deletions

View File

@ -210,6 +210,7 @@ class ExtractiveReader:
"""
texts = []
document_ids = []
document_contents = []
for i, doc in enumerate(documents):
if doc.content is None:
warnings.warn(
@ -219,9 +220,11 @@ class ExtractiveReader:
continue
texts.append(doc.content)
document_ids.append(i)
document_contents.append(doc.content)
encodings_pt = self.tokenizer( # type: ignore
queries,
[document.content for document in documents],
document_contents,
padding=True,
truncation=True,
max_length=max_seq_length,
@ -571,6 +574,9 @@ class ExtractiveReader:
:raises ComponentError:
If the component was not warmed up by calling 'warm_up()' before.
"""
if not documents:
return {"answers": []}
queries = [query] # Temporary solution until we have decided what batching should look like in v2
nested_documents = [documents]
if self.model is None:

View File

@ -0,0 +1,4 @@
---
fixes:
- |
`ExtractiveReader` now returns an empty list of answers, instead of raising an exception, when it receives an empty list of documents.

View File

@ -266,13 +266,17 @@ def test_from_dict_no_token():
assert component.token is None
def test_run_no_docs(mock_reader: ExtractiveReader):
    """Check that running the reader with an empty document list yields no answers.

    The expected result is exactly ``{"answers": []}``.
    """
    # Warm up first: `run` is documented to raise ComponentError otherwise.
    mock_reader.warm_up()
    assert mock_reader.run(query="hello", documents=[]) == {"answers": []}
def test_output(mock_reader: ExtractiveReader):
answers = mock_reader.run(example_queries[0], example_documents[0], top_k=3)[
"answers"
] # [0] Uncomment and remove first two indices when batching support is reintroduced
answers = mock_reader.run(example_queries[0], example_documents[0], top_k=3)["answers"]
doc_ids = set()
no_answer_prob = 1
for doc, answer in zip(example_documents[0], answers[:3]):
assert answer.document_offset is not None
assert answer.document_offset.start == 11
assert answer.document_offset.end == 16
assert doc.content is not None