mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-08-28 02:16:32 +00:00
fix: LinkContentFetcher - when no content retrieved (i.e. request blocked), default to snippet text (#5656)
* When no content is retrieved (i.e. the request is blocked), default to the snippet text
* Add release note
This commit is contained in:
parent
2118f68769
commit
f13b37db24
@ -200,8 +200,10 @@ class LinkContentFetcher(BaseComponent):
|
||||
logger.debug("%s handler extracted content from %s", handler, url)
|
||||
|
||||
extracted_doc["content"] = content
|
||||
document = Document.from_dict(extracted_doc)
|
||||
fetched_documents = self.processor.process(documents=[document]) if self.processor else [document]
|
||||
else:
|
||||
extracted_doc["content"] = extracted_doc.get("snippet_text", "") # fallback to snippet_text
|
||||
document = Document.from_dict(extracted_doc)
|
||||
fetched_documents = self.processor.process(documents=[document]) if self.processor else [document]
|
||||
|
||||
return fetched_documents
|
||||
|
||||
|
@ -0,0 +1,5 @@
|
||||
---
|
||||
enhancements:
|
||||
- |
|
||||
If LinkContentFetcher is blocked or receives any response code other than HTTPStatus.OK, it returns the search
|
||||
engine snippet as the content, if one is available.
|
@ -279,7 +279,9 @@ def test_handle_various_response_errors(caplog, mocked_requests, error_code: int
|
||||
docs = r.fetch(url=url)
|
||||
|
||||
assert f"Couldn't retrieve content from {url}" in caplog.text
|
||||
assert docs == []
|
||||
assert len(docs) == 1
|
||||
assert isinstance(docs[0], Document)
|
||||
assert docs[0].content == ""
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
|
Loading…
x
Reference in New Issue
Block a user