mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-12-06 20:06:55 +00:00
fix: LinkContentFetcher html text encoding (#7975)
* fix: content encoding of LinkContentFetcher
* fix tests
* add reno
* only touch html
This commit is contained in:
parent
583eb8a293
commit
7e35280d4f
@ -96,6 +96,7 @@ class LinkContentFetcher:
|
|||||||
# register default content handlers that extract data from the response
|
# register default content handlers that extract data from the response
|
||||||
self.handlers: Dict[str, Callable[[Response], ByteStream]] = defaultdict(lambda: _text_content_handler)
|
self.handlers: Dict[str, Callable[[Response], ByteStream]] = defaultdict(lambda: _text_content_handler)
|
||||||
self.handlers["text/*"] = _text_content_handler
|
self.handlers["text/*"] = _text_content_handler
|
||||||
|
self.handlers["text/html"] = _binary_content_handler
|
||||||
self.handlers["application/json"] = _text_content_handler
|
self.handlers["application/json"] = _text_content_handler
|
||||||
self.handlers["application/*"] = _binary_content_handler
|
self.handlers["application/*"] = _binary_content_handler
|
||||||
self.handlers["image/*"] = _binary_content_handler
|
self.handlers["image/*"] = _binary_content_handler
|
||||||
|
|||||||
@ -0,0 +1,4 @@
|
|||||||
|
---
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
Encoding of HTML files in LinkContentFetcher
|
||||||
@ -47,6 +47,7 @@ class TestLinkContentFetcher:
|
|||||||
assert fetcher.timeout == 3
|
assert fetcher.timeout == 3
|
||||||
assert fetcher.handlers == {
|
assert fetcher.handlers == {
|
||||||
"text/*": _text_content_handler,
|
"text/*": _text_content_handler,
|
||||||
|
"text/html": _binary_content_handler,
|
||||||
"application/json": _text_content_handler,
|
"application/json": _text_content_handler,
|
||||||
"application/*": _binary_content_handler,
|
"application/*": _binary_content_handler,
|
||||||
"image/*": _binary_content_handler,
|
"image/*": _binary_content_handler,
|
||||||
@ -78,7 +79,7 @@ class TestLinkContentFetcher:
|
|||||||
correct_response = b"<h1>Example test response</h1>"
|
correct_response = b"<h1>Example test response</h1>"
|
||||||
with patch("haystack.components.fetchers.link_content.requests") as mock_run:
|
with patch("haystack.components.fetchers.link_content.requests") as mock_run:
|
||||||
mock_run.get.return_value = Mock(
|
mock_run.get.return_value = Mock(
|
||||||
status_code=200, text="<h1>Example test response</h1>", headers={"Content-Type": "text/html"}
|
status_code=200, content=b"<h1>Example test response</h1>", headers={"Content-Type": "text/html"}
|
||||||
)
|
)
|
||||||
fetcher = LinkContentFetcher()
|
fetcher = LinkContentFetcher()
|
||||||
streams = fetcher.run(urls=["https://www.example.com"])["streams"]
|
streams = fetcher.run(urls=["https://www.example.com"])["streams"]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user