mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-12-03 18:36:04 +00:00
fix: LinkContentFetcher html text encoding (#7975)
* fix: content encoding of LinkContentFetcher
* fix tests
* add reno
* only touch html
This commit is contained in:
parent
583eb8a293
commit
7e35280d4f
@@ -96,6 +96,7 @@ class LinkContentFetcher:
|
||||
# register default content handlers that extract data from the response
|
||||
self.handlers: Dict[str, Callable[[Response], ByteStream]] = defaultdict(lambda: _text_content_handler)
|
||||
self.handlers["text/*"] = _text_content_handler
|
||||
self.handlers["text/html"] = _binary_content_handler
|
||||
self.handlers["application/json"] = _text_content_handler
|
||||
self.handlers["application/*"] = _binary_content_handler
|
||||
self.handlers["image/*"] = _binary_content_handler
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
Encoding of HTML files in LinkContentFetcher
|
||||
@@ -47,6 +47,7 @@ class TestLinkContentFetcher:
|
||||
assert fetcher.timeout == 3
|
||||
assert fetcher.handlers == {
|
||||
"text/*": _text_content_handler,
|
||||
"text/html": _binary_content_handler,
|
||||
"application/json": _text_content_handler,
|
||||
"application/*": _binary_content_handler,
|
||||
"image/*": _binary_content_handler,
|
||||
@@ -78,7 +79,7 @@ class TestLinkContentFetcher:
|
||||
correct_response = b"<h1>Example test response</h1>"
|
||||
with patch("haystack.components.fetchers.link_content.requests") as mock_run:
|
||||
mock_run.get.return_value = Mock(
|
||||
- status_code=200, text="<h1>Example test response</h1>", headers={"Content-Type": "text/html"}
|
||||
+ status_code=200, content=b"<h1>Example test response</h1>", headers={"Content-Type": "text/html"}
|
||||
)
|
||||
fetcher = LinkContentFetcher()
|
||||
streams = fetcher.run(urls=["https://www.example.com"])["streams"]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user