mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-10-19 03:48:42 +00:00
feat: Add ByteStream to_string method (#7009)
This commit is contained in:
parent
3f85a63468
commit
3ce6b9768e
@ -49,3 +49,13 @@ class ByteStream:
|
|||||||
:param meta: Additional metadata to be stored with the ByteStream.
|
:param meta: Additional metadata to be stored with the ByteStream.
|
||||||
"""
|
"""
|
||||||
return cls(data=text.encode(encoding), mime_type=mime_type, meta=meta or {})
|
return cls(data=text.encode(encoding), mime_type=mime_type, meta=meta or {})
|
||||||
|
|
||||||
|
def to_string(self, encoding: str = "utf-8") -> str:
|
||||||
|
"""
|
||||||
|
Convert the ByteStream to a string; metadata is not included.
|
||||||
|
|
||||||
|
:param encoding: The encoding used to convert the bytes to a string. Defaults to "utf-8".
|
||||||
|
:return: The string representation of the ByteStream.
|
||||||
|
:raises UnicodeDecodeError: If the ByteStream data cannot be decoded with the specified encoding.
|
||||||
|
"""
|
||||||
|
return self.data.decode(encoding)
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
from haystack.dataclasses import ByteStream
|
from haystack.dataclasses import ByteStream
|
||||||
|
|
||||||
|
|
||||||
@ -35,6 +37,30 @@ def test_from_string():
|
|||||||
assert b.meta == {"foo": "bar"}
|
assert b.meta == {"foo": "bar"}
|
||||||
|
|
||||||
|
|
||||||
|
def test_to_string():
|
||||||
|
test_string = "Hello, world!"
|
||||||
|
b = ByteStream.from_string(test_string)
|
||||||
|
assert b.to_string() == test_string
|
||||||
|
|
||||||
|
|
||||||
|
def test_to_from_string_encoding():
|
||||||
|
test_string = "Hello Baščaršija!"
|
||||||
|
with pytest.raises(UnicodeEncodeError):
|
||||||
|
ByteStream.from_string(test_string, encoding="ISO-8859-1")
|
||||||
|
|
||||||
|
bs = ByteStream.from_string(test_string) # default encoding is utf-8
|
||||||
|
|
||||||
|
assert bs.to_string(encoding="ISO-8859-1") != test_string
|
||||||
|
assert bs.to_string(encoding="utf-8") == test_string
|
||||||
|
|
||||||
|
|
||||||
|
def test_to_string_encoding_error():
|
||||||
|
# test that it raises UnicodeDecodeError if the data cannot be decoded with the given encoding
|
||||||
|
b = ByteStream.from_string("Hello, world!")
|
||||||
|
with pytest.raises(UnicodeDecodeError):
|
||||||
|
b.to_string("utf-16")
|
||||||
|
|
||||||
|
|
||||||
def test_to_file(tmp_path, request):
|
def test_to_file(tmp_path, request):
|
||||||
test_str = "Hello, world!\n"
|
test_str = "Hello, world!\n"
|
||||||
test_path = tmp_path / request.node.name
|
test_path = tmp_path / request.node.name
|
||||||
|
Loading…
x
Reference in New Issue
Block a user