mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-07-23 00:42:28 +00:00
71 lines
2.1 KiB
Python
71 lines
2.1 KiB
Python
import pytest
|
|
|
|
from haystack.dataclasses import ByteStream
|
|
|
|
|
|
def test_from_file_path(tmp_path, request):
    """Verify ``ByteStream.from_file_path`` reads a file's bytes and forwards optional arguments.

    A small binary file is written under pytest's ``tmp_path`` (named after the
    running test), then loaded three times: with no extras, with an explicit
    ``mime_type``, and with explicit ``meta``.
    """
    test_bytes = "Hello, world!\n".encode()
    test_path = tmp_path / request.node.name
    with open(test_path, "wb") as fd:
        # Make sure the whole payload was written before reading it back.
        assert fd.write(test_bytes) == len(test_bytes)

    # No optional arguments: the test expects mime_type to be left unset.
    b = ByteStream.from_file_path(test_path)
    assert b.data == test_bytes
    assert b.mime_type is None

    # An explicit mime_type must be stored on the resulting stream.
    b = ByteStream.from_file_path(test_path, mime_type="text/plain")
    assert b.data == test_bytes
    assert b.mime_type == "text/plain"

    # Explicit metadata must be stored on the resulting stream.
    b = ByteStream.from_file_path(test_path, meta={"foo": "bar"})
    assert b.data == test_bytes
    assert b.meta == {"foo": "bar"}
|
|
|
|
|
|
def test_from_string():
    """Verify ``ByteStream.from_string`` stores the encoded text and forwards optional arguments.

    The stream is built three times: with no extras, with an explicit
    ``mime_type``, and with explicit ``meta``. In every case decoding ``data``
    must give back the original text.
    """
    test_string = "Hello, world!"

    # No optional arguments: the test expects mime_type to be left unset.
    b = ByteStream.from_string(test_string)
    assert b.data.decode() == test_string
    assert b.mime_type is None

    # An explicit mime_type must be stored on the resulting stream.
    b = ByteStream.from_string(test_string, mime_type="text/plain")
    assert b.data.decode() == test_string
    assert b.mime_type == "text/plain"

    # Explicit metadata must be stored on the resulting stream.
    b = ByteStream.from_string(test_string, meta={"foo": "bar"})
    assert b.data.decode() == test_string
    assert b.meta == {"foo": "bar"}
|
|
|
|
|
|
def test_to_string():
    """Round-trip a plain string through ``from_string`` and ``to_string``."""
    original = "Hello, world!"
    stream = ByteStream.from_string(original)

    decoded = stream.to_string()

    assert decoded == original
|
|
|
|
|
|
def test_to_from_string_encoding():
    """Exercise the ``encoding`` argument of ``from_string`` and ``to_string``.

    The sample text contains characters that ISO-8859-1 cannot encode, so
    encoding with it must fail, and decoding the default-encoded bytes with it
    must not give back the original text.
    """
    sample = "Hello Baščaršija!"

    # Encoding with a charset that lacks these characters must raise.
    with pytest.raises(UnicodeEncodeError):
        ByteStream.from_string(sample, encoding="ISO-8859-1")

    # No encoding given here; the original test notes the default is utf-8.
    stream = ByteStream.from_string(sample)

    decoded_as_latin1 = stream.to_string(encoding="ISO-8859-1")
    assert decoded_as_latin1 != sample

    decoded_as_utf8 = stream.to_string(encoding="utf-8")
    assert decoded_as_utf8 == sample
|
|
|
|
|
|
def test_to_string_encoding_error():
    """Decoding with an encoding that does not fit the data raises ``UnicodeDecodeError``.

    ``UnicodeDecodeError`` is a subclass of ``ValueError``; the test pins the
    more specific type.
    """
    stream = ByteStream.from_string("Hello, world!")

    # NOTE(review): the 13-character ASCII payload presumably yields an odd
    # number of bytes, which would explain the utf-16 failure; confirm.
    with pytest.raises(UnicodeDecodeError):
        stream.to_string("utf-16")
|
|
|
|
|
|
def test_to_file(tmp_path, request):
    """Write a ``ByteStream`` to disk with ``to_file`` and read the same content back.

    The target file lives under pytest's ``tmp_path`` and is named after the
    running test.
    """
    expected_text = "Hello, world!\n"
    target = tmp_path / request.node.name

    stream = ByteStream(expected_text.encode())
    stream.to_file(target)

    # Read the raw bytes back and compare after decoding.
    with open(target, "rb") as handle:
        written = handle.read()
    assert written.decode() == expected_text
|