haystack/test/dataclasses/test_byte_stream.py
2024-02-17 12:57:42 +01:00

71 lines
2.1 KiB
Python

import pytest
from haystack.dataclasses import ByteStream
def test_from_file_path(tmp_path, request):
    """Check that ``ByteStream.from_file_path`` loads a file's bytes and keeps optional fields.

    A scratch file named after the running test is created under pytest's
    ``tmp_path`` and then loaded three times: with defaults, with an explicit
    ``mime_type``, and with explicit ``meta``.
    """
    test_bytes = "Hello, world!\n".encode()
    test_path = tmp_path / request.node.name
    # Perform the write outside of ``assert`` so the file is still populated
    # when assertions are stripped (``python -O``).
    with open(test_path, "wb") as fd:
        written = fd.write(test_bytes)
    assert written == len(test_bytes)

    b = ByteStream.from_file_path(test_path)
    assert b.data == test_bytes
    # ``None`` is a singleton: compare by identity, not equality.
    assert b.mime_type is None

    b = ByteStream.from_file_path(test_path, mime_type="text/plain")
    assert b.data == test_bytes
    assert b.mime_type == "text/plain"

    b = ByteStream.from_file_path(test_path, meta={"foo": "bar"})
    assert b.data == test_bytes
    assert b.meta == {"foo": "bar"}
def test_from_string():
    """Check that ``ByteStream.from_string`` stores the text as bytes and keeps optional fields.

    The stream is built three times: with defaults, with an explicit
    ``mime_type``, and with explicit ``meta``.
    """
    test_string = "Hello, world!"

    b = ByteStream.from_string(test_string)
    assert b.data.decode() == test_string
    # ``None`` is a singleton: compare by identity, not equality.
    assert b.mime_type is None

    b = ByteStream.from_string(test_string, mime_type="text/plain")
    assert b.data.decode() == test_string
    assert b.mime_type == "text/plain"

    b = ByteStream.from_string(test_string, meta={"foo": "bar"})
    assert b.data.decode() == test_string
    assert b.meta == {"foo": "bar"}
def test_to_string():
    """Round-trip a string through ``from_string`` and ``to_string`` with default arguments."""
    original = "Hello, world!"
    stream = ByteStream.from_string(original)
    round_tripped = stream.to_string()
    assert round_tripped == original
def test_to_from_string_encoding():
    """Exercise the ``encoding`` argument of ``from_string`` and ``to_string``.

    The sample text contains characters for which encoding as ISO-8859-1 is
    expected to raise ``UnicodeEncodeError``. Decoding the default-encoded
    bytes as ISO-8859-1 must not reproduce the text, while decoding them as
    utf-8 must.
    """
    text = "Hello Baščaršija!"

    with pytest.raises(UnicodeEncodeError):
        ByteStream.from_string(text, encoding="ISO-8859-1")

    stream = ByteStream.from_string(text)  # default encoding is utf-8
    latin1_view = stream.to_string(encoding="ISO-8859-1")
    assert latin1_view != text
    utf8_view = stream.to_string(encoding="utf-8")
    assert utf8_view == text
def test_to_string_encoding_error():
    """Check that ``to_string`` raises ``UnicodeDecodeError`` when the bytes cannot be decoded.

    The stream is created with the default encoding and then decoded as
    utf-16, which is expected to fail for this payload.
    """
    stream = ByteStream.from_string("Hello, world!")
    # UnicodeDecodeError is a subclass of ValueError.
    with pytest.raises(UnicodeDecodeError):
        stream.to_string("utf-16")
def test_to_file(tmp_path, request):
    """Check that ``ByteStream.to_file`` writes the stream's bytes to the given path.

    The target file lives under pytest's ``tmp_path`` and is named after the
    running test; it is read back in binary mode and compared to the source text.
    """
    expected = "Hello, world!\n"
    target = tmp_path / request.node.name

    stream = ByteStream(expected.encode())
    stream.to_file(target)

    with open(target, "rb") as handle:
        contents = handle.read()
    assert contents.decode() == expected