2024-02-17 12:57:42 +01:00
|
|
|
import pytest
|
|
|
|
|
2023-11-24 14:48:43 +01:00
|
|
|
from haystack.dataclasses import ByteStream
|
2023-10-04 17:23:12 +02:00
|
|
|
|
|
|
|
|
|
|
|
def test_from_file_path(tmp_path, request):
    """Check that ``ByteStream.from_file_path`` loads a file's bytes.

    The test writes a small payload to a file under ``tmp_path`` and then
    builds a ``ByteStream`` from it three times: with no extra arguments,
    with an explicit ``mime_type``, and with an explicit ``meta`` mapping.
    """
    test_bytes = "Hello, world!\n".encode()
    test_path = tmp_path / request.node.name

    with open(test_path, "wb") as fd:
        # Keep the write outside of ``assert``: a side effect inside an
        # assert statement disappears when asserts are stripped (python -O).
        written = fd.write(test_bytes)
    assert written == len(test_bytes)

    # No optional arguments: data is loaded and no mime type is set.
    b = ByteStream.from_file_path(test_path)
    assert b.data == test_bytes
    assert b.mime_type is None

    # An explicit mime type is stored on the stream.
    b = ByteStream.from_file_path(test_path, mime_type="text/plain")
    assert b.data == test_bytes
    assert b.mime_type == "text/plain"

    # Explicit metadata is stored on the stream.
    b = ByteStream.from_file_path(test_path, meta={"foo": "bar"})
    assert b.data == test_bytes
    assert b.meta == {"foo": "bar"}
|
|
|
|
|
2023-10-04 17:23:12 +02:00
|
|
|
|
|
|
|
def test_from_string():
    """Check that ``ByteStream.from_string`` encodes text into ``data``.

    The stream is built three times: with no extra arguments, with an
    explicit ``mime_type``, and with an explicit ``meta`` mapping.
    """
    test_string = "Hello, world!"

    # No optional arguments: text round-trips and no mime type is set.
    b = ByteStream.from_string(test_string)
    assert b.data.decode() == test_string
    # Identity comparison is the correct way to test for the None singleton.
    assert b.mime_type is None

    # An explicit mime type is stored on the stream.
    b = ByteStream.from_string(test_string, mime_type="text/plain")
    assert b.data.decode() == test_string
    assert b.mime_type == "text/plain"

    # Explicit metadata is stored on the stream.
    b = ByteStream.from_string(test_string, meta={"foo": "bar"})
    assert b.data.decode() == test_string
    assert b.meta == {"foo": "bar"}
|
|
|
|
|
2023-10-04 17:23:12 +02:00
|
|
|
|
2024-02-17 12:57:42 +01:00
|
|
|
def test_to_string():
    """A string passed through ``from_string`` comes back from ``to_string``."""
    expected = "Hello, world!"
    stream = ByteStream.from_string(expected)

    decoded = stream.to_string()
    assert decoded == expected
|
|
|
|
|
|
|
|
|
|
|
|
def test_to_from_string_encoding():
    """Exercise the ``encoding`` argument of ``from_string`` and ``to_string``.

    The sample text is expected to fail ISO-8859-1 encoding, and a stream
    created with the default encoding must decode correctly only as utf-8.
    """
    text = "Hello Baščaršija!"

    # Encoding this text as ISO-8859-1 is expected to fail.
    with pytest.raises(UnicodeEncodeError):
        ByteStream.from_string(text, encoding="ISO-8859-1")

    stream = ByteStream.from_string(text)  # default encoding is utf-8

    # Decoding with the wrong codec yields different text ...
    latin1_decoded = stream.to_string(encoding="ISO-8859-1")
    assert latin1_decoded != text

    # ... while decoding with utf-8 restores the original.
    utf8_decoded = stream.to_string(encoding="utf-8")
    assert utf8_decoded == text
|
|
|
|
|
|
|
|
|
|
|
|
def test_to_string_encoding_error():
    """Decoding the stream as utf-16 must raise ``UnicodeDecodeError``."""
    stream = ByteStream.from_string("Hello, world!")

    # UnicodeDecodeError is a subclass of ValueError in Python.
    with pytest.raises(UnicodeDecodeError):
        stream.to_string("utf-16")
|
|
|
|
|
|
|
|
|
2023-10-04 17:23:12 +02:00
|
|
|
def test_to_file(tmp_path, request):
    """Write a ``ByteStream`` with ``to_file`` and read the file back."""
    expected = "Hello, world!\n"
    destination = tmp_path / request.node.name

    stream = ByteStream(expected.encode())
    stream.to_file(destination)

    # Read the raw bytes back and compare after decoding.
    with open(destination, "rb") as handle:
        contents = handle.read()
        assert contents.decode() == expected
|