mirror of
https://github.com/deepset-ai/haystack.git
synced 2026-01-04 02:57:34 +00:00
feat: add File type to preview package (#5873)
* add Blob type * review feedback * fix tests and naming * Update add-blob-type-2a9476a39841f54d.yaml * removed unused import --------- Co-authored-by: Stefano Fiorucci <44616784+anakin87@users.noreply.github.com>
This commit is contained in:
parent
a4beec3013
commit
c2ec3f5fde
@ -1,4 +1,5 @@
|
||||
from haystack.preview.dataclasses.document import Document
|
||||
from haystack.preview.dataclasses.answer import ExtractedAnswer, GeneratedAnswer, Answer
|
||||
from haystack.preview.dataclasses.byte_stream import ByteStream
|
||||
|
||||
__all__ = ["Document", "ExtractedAnswer", "GeneratedAnswer", "Answer"]
|
||||
__all__ = ["Document", "ExtractedAnswer", "GeneratedAnswer", "Answer", "ByteStream"]
|
||||
|
||||
37
haystack/preview/dataclasses/byte_stream.py
Normal file
37
haystack/preview/dataclasses/byte_stream.py
Normal file
@ -0,0 +1,37 @@
|
||||
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, Optional
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ByteStream:
    """
    Base data class representing a binary object in the Haystack API.

    :param data: The raw bytes carried by this object.
    :param metadata: Free-form key/value pairs attached to the bytes. The field is
        declared with `hash=False`, so it does not take part in the generated hash.
    """

    data: bytes
    metadata: Dict[str, Any] = field(default_factory=dict, hash=False)

    def to_file(self, destination_path: Path) -> None:
        """
        Write the bytes held by this ByteStream to a file.

        :param destination_path: The path of the file to write. The file is opened in
            binary write mode ("wb").
        """
        with open(destination_path, "wb") as fd:
            fd.write(self.data)

    @classmethod
    def from_file_path(cls, filepath: Path, metadata: Optional[Dict[str, Any]] = None) -> "ByteStream":
        """
        Create a ByteStream from the contents read from a file.

        :param filepath: A valid path to a file.
        :param metadata: Optional metadata to attach to the new ByteStream.
            When omitted, the ByteStream gets a fresh empty dictionary.
        """
        with open(filepath, "rb") as fd:
            # Build a new dict per call when no metadata is given, so instances never share one.
            return cls(data=fd.read(), metadata={} if metadata is None else metadata)

    @classmethod
    def from_string(
        cls, text: str, encoding: str = "utf-8", metadata: Optional[Dict[str, Any]] = None
    ) -> "ByteStream":
        """
        Create a ByteStream encoding a string.

        :param text: The string to encode
        :param encoding: The encoding used to convert the string into bytes
        :param metadata: Optional metadata to attach to the new ByteStream.
            When omitted, the ByteStream gets a fresh empty dictionary.
        """
        return cls(data=text.encode(encoding), metadata={} if metadata is None else metadata)
|
||||
5
releasenotes/notes/add-blob-type-2a9476a39841f54d.yaml
Normal file
5
releasenotes/notes/add-blob-type-2a9476a39841f54d.yaml
Normal file
@ -0,0 +1,5 @@
|
||||
---
|
||||
preview:
|
||||
- |
|
||||
Add ByteStream type to send binary raw data across components
|
||||
in a pipeline.
|
||||
33
test/preview/dataclasses/test_byte_stream.py
Normal file
33
test/preview/dataclasses/test_byte_stream.py
Normal file
@ -0,0 +1,33 @@
|
||||
import io
|
||||
|
||||
from haystack.preview.dataclasses import ByteStream
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_from_file_path(tmp_path, request):
    """ByteStream.from_file_path returns exactly the bytes stored in the file."""
    test_bytes = "Hello, world!\n".encode()
    test_path = tmp_path / request.node.name
    # Do the write outside of any assert: assert statements are removed when Python
    # runs with -O, which would silently skip creating the fixture file's contents.
    with open(test_path, "wb") as fd:
        written = fd.write(test_bytes)
    assert written == len(test_bytes)

    b = ByteStream.from_file_path(test_path)
    assert b.data == test_bytes
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_from_string():
    """ByteStream.from_string stores bytes that decode back to the input text."""
    original_text = "Hello, world!"
    stream = ByteStream.from_string(original_text)
    decoded_text = stream.data.decode()
    assert decoded_text == original_text
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_to_file(tmp_path, request):
    """ByteStream.to_file writes the stream's bytes to the destination path."""
    expected_text = "Hello, world!\n"
    destination = tmp_path / request.node.name
    stream = ByteStream(expected_text.encode())

    stream.to_file(destination)

    with open(destination, "rb") as fd:
        written_text = fd.read().decode()
    assert written_text == expected_text
|
||||
Loading…
x
Reference in New Issue
Block a user