fix: truncate ByteStream string representation (#8673)

* fix: truncate ByteStream string representation * add reno * better reno * add test * Update test_byte_stream.py * apply feedback * update reno
2026-01-07 12:37:27 +00:00 · 2025-01-07 19:00:52 +01:00 · 2025-01-07 19:00:52 +01:00 · e6059e632e
commit e6059e632e
parent 8e3f64717f
3 changed files with 26 additions and 1 deletions
--- a/haystack/dataclasses/byte_stream.py
+++ b/haystack/dataclasses/byte_stream.py
@ -7,7 +7,7 @@ from pathlib import Path
 from typing import Any, Dict, Optional


-@dataclass
+@dataclass(repr=False)
 class ByteStream:
    """
    Base data class representing a binary object in the Haystack API.
@ -63,3 +63,15 @@ class ByteStream:
        :raises: UnicodeDecodeError: If the ByteStream data cannot be decoded with the specified encoding.
        """
        return self.data.decode(encoding)
+
+    def __repr__(self) -> str:
+        """
+        Build the repr of this ByteStream, showing at most the first 100 bytes of `data`.
+
+        :returns: `ClassName(data=..., meta=..., mime_type=...)`; data longer than 100 bytes ends with `...`.
+        """
+        # Keep short payloads intact; cut longer ones and mark the cut with b"...".
+        shown = self.data if len(self.data) <= 100 else self.data[:100] + b"..."
+        # Field order is fixed: data, meta, mime_type.
+        parts = (f"data={shown!r}", f"meta={self.meta!r}", f"mime_type={self.mime_type!r}")
+        return f"{self.__class__.__name__}({', '.join(parts)})"
--- a/releasenotes/notes/fix-bytestream-str-8dd6d5e9a87f6aa4.yaml
+++ b/releasenotes/notes/fix-bytestream-str-8dd6d5e9a87f6aa4.yaml
@ -0,0 +1,4 @@
+---
+fixes:
+  - |
+    ByteStream now truncates the data to 100 bytes in the string representation to avoid excessive log output.
--- a/test/dataclasses/test_byte_stream.py
+++ b/test/dataclasses/test_byte_stream.py
@ -71,3 +71,12 @@ def test_to_file(tmp_path, request):
    ByteStream(test_str.encode()).to_file(test_path)
    with open(test_path, "rb") as fd:
        assert fd.read().decode() == test_str
+
+
+def test_str_truncation():
+    # 1000 characters of payload: far more than the string representation is expected to show.
+    stream = ByteStream.from_string("1234567890" * 100, mime_type="text/plain", meta={"foo": "bar"})
+    rendered = str(stream)
+    assert len(rendered) < 200
+    for expected in ("text/plain", "foo"):
+        assert expected in rendered