mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-06-26 22:00:13 +00:00
feat: Set ByteStream's mime_type attribute for web based resources (#7681)
This commit is contained in:
parent
1d20ac3c5e
commit
811b93db91
@ -151,6 +151,7 @@ class LinkContentFetcher:
|
||||
for stream_metadata, stream in results: # type: ignore
|
||||
if stream_metadata is not None and stream is not None:
|
||||
stream.meta.update(stream_metadata)
|
||||
stream.mime_type = stream.meta.get("content_type", None)
|
||||
streams.append(stream)
|
||||
|
||||
return {"streams": streams}
|
||||
|
@ -90,7 +90,7 @@ class FileTypeRouter:
|
||||
if isinstance(source, Path):
|
||||
mime_type = self._get_mime_type(source)
|
||||
elif isinstance(source, ByteStream):
|
||||
mime_type = source.meta.get("content_type", None)
|
||||
mime_type = source.mime_type
|
||||
else:
|
||||
raise ValueError(f"Unsupported data source type: {type(source).__name__}")
|
||||
|
||||
|
@ -0,0 +1,4 @@
|
||||
---
|
||||
enhancements:
|
||||
- |
|
||||
Improved MIME type management by setting MIME types directly on the `mime_type` attribute of ByteStreams. `LinkContentFetcher` now populates `mime_type` from the fetched content type, and `FileTypeRouter` reads `mime_type` instead of `meta["content_type"]` when routing ByteStreams. This makes MIME type data more consistently accessible and simplifies working with various document formats.
|
@ -50,7 +50,7 @@ class TestFileTypeRouter:
|
||||
byte_streams = []
|
||||
for path, mime_type in zip(file_paths, mime_types):
|
||||
stream = ByteStream(path.read_bytes())
|
||||
stream.meta["content_type"] = mime_type
|
||||
stream.mime_type = mime_type
|
||||
byte_streams.append(stream)
|
||||
|
||||
# add unclassified ByteStream
|
||||
@ -81,7 +81,7 @@ class TestFileTypeRouter:
|
||||
byte_stream_sources = []
|
||||
for path, mime_type in zip(file_paths, mime_types):
|
||||
stream = ByteStream(path.read_bytes())
|
||||
stream.meta["content_type"] = mime_type
|
||||
stream.mime_type = mime_type
|
||||
byte_stream_sources.append(stream)
|
||||
|
||||
mixed_sources = file_paths[:2] + byte_stream_sources[2:]
|
||||
@ -165,9 +165,12 @@ class TestFileTypeRouter:
|
||||
"""
|
||||
Test if the component correctly matches mime types exactly, without regex patterns.
|
||||
"""
|
||||
txt_stream = ByteStream(io.BytesIO(b"Text file content"), meta={"content_type": "text/plain"})
|
||||
jpg_stream = ByteStream(io.BytesIO(b"JPEG file content"), meta={"content_type": "image/jpeg"})
|
||||
mp3_stream = ByteStream(io.BytesIO(b"MP3 file content"), meta={"content_type": "audio/mpeg"})
|
||||
txt_stream = ByteStream(io.BytesIO(b"Text file content").read())
|
||||
txt_stream.mime_type = "text/plain"
|
||||
jpg_stream = ByteStream(io.BytesIO(b"JPEG file content").read())
|
||||
jpg_stream.mime_type = "image/jpeg"
|
||||
mp3_stream = ByteStream(io.BytesIO(b"MP3 file content").read())
|
||||
mp3_stream.mime_type = "audio/mpeg"
|
||||
|
||||
byte_streams = [txt_stream, jpg_stream, mp3_stream]
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user