mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-12-12 15:27:06 +00:00
feat: Add FileTypeRouter markdown support (#6551)
* Add FileTypeRouter markdown support * Add release note
This commit is contained in:
parent
3b172b0476
commit
c642695ec0
@ -76,7 +76,10 @@ class FileTypeRouter:
|
|||||||
:param path: The file path to get the MIME type for.
|
:param path: The file path to get the MIME type for.
|
||||||
:return: The MIME type of the provided file path, or None if the MIME type cannot be determined.
|
:return: The MIME type of the provided file path, or None if the MIME type cannot be determined.
|
||||||
"""
|
"""
|
||||||
return mimetypes.guess_type(path.as_posix())[0]
|
extension = path.suffix.lower()
|
||||||
|
mime_type = mimetypes.guess_type(path.as_posix())[0]
|
||||||
|
# custom extension mappings take precedence; the guessed mime type is only the fallback
|
||||||
|
return self.get_custom_mime_mappings().get(extension, mime_type)
|
||||||
|
|
||||||
def is_valid_mime_type_format(self, mime_type: str) -> bool:
|
def is_valid_mime_type_format(self, mime_type: str) -> bool:
|
||||||
"""
|
"""
|
||||||
@ -84,4 +87,13 @@ class FileTypeRouter:
|
|||||||
:param mime_type: The MIME type to check.
|
:param mime_type: The MIME type to check.
|
||||||
:return: True if the provided MIME type is a valid MIME type format, False otherwise.
|
:return: True if the provided MIME type is a valid MIME type format, False otherwise.
|
||||||
"""
|
"""
|
||||||
return mime_type in mimetypes.types_map.values()
|
return mime_type in mimetypes.types_map.values() or mime_type in self.get_custom_mime_mappings().values()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_custom_mime_mappings() -> Dict[str, str]:
|
||||||
|
"""
|
||||||
|
Returns a dictionary of custom file extension to MIME type mappings.
|
||||||
|
"""
|
||||||
|
# we add markdown because it is not added by the mimetypes module
|
||||||
|
# see https://github.com/python/cpython/pull/17995
|
||||||
|
return {".md": "text/markdown", ".markdown": "text/markdown"}
|
||||||
|
|||||||
@ -0,0 +1,4 @@
|
|||||||
|
---
|
||||||
|
enhancements:
|
||||||
|
- |
|
||||||
|
Adds Markdown MIME type support to the file type router, i.e. the `FileTypeRouter` class.
|
||||||
@ -69,8 +69,9 @@ class TestFileTypeRouter:
|
|||||||
test_files_path / "audio" / "the context for this answer is here.wav",
|
test_files_path / "audio" / "the context for this answer is here.wav",
|
||||||
test_files_path / "txt" / "doc_2.txt",
|
test_files_path / "txt" / "doc_2.txt",
|
||||||
test_files_path / "images" / "apple.jpg",
|
test_files_path / "images" / "apple.jpg",
|
||||||
|
test_files_path / "markdown" / "sample.md",
|
||||||
]
|
]
|
||||||
mime_types = ["text/plain", "audio/x-wav", "text/plain", "image/jpeg"]
|
mime_types = ["text/plain", "audio/x-wav", "text/plain", "image/jpeg", "text/markdown"]
|
||||||
byte_stream_sources = []
|
byte_stream_sources = []
|
||||||
for path, mime_type in zip(file_paths, mime_types):
|
for path, mime_type in zip(file_paths, mime_types):
|
||||||
stream = ByteStream(path.read_bytes())
|
stream = ByteStream(path.read_bytes())
|
||||||
@ -79,11 +80,12 @@ class TestFileTypeRouter:
|
|||||||
|
|
||||||
mixed_sources = file_paths[:2] + byte_stream_sources[2:]
|
mixed_sources = file_paths[:2] + byte_stream_sources[2:]
|
||||||
|
|
||||||
router = FileTypeRouter(mime_types=["text/plain", "audio/x-wav", "image/jpeg"])
|
router = FileTypeRouter(mime_types=["text/plain", "audio/x-wav", "image/jpeg", "text/markdown"])
|
||||||
output = router.run(sources=mixed_sources)
|
output = router.run(sources=mixed_sources)
|
||||||
assert len(output["text/plain"]) == 2
|
assert len(output["text/plain"]) == 2
|
||||||
assert len(output["audio/x-wav"]) == 1
|
assert len(output["audio/x-wav"]) == 1
|
||||||
assert len(output["image/jpeg"]) == 1
|
assert len(output["image/jpeg"]) == 1
|
||||||
|
assert len(output["text/markdown"]) == 1
|
||||||
|
|
||||||
def test_no_files(self):
|
def test_no_files(self):
|
||||||
"""
|
"""
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user