mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-06-26 22:00:13 +00:00
feat: Add FileTypeRouter markdown support (#6551)
* Add FileTypeRouter markdown support * Add release note
This commit is contained in:
parent
3b172b0476
commit
c642695ec0
@ -76,7 +76,10 @@ class FileTypeRouter:
|
||||
:param path: The file path to get the MIME type for.
|
||||
:return: The MIME type of the provided file path, or None if the MIME type cannot be determined.
|
||||
"""
|
||||
return mimetypes.guess_type(path.as_posix())[0]
|
||||
extension = path.suffix.lower()
|
||||
mime_type = mimetypes.guess_type(path.as_posix())[0]
|
||||
# custom extension mappings take precedence; otherwise fall back to the guessed mime type (which may be None)
|
||||
return self.get_custom_mime_mappings().get(extension, mime_type)
|
||||
|
||||
def is_valid_mime_type_format(self, mime_type: str) -> bool:
|
||||
"""
|
||||
@ -84,4 +87,13 @@ class FileTypeRouter:
|
||||
:param mime_type: The MIME type to check.
|
||||
:return: True if the provided MIME type is a valid MIME type format, False otherwise.
|
||||
"""
|
||||
return mime_type in mimetypes.types_map.values()
|
||||
return mime_type in mimetypes.types_map.values() or mime_type in self.get_custom_mime_mappings().values()
|
||||
|
||||
@staticmethod
|
||||
def get_custom_mime_mappings() -> Dict[str, str]:
|
||||
"""
|
||||
Returns a dictionary of custom file extension to MIME type mappings.
|
||||
"""
|
||||
# we add markdown because it is not added by the mimetypes module
|
||||
# see https://github.com/python/cpython/pull/17995
|
||||
return {".md": "text/markdown", ".markdown": "text/markdown"}
|
||||
|
@ -0,0 +1,4 @@
|
||||
---
|
||||
enhancements:
|
||||
- |
|
||||
Adds Markdown MIME type support to the file type router, i.e. the `FileTypeRouter` class.
|
@ -69,8 +69,9 @@ class TestFileTypeRouter:
|
||||
test_files_path / "audio" / "the context for this answer is here.wav",
|
||||
test_files_path / "txt" / "doc_2.txt",
|
||||
test_files_path / "images" / "apple.jpg",
|
||||
test_files_path / "markdown" / "sample.md",
|
||||
]
|
||||
mime_types = ["text/plain", "audio/x-wav", "text/plain", "image/jpeg"]
|
||||
mime_types = ["text/plain", "audio/x-wav", "text/plain", "image/jpeg", "text/markdown"]
|
||||
byte_stream_sources = []
|
||||
for path, mime_type in zip(file_paths, mime_types):
|
||||
stream = ByteStream(path.read_bytes())
|
||||
@ -79,11 +80,12 @@ class TestFileTypeRouter:
|
||||
|
||||
mixed_sources = file_paths[:2] + byte_stream_sources[2:]
|
||||
|
||||
router = FileTypeRouter(mime_types=["text/plain", "audio/x-wav", "image/jpeg"])
|
||||
router = FileTypeRouter(mime_types=["text/plain", "audio/x-wav", "image/jpeg", "text/markdown"])
|
||||
output = router.run(sources=mixed_sources)
|
||||
assert len(output["text/plain"]) == 2
|
||||
assert len(output["audio/x-wav"]) == 1
|
||||
assert len(output["image/jpeg"]) == 1
|
||||
assert len(output["text/markdown"]) == 1
|
||||
|
||||
def test_no_files(self):
|
||||
"""
|
||||
|
Loading…
x
Reference in New Issue
Block a user