feat: Add FileTypeRouter markdown support (#6551)

* Add FileTypeRouter markdown support

* Add release note
This commit is contained in:
Vladimir Blagojevic 2023-12-14 16:30:57 +01:00 committed by GitHub
parent 3b172b0476
commit c642695ec0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 22 additions and 4 deletions

View File

@ -76,7 +76,10 @@ class FileTypeRouter:
:param path: The file path to get the MIME type for.
:return: The MIME type of the provided file path, or None if the MIME type cannot be determined.
"""
return mimetypes.guess_type(path.as_posix())[0]
extension = path.suffix.lower()
mime_type = mimetypes.guess_type(path.as_posix())[0]
# custom extension mappings take precedence; fall back to the guessed mime type otherwise
return self.get_custom_mime_mappings().get(extension, mime_type)
def is_valid_mime_type_format(self, mime_type: str) -> bool:
"""
@ -84,4 +87,13 @@ class FileTypeRouter:
:param mime_type: The MIME type to check.
:return: True if the provided MIME type is a valid MIME type format, False otherwise.
"""
return mime_type in mimetypes.types_map.values()
return mime_type in mimetypes.types_map.values() or mime_type in self.get_custom_mime_mappings().values()
@staticmethod
def get_custom_mime_mappings() -> Dict[str, str]:
    """
    Provides the extra file-extension-to-MIME-type mappings used by the router.

    :return: A dictionary keyed by lowercase file extension (including the leading dot)
        whose values are the corresponding MIME types.
    """
    # Markdown has to be registered by hand because the mimetypes module does not add it,
    # see https://github.com/python/cpython/pull/17995
    markdown_extensions = (".md", ".markdown")
    return dict.fromkeys(markdown_extensions, "text/markdown")

View File

@ -0,0 +1,4 @@
---
enhancements:
- |
Adds Markdown MIME type (`text/markdown`) support to the `FileTypeRouter` class, so that `.md` and `.markdown` files are recognized.

View File

@ -69,8 +69,9 @@ class TestFileTypeRouter:
test_files_path / "audio" / "the context for this answer is here.wav",
test_files_path / "txt" / "doc_2.txt",
test_files_path / "images" / "apple.jpg",
test_files_path / "markdown" / "sample.md",
]
mime_types = ["text/plain", "audio/x-wav", "text/plain", "image/jpeg"]
mime_types = ["text/plain", "audio/x-wav", "text/plain", "image/jpeg", "text/markdown"]
byte_stream_sources = []
for path, mime_type in zip(file_paths, mime_types):
stream = ByteStream(path.read_bytes())
@ -79,11 +80,12 @@ class TestFileTypeRouter:
mixed_sources = file_paths[:2] + byte_stream_sources[2:]
router = FileTypeRouter(mime_types=["text/plain", "audio/x-wav", "image/jpeg"])
router = FileTypeRouter(mime_types=["text/plain", "audio/x-wav", "image/jpeg", "text/markdown"])
output = router.run(sources=mixed_sources)
assert len(output["text/plain"]) == 2
assert len(output["audio/x-wav"]) == 1
assert len(output["image/jpeg"]) == 1
assert len(output["text/markdown"]) == 1
def test_no_files(self):
"""