fix: adds more extensions to the list that is checked when a file has a plain text MIME type (#916)

* added .txt, .text, and .tab to text file list

* changelog and version
This commit is contained in:
Matt Robinson 2023-07-12 16:07:43 -04:00 committed by GitHub
parent f7b3c0f741
commit 9b830693bd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 29 additions and 2 deletions

View File

@ -1,3 +1,14 @@
## 0.8.2-dev0
### Enhancements
### Features
### Fixes
* Adds `.txt`, `.text`, and `.tab` to the list of extensions to check when a file
has a `text/plain` MIME type.
## 0.8.1
### Enhancements

View File

@ -1 +1 @@
__version__ = "0.8.1" # pragma: no cover
__version__ = "0.8.2-dev0" # pragma: no cover

View File

@ -179,6 +179,7 @@ EXT_TO_FILETYPE = {
".odt": FileType.ODT,
".csv": FileType.CSV,
".tsv": FileType.TSV,
".tab": FileType.TSV,
# NOTE(robinson) - for now we are treating code files as plain text
".js": FileType.TXT,
".py": FileType.TXT,
@ -196,6 +197,21 @@ EXT_TO_FILETYPE = {
None: FileType.UNK,
}
# Extensions for which detect_filetype returns the EXT_TO_FILETYPE entry for
# the extension directly.
# NOTE(review): per the changelog this check applies when the file has a
# `text/plain` MIME type -- confirm against the enclosing branch.
PLAIN_TEXT_EXTENSIONS = [
    # generic text
    ".txt", ".text",
    # email
    ".eml",
    # markup / rich text
    ".md", ".rtf", ".html", ".rst", ".org",
    # delimited tables
    ".csv", ".tsv", ".tab",
    # structured data
    ".json",
]
def _resolve_symlink(file_path):
# Resolve the symlink to get the actual file path
@ -284,7 +300,7 @@ def detect_filetype(
encoding = "utf-8"
formatted_encoding = format_encoding_str(encoding)
if extension in [".eml", ".md", ".rtf", ".html", ".rst", ".org", ".csv", ".tsv", ".json"]:
if extension in PLAIN_TEXT_EXTENSIONS:
return EXT_TO_FILETYPE.get(extension)
# NOTE(crag): for older versions of the OS libmagic package, such as is currently