diff --git a/CHANGELOG.md b/CHANGELOG.md
index dbe51c5ba..4999a4975 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,4 @@
-## 0.15.1-dev1
+## 0.15.1-dev2
 
 ### Enhancements
 
@@ -7,6 +7,10 @@
 ### Fixes
 
 * **Update import of Pinecone exception** Adds compatibility for pinecone-client>=5.0.0
+* **File-type detection catches non-existent file-path.** `detect_filetype()` no longer silently falls back to detecting a file-type based on the extension when no file exists at the path provided. Instead, `FileNotFoundError` is raised. This provides consistent user notification of a mis-typed path rather than an unpredictable exception from a file-type-specific partitioner when the file cannot be opened.
+* **EML files specified as a file-path are detected correctly.** Resolved a bug where an EML file submitted to `partition()` as a file-path was identified as TXT and partitioned using `partition_text()`. EML files specified by path are now identified and processed correctly, including processing any attachments.
+* **A DOCX, PPTX, or XLSX file specified by path and ambiguously identified as MIME-type "application/octet-stream" is identified correctly.** Resolves a shortcoming where a file specified by path immediately fell back to filename-extension based identification when misidentified as "application/octet-stream", either by asserted content type or a mis-guess by libmagic. An MS Office file misidentified in this way is now correctly identified regardless of its filename and whether it is specified by path or file-like object.
+* **Textual content retrieved from a URL with gzip transport compression now partitions correctly.** Resolves a bug where a textual file-type (such as Markdown) retrieved by passing a URL to `partition()` would raise when `gzip` compression was used for transport by the server.
 
 ## 0.15.0
 
diff --git a/example-docs/simple.pptx b/example-docs/simple.pptx
new file mode 100644
index 000000000..ab165bb5b
Binary files /dev/null and b/example-docs/simple.pptx differ
diff --git a/test_unstructured/file_utils/test_filetype.py b/test_unstructured/file_utils/test_filetype.py
index 545f441a0..fac88ab2d 100644
--- a/test_unstructured/file_utils/test_filetype.py
+++ b/test_unstructured/file_utils/test_filetype.py
@@ -6,550 +6,971 @@ from __future__ import annotations
 
 import io
 import os
-import pathlib
 
-import magic
 import pytest
 
 from test_unstructured.unit_utils import (
     FixtureRequest,
     LogCaptureFixture,
     Mock,
-    MonkeyPatch,
-    call,
     example_doc_path,
-    method_mock,
+    patch,
+    property_mock,
 )
-from unstructured.file_utils import filetype
 from unstructured.file_utils.filetype import (
-    _detect_filetype_from_octet_stream,
-    _is_code_mime_type,
-    _is_text_file_a_csv,
-    _is_text_file_a_json,
+    _FileTypeDetectionContext,
+    _TextFileDifferentiator,
+    _ZipFileDifferentiator,
     detect_filetype,
 )
 from unstructured.file_utils.model import FileType
 
 is_in_docker = os.path.exists("/.dockerenv")
 
-
-@pytest.mark.parametrize(
-    ("file_name", "expected_value"),
-    [
-        ("layout-parser-paper-fast.pdf", FileType.PDF),
-        ("fake.docx", FileType.DOCX),
-        ("example.jpg", FileType.JPG),
-        ("fake-text.txt", FileType.TXT),
-        ("eml/fake-email.eml", FileType.EML),
-        ("factbook.xml", FileType.XML),
-        ("example-10k.html", FileType.HTML),
-        ("fake-html.html", FileType.HTML),
-        ("stanley-cups.xlsx", FileType.XLSX),
-        ("stanley-cups.csv", FileType.CSV),
-        ("stanley-cups.tsv", FileType.TSV),
-        ("fake-power-point.pptx", FileType.PPTX),
-        ("winter-sports.epub", FileType.EPUB),
-        ("spring-weather.html.json", FileType.JSON),
-        ("README.org", FileType.ORG),
-        ("README.rst", FileType.RST),
-        ("README.md", FileType.MD),
-        ("fake.odt", FileType.ODT),
-        ("fake-incomplete-json.txt", FileType.TXT),
-    ],
-)
-def test_detect_filetype_from_filename(file_name: str, expected_value: FileType):
-    assert detect_filetype(example_doc_path(file_name)) == expected_value
+# ================================================================================================
+# STRATEGY #1 - CONTENT-TYPE ASSERTED IN CALL
+# ================================================================================================
 
 
 @pytest.mark.parametrize(
-    ("file_name", "expected_value"),
+    ("expected_value", "file_name", "content_type"),
     [
-        ("layout-parser-paper-fast.pdf", FileType.PDF),
-        ("fake.docx", FileType.DOCX),
-        ("example.jpg", FileType.JPG),
-        ("fake-text.txt", FileType.TXT),
-        ("eml/fake-email.eml", FileType.EML),
-        ("factbook.xml", FileType.XML),
-        ("example-10k.html", FileType.HTML),
-        ("fake-html.html", FileType.HTML),
-        ("stanley-cups.xlsx", FileType.XLSX),
-        ("stanley-cups.csv", FileType.CSV),
-        ("stanley-cups.tsv", FileType.TSV),
-        ("fake-power-point.pptx", FileType.PPTX),
-        ("winter-sports.epub", FileType.EPUB),
-        ("fake-doc.rtf", FileType.RTF),
-        ("spring-weather.html.json", FileType.JSON),
-        ("fake.odt", FileType.ODT),
-        ("fake-incomplete-json.txt", FileType.TXT),
+        (FileType.BMP, "img/bmp_24.bmp", "image/bmp"),
+        (FileType.CSV, "stanley-cups.csv", "text/csv"),
+        (FileType.DOC, "simple.doc", "application/msword"),
+        (
+            FileType.DOCX,
+            "simple.docx",
+            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+        ),
+        (FileType.EML, "eml/fake-email.eml", "message/rfc822"),
+        (FileType.EPUB, "winter-sports.epub", "application/epub+zip"),
+        (FileType.HEIC, "img/DA-1p.heic", "image/heic"),
+        (FileType.HTML, "example-10k-1p.html", "text/html"),
+        (FileType.JPG, "img/example.jpg", "image/jpeg"),
+        (FileType.JSON, "spring-weather.html.json", "application/json"),
+        (FileType.MD, "README.md", "text/markdown"),
+        (FileType.MSG, "fake-email.msg", "application/vnd.ms-outlook"),
+        (FileType.ODT, "simple.odt", "application/vnd.oasis.opendocument.text"),
+        (FileType.ORG, "README.org", "text/org"),
+        (FileType.PDF, "pdf/layout-parser-paper-fast.pdf", "application/pdf"),
+        (FileType.PNG, "img/DA-1p.png", "image/png"),
+        (FileType.PPT, "fake-power-point.ppt", "application/vnd.ms-powerpoint"),
+        (
+            FileType.PPTX,
+            "fake-power-point.pptx",
+            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+        ),
+        (FileType.RST, "README.rst", "text/x-rst"),
+        (FileType.RTF, "fake-doc.rtf", "text/rtf"),
+        (FileType.TIFF, "img/layout-parser-paper-fast.tiff", "image/tiff"),
+        (FileType.TSV, "stanley-cups.tsv", "text/tsv"),
+        (FileType.TXT, "norwich-city.txt", "text/plain"),
+        (FileType.WAV, "CantinaBand3.wav", "audio/wav"),
+        (FileType.XLS, "tests-example.xls", "application/vnd.ms-excel"),
+        (
+            FileType.XLSX,
+            "stanley-cups.xlsx",
+            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+        ),
+        (FileType.XML, "factbook.xml", "application/xml"),
+        (FileType.ZIP, "simple.zip", "application/zip"),
     ],
 )
-def test_detect_filetype_from_filename_with_extension(
-    file_name: str, expected_value: FileType, monkeypatch: MonkeyPatch
+def test_it_detects_correct_file_type_from_file_path_with_correct_asserted_content_type(
+    file_name: str, content_type: str, expected_value: FileType, ctx_mime_type_: Mock
 ):
-    """File-type is detected from extension when libmagic not available or file does not exist."""
-    # -- when libmagic is not available --
-    monkeypatch.setattr(filetype, "LIBMAGIC_AVAILABLE", False)
-    assert detect_filetype(example_doc_path(file_name)) == expected_value
-    # -- when file does not exist --
-    monkeypatch.setattr(filetype, "LIBMAGIC_AVAILABLE", True)
-    extension = pathlib.Path(file_name).suffix
-    assert detect_filetype(example_doc_path("not-on-disk" + extension)) == expected_value
+    # -- disable strategy #2, leaving only asserted content-type and extension --
+    ctx_mime_type_.return_value = None
+
+    file_type = detect_filetype(example_doc_path(file_name), content_type=content_type)
+
+    # -- Strategy 1 should not need to refer to guessed MIME-type and detection should not
+    # -- fall back to strategy 2 for any of these test cases.
+    ctx_mime_type_.assert_not_called()
+    assert file_type == expected_value
 
 
 @pytest.mark.parametrize(
-    ("file_name", "expected_value"),
+    ("expected_value", "file_name", "content_type"),
     [
-        ("pdf/layout-parser-paper-fast.pdf", [FileType.PDF]),
-        ("fake.docx", [FileType.DOCX]),
-        ("img/example.jpg", [FileType.JPG]),
-        ("fake-text.txt", [FileType.TXT]),
-        ("eml/fake-email.eml", [FileType.EML]),
-        ("factbook.xml", [FileType.XML]),
-        # NOTE(robinson]) - For the document, some operating systems return
-        # */xml and some return */html. Either could be acceptable depending on the OS
-        ("example-10k.html", [FileType.HTML, FileType.XML]),
-        ("fake-html.html", [FileType.HTML]),
-        ("stanley-cups.xlsx", [FileType.XLSX]),
-        ("stanley-cups.csv", [FileType.CSV]),
-        ("stanley-cups.tsv", [FileType.TSV]),
-        ("fake-power-point.pptx", [FileType.PPTX]),
-        ("winter-sports.epub", [FileType.EPUB]),
-        ("fake-incomplete-json.txt", [FileType.TXT]),
+        (FileType.BMP, "img/bmp_24.bmp", "image/bmp"),
+        (FileType.CSV, "stanley-cups.csv", "text/csv"),
+        (FileType.DOC, "simple.doc", "application/msword"),
+        (
+            FileType.DOCX,
+            "simple.docx",
+            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+        ),
+        (FileType.EML, "eml/fake-email.eml", "message/rfc822"),
+        (FileType.EPUB, "winter-sports.epub", "application/epub+zip"),
+        (FileType.HEIC, "img/DA-1p.heic", "image/heic"),
+        (FileType.HTML, "example-10k-1p.html", "text/html"),
+        (FileType.JPG, "img/example.jpg", "image/jpeg"),
+        (FileType.JSON, "spring-weather.html.json", "application/json"),
+        (FileType.MD, "README.md", "text/markdown"),
+        (FileType.MSG, "fake-email.msg", "application/vnd.ms-outlook"),
+        (FileType.ODT, "simple.odt", "application/vnd.oasis.opendocument.text"),
+        (FileType.ORG, "README.org", "text/org"),
+        (FileType.PDF, "pdf/layout-parser-paper-fast.pdf", "application/pdf"),
+        (FileType.PNG, "img/DA-1p.png", "image/png"),
+        (FileType.PPT, "fake-power-point.ppt", "application/vnd.ms-powerpoint"),
+        (
+            FileType.PPTX,
+            "fake-power-point.pptx",
+            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+        ),
+        (FileType.RST, "README.rst", "text/x-rst"),
+        (FileType.RTF, "fake-doc.rtf", "text/rtf"),
+        (FileType.TIFF, "img/layout-parser-paper-fast.tiff", "image/tiff"),
+        (FileType.TSV, "stanley-cups.tsv", "text/tsv"),
+        (FileType.TXT, "norwich-city.txt", "text/plain"),
+        (FileType.WAV, "CantinaBand3.wav", "audio/wav"),
+        (FileType.XLS, "tests-example.xls", "application/vnd.ms-excel"),
+        (
+            FileType.XLSX,
+            "stanley-cups.xlsx",
+            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+        ),
+        (FileType.XML, "factbook.xml", "application/xml"),
+        (FileType.ZIP, "simple.zip", "application/zip"),
     ],
 )
-def test_detect_filetype_from_file(file_name: str, expected_value: list[FileType]):
+def test_it_detects_correct_file_type_from_file_no_name_with_correct_asserted_content_type(
+    file_name: str, content_type: str, expected_value: FileType, ctx_mime_type_: Mock
+):
+    # -- disable strategy #2 (guessed mime-type) --
+    ctx_mime_type_.return_value = None
+    # -- disable strategy #3 (filename extension) by supplying no source of file name --
     with open(example_doc_path(file_name), "rb") as f:
-        assert detect_filetype(file=f) in expected_value
+        file = io.BytesIO(f.read())
+
+    file_type = detect_filetype(file=file, content_type=content_type)
+
+    # -- Strategy 1 should not need to refer to guessed MIME-type and detection should not
+    # -- fall back to strategy 2 for any of these test cases.
+    ctx_mime_type_.assert_not_called()
+    assert file_type is expected_value
+
+
+# ================================================================================================
+# STRATEGY #2 - GUESS MIME-TYPE WITH LIBMAGIC
+# ================================================================================================
+
+
+@pytest.mark.parametrize(
+    ("expected_value", "file_name", "mime_type"),
+    [
+        (FileType.BMP, "img/bmp_24.bmp", "image/bmp"),
+        (FileType.CSV, "stanley-cups.csv", "text/csv"),
+        (FileType.CSV, "stanley-cups.csv", "application/csv"),
+        (FileType.CSV, "stanley-cups.csv", "application/x-csv"),
+        (FileType.DOC, "simple.doc", "application/msword"),
+        (
+            FileType.DOCX,
+            "simple.docx",
+            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+        ),
+        (FileType.EML, "eml/fake-email.eml", "message/rfc822"),
+        (FileType.EPUB, "winter-sports.epub", "application/epub"),
+        (FileType.EPUB, "winter-sports.epub", "application/epub+zip"),
+        (FileType.HEIC, "img/DA-1p.heic", "image/heic"),
+        (FileType.HTML, "example-10k-1p.html", "text/html"),
+        (FileType.JPG, "img/example.jpg", "image/jpeg"),
+        (FileType.JSON, "spring-weather.html.json", "application/json"),
+        (FileType.MD, "README.md", "text/markdown"),
+        (FileType.MD, "README.md", "text/x-markdown"),
+        (FileType.MSG, "fake-email.msg", "application/vnd.ms-outlook"),
+        (FileType.ODT, "simple.odt", "application/vnd.oasis.opendocument.text"),
+        (FileType.ORG, "README.org", "text/org"),
+        (FileType.PDF, "pdf/layout-parser-paper-fast.pdf", "application/pdf"),
+        (FileType.PNG, "img/DA-1p.png", "image/png"),
+        (FileType.PPT, "fake-power-point.ppt", "application/vnd.ms-powerpoint"),
+        (
+            FileType.PPTX,
+            "fake-power-point.pptx",
+            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+        ),
+        (FileType.RST, "README.rst", "text/x-rst"),
+        (FileType.RTF, "fake-doc.rtf", "text/rtf"),
+        (FileType.RTF, "fake-doc.rtf", "application/rtf"),
+        (FileType.TIFF, "img/layout-parser-paper-fast.tiff", "image/tiff"),
+        (FileType.TSV, "stanley-cups.tsv", "text/tsv"),
+        (FileType.TXT, "norwich-city.txt", "text/plain"),
+        (FileType.TXT, "simple.yaml", "text/yaml"),
+        (FileType.WAV, "CantinaBand3.wav", "audio/wav"),
+        (FileType.XLS, "tests-example.xls", "application/vnd.ms-excel"),
+        (
+            FileType.XLSX,
+            "stanley-cups.xlsx",
+            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+        ),
+        (FileType.XML, "factbook.xml", "application/xml"),
+        (FileType.XML, "factbook.xml", "text/xml"),
+        (FileType.ZIP, "simple.zip", "application/zip"),
+    ],
+)
+def test_it_detects_correct_file_type_using_strategy_2_when_libmagic_guesses_recognized_mime_type(
+    file_name: str, mime_type: str, expected_value: FileType, ctx_mime_type_: Mock
+):
+    # -- libmagic guesses a MIME-type mapped to a `FileType` --
+    ctx_mime_type_.return_value = mime_type
+    # -- disable strategy #3 (filename extension) by not providing filename --
+    with open(example_doc_path(file_name), "rb") as f:
+        file = io.BytesIO(f.read())
+
+    # -- disable strategy #1 by not asserting a content_type in the call --
+    file_type = detect_filetype(file=file)
+
+    # -- ctx.mime_type may be referenced multiple times, but at least once --
+    ctx_mime_type_.assert_called_with()
+    assert file_type is expected_value
+
+
+@pytest.mark.parametrize(
+    ("expected_value", "file_name"),
+    [
+        (FileType.BMP, "img/bmp_24.bmp"),
+        (FileType.CSV, "stanley-cups.csv"),
+        (FileType.DOCX, "simple.docx"),
+        (FileType.EML, "eml/fake-email.eml"),
+        (FileType.EPUB, "winter-sports.epub"),
+        (FileType.HEIC, "img/DA-1p.heic"),
+        (FileType.HTML, "ideas-page.html"),
+        (FileType.JPG, "img/example.jpg"),
+        (FileType.JSON, "spring-weather.html.json"),
+        (FileType.ODT, "simple.odt"),
+        (FileType.PDF, "pdf/layout-parser-paper-fast.pdf"),
+        (FileType.PNG, "img/DA-1p.png"),
+        (FileType.PPTX, "fake-power-point.pptx"),
+        (FileType.RTF, "fake-doc.rtf"),
+        (FileType.TIFF, "img/layout-parser-paper-fast.tiff"),
+        (FileType.TXT, "norwich-city.txt"),
+        (FileType.WAV, "CantinaBand3.wav"),
+        (FileType.XLSX, "stanley-cups.xlsx"),
+        (FileType.XML, "factbook.xml"),
+        (FileType.ZIP, "simple.zip"),
+    ],
+)
+def test_it_detects_most_file_types_using_strategy_2_when_libmagic_guesses_mime_type_for_itself(
+    file_name: str, expected_value: FileType
+):
+    """Does not work for all types, in particular:
+
+    TODOs:
+    - DOC is misidentified as MSG, TODO on that below.
+    - MSG is misidentified as UNK, but only on CI.
+    - PPT is misidentified as MSG, same fix as DOC.
+    - TSV is identified as TXT, maybe need an `.is_tsv` predicate in `_TextFileDifferentiator`
+    - XLS is misidentified as MSG, same fix as DOC.
+
+    NOCANDOs: w/o an extension I think these are the best we can do.
+    - MD is identified as TXT
+    - ORG is identified as TXT
+    - RST is identified as TXT
+    """
+    # -- disable strategy #1 by not asserting a content_type in the call --
+    # -- disable strategy #3 (extension) by passing file-like object with no `.name` attribute --
+    with open(example_doc_path(file_name), "rb") as f:
+        file = io.BytesIO(f.read())
+
+    assert detect_filetype(file=file) is expected_value
+
+
+# NOTE(scanny): magic gets this wrong ("application/x-ole-storage") but filetype lib gets it right
+# ("application/msword"). Need a differentiator for "application/x-ole-storage".
+@pytest.mark.xfail(reason="TODO: FIX", raises=AssertionError, strict=True)
+@pytest.mark.parametrize(
+    ("expected_value", "file_name"),
+    [
+        (FileType.DOC, "simple.doc"),
+        (FileType.PPT, "fake-power-point.ppt"),
+        (FileType.XLS, "tests-example.xls"),
+        # -- only fails on CI, maybe different libmagic version or "magic-files" --
+        # (FileType.MSG, "fake-email.msg"),
+    ],
+)
+def test_it_detects_MS_Office_file_types_using_strategy_2_when_libmagic_guesses_mime_type(
+    file_name: str, expected_value: FileType
+):
+    with open(example_doc_path(file_name), "rb") as f:
+        file = io.BytesIO(f.read())
+    assert detect_filetype(file=file) is expected_value
+
+
+@pytest.mark.parametrize(
+    ("expected_value", "file_name"),
+    [
+        # -- `filetype` lib recognizes all these binary file-types --
+        (FileType.BMP, "img/bmp_24.bmp"),
+        (FileType.DOC, "simple.doc"),
+        (FileType.DOCX, "simple.docx"),
+        (FileType.EPUB, "winter-sports.epub"),
+        (FileType.HEIC, "img/DA-1p.heic"),
+        (FileType.JPG, "img/example.jpg"),
+        (FileType.ODT, "simple.odt"),
+        (FileType.PDF, "pdf/layout-parser-paper-fast.pdf"),
+        (FileType.PNG, "img/DA-1p.png"),
+        (FileType.PPT, "fake-power-point.ppt"),
+        (FileType.PPTX, "fake-power-point.pptx"),
+        (FileType.RTF, "fake-doc.rtf"),
+        (FileType.TIFF, "img/layout-parser-paper-fast.tiff"),
+        (FileType.WAV, "CantinaBand3.wav"),
+        (FileType.XLS, "tests-example.xls"),
+        (FileType.XLSX, "stanley-cups.xlsx"),
+        (FileType.ZIP, "simple.zip"),
+        # -- but it doesn't recognize textual file-types at all --
+        (FileType.UNK, "stanley-cups.csv"),
+        (FileType.UNK, "eml/fake-email.eml"),
+        (FileType.UNK, "example-10k-1p.html"),
+        (FileType.UNK, "spring-weather.html.json"),
+        (FileType.UNK, "README.md"),
+        (FileType.UNK, "README.org"),
+        (FileType.UNK, "README.rst"),
+        (FileType.UNK, "stanley-cups.tsv"),
+        (FileType.UNK, "norwich-city.txt"),
+        (FileType.UNK, "factbook.xml"),
+        # -- and it doesn't recognize MSG files --
+        (FileType.UNK, "fake-email.msg"),
+    ],
+)
+def test_strategy_2_can_detect_only_binary_file_types_when_libmagic_is_unavailable(
+    file_name: str, expected_value: FileType, LIBMAGIC_AVAILABLE_False: bool
+):
+    """File-type is detected using `filetype` library when libmagic is not available.
+
+    `filetype.guess_mime()` does a good job on binary file types (PDF, images, legacy MS-Office),
+    but doesn't even try to guess textual file-types.
+    """
+    # -- disable strategy #3 (extension) by passing file-like object with no `.name` attribute --
+    with open(example_doc_path(file_name), "rb") as f:
+        file = io.BytesIO(f.read())
+    # -- simulate libmagic is not available --
+    assert LIBMAGIC_AVAILABLE_False is False
+
+    # -- disable strategy #1 by not asserting a content_type in the call --
+    file_type = detect_filetype(file=file)
+
+    assert file_type is expected_value
 
 
 def test_detect_filetype_from_file_warns_when_libmagic_is_not_installed(
-    monkeypatch: MonkeyPatch, caplog: LogCaptureFixture
+    caplog: LogCaptureFixture, LIBMAGIC_AVAILABLE_False: bool
 ):
-    monkeypatch.setattr(filetype, "LIBMAGIC_AVAILABLE", False)
     with open(example_doc_path("fake-text.txt"), "rb") as f:
         detect_filetype(file=f)
 
     assert "WARNING" in caplog.text
+    assert "libmagic is unavailable but assists in filetype detection. Please cons" in caplog.text
 
 
-def test_detect_XML_from_application_xml_file_path(magic_from_file_: Mock):
-    magic_from_file_.return_value = "application/xml"
-    file_path = example_doc_path("factbook.xml")
-
-    filetype = detect_filetype(file_path)
-
-    magic_from_file_.assert_called_once_with(file_path, mime=True)
-    assert filetype == FileType.XML
+# ================================================================================================
+# STRATEGY #3 - MAP FILENAME EXTENSION TO FILETYPE
+# ================================================================================================
 
 
-def test_detect_CSV_from_text_csv_file_path(magic_from_file_: Mock):
-    magic_from_file_.return_value = "text/csv"
-    file_path = example_doc_path("stanley-cups.csv")
+@pytest.mark.parametrize(
+    ("expected_value", "file_name"),
+    [
+        (FileType.BMP, "img/bmp_24.bmp"),
+        (FileType.CSV, "stanley-cups.csv"),
+        (FileType.DOC, "simple.doc"),
+        (FileType.DOCX, "simple.docx"),
+        (FileType.EML, "eml/fake-email.eml"),
+        (FileType.EPUB, "winter-sports.epub"),
+        (FileType.HEIC, "img/DA-1p.heic"),
+        (FileType.HTML, "example-10k-1p.html"),
+        (FileType.JPG, "img/example.jpg"),
+        (FileType.JSON, "spring-weather.html.json"),
+        (FileType.MD, "README.md"),
+        (FileType.MSG, "fake-email.msg"),
+        (FileType.ODT, "simple.odt"),
+        (FileType.ORG, "README.org"),
+        (FileType.PDF, "pdf/layout-parser-paper-fast.pdf"),
+        (FileType.PNG, "img/DA-1p.png"),
+        (FileType.PPT, "fake-power-point.ppt"),
+        (FileType.PPTX, "fake-power-point.pptx"),
+        (FileType.RST, "README.rst"),
+        (FileType.RTF, "fake-doc.rtf"),
+        (FileType.TIFF, "img/layout-parser-paper-fast.tiff"),
+        (FileType.TSV, "stanley-cups.tsv"),
+        (FileType.TXT, "norwich-city.txt"),
+        (FileType.WAV, "CantinaBand3.wav"),
+        (FileType.XLS, "tests-example.xls"),
+        (FileType.XLSX, "stanley-cups.xlsx"),
+        (FileType.XML, "factbook.xml"),
+        (FileType.ZIP, "simple.zip"),
+    ],
+)
+def test_it_detects_correct_file_type_from_strategy_3_when_extension_maps_to_file_type(
+    file_name: str, expected_value: FileType, ctx_mime_type_: Mock
+):
+    # -- disable strategy #2 by making libmagic always guess `None` --
+    ctx_mime_type_.return_value = None
 
-    filetype = detect_filetype(file_path)
+    # -- disable strategy #1 by not asserting a content_type in the call --
+    # -- enable strategy #3 by passing filename as source for extension --
+    file_type = detect_filetype(example_doc_path(file_name))
 
-    magic_from_file_.assert_called_once_with(file_path, mime=True)
-    assert filetype == FileType.CSV
+    # -- ctx.mime_type may be referenced multiple times, but at least once --
+    ctx_mime_type_.assert_called_with()
+    assert file_type is expected_value
 
 
-def test_detect_TXT_from_text_x_script_python_file_path(magic_from_file_: Mock):
-    magic_from_file_.return_value = "text/x-script.python"
-    file_path = example_doc_path("logger.py")
+@pytest.mark.parametrize(
+    ("expected_value", "file_name", "mime_type"),
+    [
+        (FileType.BMP, "img/bmp_24.bmp", "application/zip"),
+        (FileType.DOC, "simple.doc", None),
+        (FileType.MSG, "fake-email.msg", "application/octet-stream"),
+    ],
+)
+def test_it_falls_back_to_extension_strategy_when_prior_strategies_fail(
+    file_name: str, mime_type: str | None, expected_value: FileType, ctx_mime_type_: Mock
+):
+    ctx_mime_type_.return_value = mime_type
 
-    filetype = detect_filetype(file_path)
+    file_type = detect_filetype(example_doc_path(file_name))
 
-    magic_from_file_.assert_called_once_with(file_path, mime=True)
-    assert filetype == FileType.TXT
+    ctx_mime_type_.assert_called_with()
+    assert file_type is expected_value
 
 
-def test_detect_TXT_from_text_x_script_python_file(magic_from_buffer_: Mock):
-    magic_from_buffer_.return_value = "text/x-script.python"
-    file_path = example_doc_path("logger.py")
-
-    with open(file_path, "rb") as f:
-        head = f.read(4096)
-        f.seek(0)
-        filetype = detect_filetype(file=f)
-
-    magic_from_buffer_.assert_called_once_with(head, mime=True)
-    assert filetype == FileType.TXT
+# ================================================================================================
+# SPECIAL CASES
+# ================================================================================================
 
 
-def test_is_code_mime_type_for_Go():
-    assert _is_code_mime_type("text/x-go") is True
+@pytest.mark.parametrize("mime_type", ["application/xml", "text/xml"])
+@pytest.mark.parametrize("extension", [".html", ".htm"])
+def test_it_detects_HTML_from_guessed_mime_type_ending_with_xml_and_html_extension(
+    mime_type: str, extension: str, ctx_mime_type_: Mock
+):
+    ctx_mime_type_.return_value = mime_type
+    with open(example_doc_path("example-10k-1p.html"), "rb") as f:
+        file = io.BytesIO(f.read())
+        file.name = f"a/b/page{extension}"
+
+    file_type = detect_filetype(file=file)
+
+    ctx_mime_type_.assert_called_with()
+    assert file_type is FileType.HTML
 
 
-def test_detect_TXT_from_text_go_file(magic_from_buffer_: Mock):
-    magic_from_buffer_.return_value = "text/x-go"
-    file_path = example_doc_path("fake.go")
-
-    with open(file_path, "rb") as f:
-        head = f.read(4096)
-        f.seek(0)
-        filetype = detect_filetype(file=f)
-
-    magic_from_buffer_.assert_called_once_with(head, mime=True)
-    assert filetype == FileType.TXT
-
-
-def test_detect_RTF_from_application_rtf_file_path(magic_from_file_: Mock):
-    magic_from_file_.return_value = "application/rtf"
-    file_path = example_doc_path("fake-doc.rtf")
-
-    filetype = detect_filetype(file_path)
-
-    magic_from_file_.assert_called_once_with(file_path, mime=True)
-    assert filetype == FileType.RTF
-
-
-def test_detect_XML_from_text_xml_file_path(magic_from_file_: Mock):
-    magic_from_file_.return_value = "text/xml"
-    file_path = example_doc_path("factbook.xml")
-
-    filetype = detect_filetype(file_path)
-
-    magic_from_file_.assert_called_once_with(file_path, mime=True)
-    assert filetype == FileType.XML
-
-
-def test_detect_HTML_from_application_xml_file_path_with_html_extension(magic_from_file_: Mock):
-    magic_from_file_.return_value = "application/xml"
-    file_path = example_doc_path("fake-html.html")
-
-    filetype = detect_filetype(file_path)
-
-    magic_from_file_.assert_called_once_with(file_path, mime=True)
-    assert filetype == FileType.HTML
-
-
-def test_detect_HTML_from_text_xml_file_path_with_html_extension(magic_from_file_: Mock):
-    magic_from_file_.return_value = "text/xml"
-    file_path = example_doc_path("fake-html.html")
-
-    filetype = detect_filetype(file_path)
-
-    magic_from_file_.assert_called_once_with(file_path, mime=True)
-    assert filetype == FileType.HTML
-
-
-def test_detect_DOCX_from_application_octet_stream_file_no_extension(magic_from_buffer_: Mock):
-    magic_from_buffer_.return_value = "application/octet-stream"
-    with open(example_doc_path("simple.docx"), "rb") as f:
+@pytest.mark.parametrize(
+    "mime_type",
+    [
+        "application/octet-stream",
+        "application/zip",
+        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+    ],
+)
+@pytest.mark.parametrize(
+    ("expected_value", "file_name"),
+    [
+        (FileType.DOCX, "simple.docx"),
+        (FileType.PPTX, "fake-power-point.pptx"),
+        (FileType.XLSX, "stanley-cups.xlsx"),
+        (FileType.ZIP, "simple.zip"),
+    ],
+)
+def test_it_differentiates_files_when_libmagic_guesses_octet_stream_zip_or_modern_ms_office(
+    mime_type: str, file_name: str, expected_value: FileType, ctx_mime_type_: Mock
+):
+    ctx_mime_type_.return_value = mime_type
+    # -- disable extension-based strategy #3 --
+    with open(example_doc_path(file_name), "rb") as f:
         file = io.BytesIO(f.read())
 
-    filetype = detect_filetype(file=file)
+    file_type = detect_filetype(file=file)
 
-    magic_from_buffer_.assert_called_once_with(file.getvalue()[:4096], mime=True)
-    assert filetype == FileType.DOCX
+    ctx_mime_type_.assert_called_with()
+    assert file_type is expected_value
 
 
-def test_detect_DOCX_from_application_octet_stream_file_path(magic_from_file_: Mock):
-    magic_from_file_.return_value = "application/octet-stream"
-    file_path = example_doc_path("simple.docx")
-
-    filetype = detect_filetype(file_path)
-
-    magic_from_file_.assert_called_once_with(file_path, mime=True)
-    assert filetype == FileType.DOCX
-
-
-def test_detect_DOCX_from_application_zip_file_path(magic_from_file_: Mock):
-    magic_from_file_.return_value = "application/zip"
-    file_path = example_doc_path("simple.docx")
-
-    filetype = detect_filetype(file_path)
-
-    magic_from_file_.assert_called_once_with(file_path, mime=True)
-    assert filetype == FileType.DOCX
-
-
-def test_detect_ZIP_from_application_zip_file_path(magic_from_file_: Mock):
-    magic_from_file_.return_value = "application/zip"
-    file_path = example_doc_path("simple.zip")
-
-    filetype = detect_filetype(file_path)
-
-    magic_from_file_.assert_called_once_with(file_path, mime=True)
-    assert filetype == FileType.ZIP
-
-
-def test_detect_DOC_from_application_msword_file_path(magic_from_file_: Mock):
-    magic_from_file_.return_value = "application/msword"
-    file_path = example_doc_path("fake.doc")
-
-    filetype = detect_filetype(file_path)
-
-    magic_from_file_.assert_called_once_with(file_path, mime=True)
-    assert filetype == FileType.DOC
-
-
-def test_detect_PPT_from_application_vnd_ms_powerpoint_file_path(magic_from_file_: Mock):
-    magic_from_file_.return_value = "application/vnd.ms-powerpoint"
-    file_path = example_doc_path("fake-power-point.ppt")
-
-    filetype = detect_filetype(file_path)
-
-    magic_from_file_.assert_called_once_with(file_path, mime=True)
-    assert filetype == FileType.PPT
-
-
-def test_detect_XLS_from_application_vnd_ms_excel_file_path(magic_from_file_: Mock):
-    magic_from_file_.return_value = "application/vnd.ms-excel"
-    file_path = example_doc_path("tests-example.xls")
-
-    filetype = detect_filetype(file_path)
-
-    magic_from_file_.assert_called_once_with(file_path, mime=True)
-    assert filetype == FileType.XLS
-
-
-def test_detect_XLSX_from_application_octet_stream_file_no_extension(magic_from_buffer_: Mock):
-    magic_from_buffer_.return_value = "application/octet-stream"
-    with open(example_doc_path("stanley-cups.xlsx"), "rb") as f:
+@pytest.mark.parametrize(
+    ("mime_type", "file_name"),
+    [
+        ("text/x-script.python", "logger.py"),
+        ("text/x-go", "fake.go"),
+        ("application/x-javascript", "fake-text.txt"),
+    ],
+)
+def test_it_detects_TXT_for_source_code_files(mime_type: str, file_name: str, ctx_mime_type_: Mock):
+    ctx_mime_type_.return_value = mime_type
+    # -- disable extension-based strategy #3 --
+    with open(example_doc_path(file_name), "rb") as f:
         file = io.BytesIO(f.read())
 
-    filetype = detect_filetype(file=file)
+    file_type = detect_filetype(file=file)
 
-    magic_from_buffer_.assert_called_once_with(file.getvalue()[:4096], mime=True)
-    assert filetype == FileType.XLSX
+    ctx_mime_type_.assert_called_with()
+    assert file_type is FileType.TXT
 
 
-def test_detect_XLSX_from_application_octet_stream_file_path(magic_from_file_: Mock):
-    magic_from_file_.return_value = "application/octet-stream"
-    file_path = example_doc_path("stanley-cups.xlsx")
-
-    filetype = detect_filetype(file_path)
-
-    magic_from_file_.assert_called_once_with(file_path, mime=True)
-    assert filetype == FileType.XLSX
-
-
-def test_detect_PPTX_from_application_octet_stream_file_no_extension(magic_from_buffer_: Mock):
-    magic_from_buffer_.return_value = "application/octet-stream"
-    with open(example_doc_path("fake-power-point.pptx"), "rb") as f:
-        file = io.BytesIO(f.read())
-
-    filetype = detect_filetype(file=file)
-
-    magic_from_buffer_.assert_called_once_with(file.getvalue()[:4096], mime=True)
-    assert filetype == FileType.PPTX
-
-
-def test_detect_PPTX_from_application_octet_stream_file_path(magic_from_file_: Mock):
-    magic_from_file_.return_value = "application/octet-stream"
-    file_path = example_doc_path("fake-power-point.pptx")
-
-    filetype = detect_filetype(file_path)
-
-    magic_from_file_.assert_called_once_with(file_path, mime=True)
-    assert filetype == FileType.PPTX
-
-
-def test_detect_UNK_from_application_octet_stream_text_file_no_extension(magic_from_buffer_: Mock):
-    magic_from_buffer_.return_value = "application/octet-stream"
+def test_detects_TXT_from_an_unknown_guessed_text_subtype(ctx_mime_type_: Mock):
+    ctx_mime_type_.return_value = "text/new-type"
     with open(example_doc_path("fake-text.txt"), "rb") as f:
         file = io.BytesIO(f.read())
 
     filetype = detect_filetype(file=file)
 
-    assert magic_from_buffer_.call_args_list == [
-        call(file.getvalue()[:4096], mime=True),
-        call(b"", mime=True),
-    ]
-    assert filetype == FileType.UNK
-
-
-def test_detect_ZIP_from_application_zip_not_a_zip_file(magic_from_buffer_: Mock):
-    magic_from_buffer_.return_value = "application/zip"
-
-    with open(example_doc_path("fake-text.txt"), "rb") as f:
-        head = f.read(4096)
-        f.seek(0)
-        filetype = detect_filetype(file=f)
-
-    assert magic_from_buffer_.call_args_list == [
-        call(head, mime=True),
-        call(b"", mime=True),
-    ]
-    assert filetype == FileType.ZIP
-
-
-def test_detect_DOCX_from_docx_mime_type_file_no_extension(magic_from_buffer_: Mock):
-    magic_from_buffer_.return_value = (
-        "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
-    )
-    with open(example_doc_path("simple.docx"), "rb") as f:
-        file = io.BytesIO(f.read())
-
-    filetype = detect_filetype(file=file)
-
-    magic_from_buffer_.assert_called_once_with(file.getvalue()[:4096], mime=True)
-    assert filetype == FileType.DOCX
-
-
-def test_detect_XLSX_from_xlsx_mime_type_file_no_extension(magic_from_buffer_: Mock):
-    magic_from_buffer_.return_value = (
-        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
-    )
-    with open(example_doc_path("stanley-cups.xlsx"), "rb") as f:
-        file = io.BytesIO(f.read())
-
-    filetype = detect_filetype(file=file)
-
-    magic_from_buffer_.assert_called_once_with(file.getvalue()[:4096], mime=True)
-    assert filetype == FileType.XLSX
-
-
-def test_detect_UNK_from_extension_of_non_existent_file_path():
-    assert detect_filetype(example_doc_path("made_up.fake")) == FileType.UNK
-
-
-def test_detect_PNG_from_extension_of_non_existent_file_path():
-    assert detect_filetype(example_doc_path("made_up.png")) == FileType.PNG
-
-
-def test_detect_TXT_from_unknown_text_subtype_file_no_extension(magic_from_buffer_: Mock):
-    magic_from_buffer_.return_value = "text/new-type"
-    with open(example_doc_path("fake-text.txt"), "rb") as f:
-        file = io.BytesIO(f.read())
-
-    filetype = detect_filetype(file=file)
-
-    magic_from_buffer_.assert_called_once_with(file.getvalue()[:4096], mime=True)
+    ctx_mime_type_.assert_called_with()
     assert filetype == FileType.TXT
 
 
-def test_detect_BMP_from_file_path():
-    assert detect_filetype(example_doc_path("bmp_24.bmp")) == FileType.BMP
-
-
-def test_detect_BMP_from_file_no_extension():
-    with open(example_doc_path("img/bmp_24.bmp"), "rb") as f:
-        file = io.BytesIO(f.read())
-    assert detect_filetype(file=file) == FileType.BMP
-
-
-def test_detect_filetype_raises_when_both_path_and_file_like_object_are_specified():
-    file_path = example_doc_path("fake-email.eml")
-    with open(example_doc_path(file_path), "rb") as f:
-        file = io.BytesIO(f.read())
-
-    with pytest.raises(ValueError, match="Exactly one of filename and file must be specified."):
-        detect_filetype(filename=file_path, file=file)
-
-
 def test_detect_filetype_raises_with_neither_path_or_file_like_object_specified():
-    with pytest.raises(ValueError, match="Exactly one of filename and file must be specified."):
+    with pytest.raises(ValueError, match="either `file_path` or `file` argument must be provided"):
         detect_filetype()
 
 
-def test_FileType_is_ordererd_by_name():
-    """FileType is a total order on name, e.g. FileType.A < FileType.B."""
-    assert FileType.EML < FileType.HTML < FileType.XML
-
-
-@pytest.mark.parametrize(
-    ("content", "expected_value"),
-    [
-        (b"d\xe2\x80", False),  # Invalid JSON
-        (b'[{"key": "value"}]', True),  # Valid JSON
-        (b"", False),  # Empty content
-        (b'"This is not a JSON"', False),  # Serializable as JSON, but we want to treat it as txt
-    ],
-)
-def test_is_text_file_a_json_distinguishes_JSON_from_text(content: bytes, expected_value: bool):
-    with io.BytesIO(content) as f:
-        assert _is_text_file_a_json(file=f) == expected_value
-
-
-@pytest.mark.parametrize(
-    ("content", "expected_value"),
-    [
-        (b"d\xe2\x80", False),  # Invalid CSV
-        (b'[{"key": "value"}]', False),  # Invalid CSV
-        (b"column1,column2,column3\nvalue1,value2,value3\n", True),  # Valid CSV
-        (b"", False),  # Empty content
-    ],
-)
-def test_is_text_file_a_csv_distinguishes_CSV_from_text(content: bytes, expected_value: bool):
-    with io.BytesIO(content) as f:
-        assert _is_text_file_a_csv(file=f) == expected_value
-
-
-def test_csv_and_json_checks_with_filename_accommodate_utf_32_encoded_file():
-    file_path = example_doc_path("fake-text-utf-32.txt")
-    assert _is_text_file_a_csv(filename=file_path) is False
-    assert _is_text_file_a_json(filename=file_path) is False
-
-
-def test_csv_and_json_checks_with_file_accommodate_utf_32_encoded_content():
-    with open(example_doc_path("fake-text-utf-32.txt"), "rb") as f:
-        file = io.BytesIO(f.read())
-
-    assert _is_text_file_a_csv(file=file) is False
-    file.seek(0)
-    assert _is_text_file_a_json(file=file) is False
-
-
-def test_detect_EMPTY_from_file_path_to_empty_file():
+def test_it_detects_EMPTY_from_file_path_to_empty_file():
     assert detect_filetype(example_doc_path("empty.txt")) == FileType.EMPTY
 
 
-def test_detect_EMPTY_from_file_that_is_empty():
+def test_it_detects_EMPTY_from_empty_file_like_object():
     with open(example_doc_path("empty.txt"), "rb") as f:
         assert detect_filetype(file=f) == FileType.EMPTY
 
 
-def test_detect_CSV_from_path_and_file_when_content_contains_escaped_commas():
+def test_it_detect_CSV_from_path_and_file_when_content_contains_escaped_commas():
     file_path = example_doc_path("csv-with-escaped-commas.csv")
 
-    assert detect_filetype(filename=file_path) == FileType.CSV
+    assert detect_filetype(file_path) == FileType.CSV
     with open(file_path, "rb") as f:
         assert detect_filetype(file=f) == FileType.CSV
 
 
-def test_detect_filetype_from_octet_stream():
-    with open(example_doc_path("emoji.xlsx"), "rb") as f:
-        assert _detect_filetype_from_octet_stream(file=f) == FileType.XLSX
-
-
-def test_detect_WAV_from_filename():
-    assert detect_filetype(example_doc_path("CantinaBand3.wav")) == FileType.WAV
-
-
-def test_detect_wav_from_file():
-    with open(example_doc_path("CantinaBand3.wav"), "rb") as f:
-        assert detect_filetype(file=f) == FileType.WAV
-
-
-def test_detect_TXT_from_file_path_to_yaml():
-    assert detect_filetype(example_doc_path("simple.yaml")) == FileType.TXT
-
-
-def test_detect_TXT_from_yaml_file(magic_from_buffer_: Mock):
-    magic_from_buffer_.return_value = "text/yaml"
-
-    with open(example_doc_path("simple.yaml"), "rb") as f:
-        head = f.read(4096)
-        f.seek(0)
-        file_type = detect_filetype(file=f)
-
-    magic_from_buffer_.assert_called_once_with(head, mime=True)
-    assert file_type == FileType.TXT
-
-
 # ================================================================================================
 # MODULE-LEVEL FIXTURES
 # ================================================================================================
 
 
-# -- `from_buffer()` and `from_file()` are not "methods" on `magic` per-se (`magic` is a module)
-# -- but they behave like methods for mocking purposes.
 @pytest.fixture()
-def magic_from_buffer_(request: FixtureRequest):
-    return method_mock(request, magic, "from_buffer")
+def LIBMAGIC_AVAILABLE_False():
+    with patch("unstructured.file_utils.filetype.LIBMAGIC_AVAILABLE", False) as m:
+        yield m
 
 
 @pytest.fixture()
-def magic_from_file_(request: FixtureRequest):
-    return method_mock(request, magic, "from_file")
+def ctx_mime_type_(request: FixtureRequest):
+    return property_mock(request, _FileTypeDetectionContext, "mime_type")
+
+
+# ================================================================================================
+# UNIT-TESTS
+# ================================================================================================
+
+
+class Describe_FileTypeDetectionContext:
+    """Unit-test suite for `unstructured.file_utils.filetype._FileTypeDetectionContext`."""
+
+    # -- .new() -------------------------------------------------
+
+    def it_provides_a_validating_alternate_constructor(self):
+        ctx = _FileTypeDetectionContext.new(
+            file_path=example_doc_path("simple.docx"),
+            file=None,
+            encoding="utf-8",
+            content_type="text/plain",
+            metadata_file_path="a/b/foo.bar",
+        )
+        assert isinstance(ctx, _FileTypeDetectionContext)
+
+    def and_the_validating_constructor_raises_on_an_invalid_context(self):
+        with pytest.raises(ValueError, match="either `file_path` or `file` argument must be pro"):
+            _FileTypeDetectionContext.new(
+                file_path=None,
+                file=None,
+                encoding=None,
+                content_type=None,
+                metadata_file_path=None,
+            )
+
+    # -- .content_type ------------------------------------------
+
+    def it_knows_the_content_type_asserted_by_the_caller(self):
+        assert _FileTypeDetectionContext(content_type="TEXT/hTmL").content_type == "text/html"
+
+    # -- .encoding ----------------------------------------------
+
+    @pytest.mark.parametrize(
+        ("encoding", "expected_value"),
+        [
+            ("utf-8", "utf-8"),
+            ("UTF_8", "utf-8"),
+            ("UTF_16LE", "utf-16le"),
+            ("ISO_8859_6_I", "iso-8859-6"),
+            # -- default value is utf-8 --
+            (None, "utf-8"),
+        ],
+    )
+    def it_knows_the_encoding_asserted_by_the_caller_and_normalizes_it(
+        self, encoding: str | None, expected_value: str
+    ):
+        assert _FileTypeDetectionContext(encoding=encoding).encoding == expected_value
+
+    # -- .extension ---------------------------------------------
+
+    def it_derives_the_filename_extension_from_the_file_path_when_one_is_provided(self):
+        ctx = _FileTypeDetectionContext(file_path=example_doc_path("simple.docx"))
+        assert ctx.extension == ".docx"
+
+    def and_it_derives_the_extension_from_a_file_opened_from_a_path(self):
+        with open(example_doc_path("picture.pptx"), "rb") as f:
+            assert _FileTypeDetectionContext(file=f).extension == ".pptx"
+
+    @pytest.mark.parametrize(
+        "file_name",
+        [
+            # -- case 1: file-like object has no `.name` attribute
+            None,
+            # -- case 2: file-like object has `.name` attribute but its value is the empty string
+            "",
+        ],
+    )
+    def and_it_derives_the_extension_from_metadata_file_path_when_file_object_has_no_name(
+        self, file_name: str | None
+    ):
+        with open(example_doc_path("ideas-page.html"), "rb") as f:
+            file = io.BytesIO(f.read())
+            if file_name is not None:
+                file.name = file_name
+
+        ctx = _FileTypeDetectionContext(file=file, metadata_file_path="a/b/c.html")
+
+        assert ctx.extension == ".html"
+
+    @pytest.mark.parametrize(
+        "file_name",
+        [
+            # -- case 1: file-like object has no `.name` attribute
+            None,
+            # -- case 2: file-like object has `.name` attribute but its value is the empty string
+            "",
+        ],
+    )
+    def and_it_returns_the_empty_string_as_the_extension_when_there_are_no_file_name_sources(
+        self, file_name: str | None
+    ):
+        with open(example_doc_path("ideas-page.html"), "rb") as f:
+            file = io.BytesIO(f.read())
+            if file_name is not None:
+                file.name = file_name
+
+        assert _FileTypeDetectionContext(file=file).extension == ""
+
+    # -- .file_head ---------------------------------------------
+
+    def it_grabs_the_first_4k_bytes_of_the_file_for_use_by_magic(self):
+        ctx = _FileTypeDetectionContext(file_path=example_doc_path("norwich-city.txt"))
+
+        head = ctx.file_head
+
+        assert isinstance(head, bytes)
+        assert len(head) == 4096
+        assert head.startswith(b"Iwan Roberts\nRoberts celebrating after")
+
+    # -- .file_path ---------------------------------------------
+
+    @pytest.mark.parametrize("file_path", [None, "a/b/c.pdf"])
+    def it_knows_the_file_path_provided_by_the_caller(self, file_path: str | None):
+        assert _FileTypeDetectionContext(file_path=file_path).file_path == file_path
+
+    # -- .has_code_mime_type ------------------------------------
+
+    @pytest.mark.parametrize(
+        ("mime_type", "expected_value"),
+        [
+            ("text/plain", False),
+            ("text/x-csharp", True),
+            ("text/x-go", True),
+            ("text/x-java", True),
+            ("text/x-python", True),
+            ("application/xml", False),
+            (None, False),
+        ],
+    )
+    def it_knows_whether_its_mime_type_indicates_programming_language_source_code(
+        self, mime_type_prop_: Mock, mime_type: str | None, expected_value: bool
+    ):
+        mime_type_prop_.return_value = mime_type
+        assert _FileTypeDetectionContext().has_code_mime_type is expected_value
+
+    # -- .is_zipfile --------------------------------------------
+
+    @pytest.mark.parametrize(
+        ("file_name", "expected_value"),
+        [
+            ("README.md", False),
+            ("emoji.xlsx", True),
+            ("simple.doc", False),
+            ("simple.docx", True),
+            ("simple.odt", True),
+            ("simple.zip", True),
+            ("winter-sports.epub", True),
+        ],
+    )
+    def it_knows_whether_it_is_a_zipfile(self, file_name: str, expected_value: bool):
+        assert _FileTypeDetectionContext(example_doc_path(file_name)).is_zipfile is expected_value
+
+    # -- .mime_type ---------------------------------------------
+
+    def it_provides_the_MIME_type_detected_by_libmagic_from_a_file_path(self):
+        ctx = _FileTypeDetectionContext(file_path=example_doc_path("norwich-city.txt"))
+        assert ctx.mime_type == "text/plain"
+
+    def and_it_provides_the_MIME_type_from_path_using_filetype_lib_when_magic_is_unavailable(self):
+        with patch("unstructured.file_utils.filetype.LIBMAGIC_AVAILABLE", False):
+            ctx = _FileTypeDetectionContext(file_path=example_doc_path("simple.doc"))
+            assert ctx.mime_type == "application/msword"
+
+    def but_it_warns_to_install_libmagic_when_the_filetype_lib_cannot_detect_the_MIME_type(
+        self, caplog: LogCaptureFixture
+    ):
+        with patch("unstructured.file_utils.filetype.LIBMAGIC_AVAILABLE", False):
+            ctx = _FileTypeDetectionContext(file_path=example_doc_path("norwich-city.txt"))
+            assert ctx.mime_type is None
+            assert "WARNING" in caplog.text
+            assert "libmagic is unavailable" in caplog.text
+            assert "consider installing libmagic" in caplog.text
+
+    def it_provides_the_MIME_type_detected_by_libmagic_from_a_file_like_object(self):
+        with open(example_doc_path("norwich-city.txt"), "rb") as f:
+            ctx = _FileTypeDetectionContext(file=f)
+            assert ctx.mime_type == "text/plain"
+
+    def and_it_provides_the_MIME_type_from_file_using_filetype_lib_when_magic_is_unavailable(self):
+        with patch("unstructured.file_utils.filetype.LIBMAGIC_AVAILABLE", False):
+            file_path = example_doc_path("simple.doc")
+            with open(file_path, "rb") as f:
+                ctx = _FileTypeDetectionContext(file=f)
+                assert ctx.mime_type == "application/msword"
+
+    # -- .open() ------------------------------------------------
+
+    def it_provides_transparent_access_to_the_source_file_when_it_is_a_file_like_object(self):
+        with open(example_doc_path("norwich-city.txt"), "rb") as f:
+            ctx = _FileTypeDetectionContext(file=f)
+            with ctx.open() as file:
+                assert file is f
+                assert file.read(38) == b"Iwan Roberts\nRoberts celebrating after"
+
+    def it_provides_transparent_access_to_the_source_file_when_it_is_a_file_path(self):
+        ctx = _FileTypeDetectionContext(file_path=example_doc_path("norwich-city.txt"))
+        with ctx.open() as file:
+            assert file.read(38) == b"Iwan Roberts\nRoberts celebrating after"
+
+    # -- .text_head ---------------------------------------------
+
+    def it_grabs_the_first_4k_chars_from_file_path_for_textual_type_differentiation(self):
+        ctx = _FileTypeDetectionContext(file_path=example_doc_path("norwich-city.txt"))
+
+        text_head = ctx.text_head
+
+        assert isinstance(text_head, str)
+        assert len(text_head) == 4096
+        assert text_head.startswith("Iwan Roberts\nRoberts celebrating after")
+
+    def and_it_uses_character_detection_to_correct_a_wrong_encoding_arg_for_file_path(self):
+        ctx = _FileTypeDetectionContext(
+            file_path=example_doc_path("norwich-city.txt"), encoding="utf_32_be"
+        )
+
+        text_head = ctx.text_head
+
+        assert isinstance(text_head, str)
+        assert len(text_head) == 4096
+        assert text_head.startswith("Iwan Roberts\nRoberts celebrating after")
+
+    def but_not_to_correct_a_wrong_encoding_arg_for_a_file_like_object_open_in_binary_mode(self):
+        """Fails silently in this case, returning empty string."""
+        with open(example_doc_path("norwich-city.txt"), "rb") as f:
+            file = io.BytesIO(f.read())
+        ctx = _FileTypeDetectionContext(file=file, encoding="utf_32_be")
+
+        text_head = ctx.text_head
+
+        assert text_head == ""
+
+    def and_it_grabs_the_first_4k_chars_from_binary_file_for_textual_type_differentiation(self):
+        with open(example_doc_path("norwich-city.txt"), "rb") as f:
+            ctx = _FileTypeDetectionContext(file=f)
+
+            text_head = ctx.text_head
+
+            assert isinstance(text_head, str)
+            # -- some characters consume multiple bytes, so shorter than 4096 --
+            assert len(text_head) == 4063
+            assert text_head.startswith("Iwan Roberts\nRoberts celebrating after")
+
+    def and_it_grabs_the_first_4k_chars_from_text_file_for_textual_type_differentiation(self):
+        """Not a documented behavior to accept IO[str], but support is implemented."""
+        with open(example_doc_path("norwich-city.txt")) as f:
+            ctx = _FileTypeDetectionContext(file=f)  # pyright: ignore[reportArgumentType]
+
+            text_head = ctx.text_head
+
+            assert isinstance(text_head, str)
+            assert len(text_head) == 4096
+            assert text_head.startswith("Iwan Roberts\nRoberts celebrating after")
+
+    def it_accommodates_a_utf_32_encoded_file_path(self):
+        ctx = _FileTypeDetectionContext(example_doc_path("fake-text-utf-32.txt"))
+
+        text_head = ctx.text_head
+
+        assert isinstance(text_head, str)
+        # -- test document is short --
+        assert len(text_head) == 188
+        assert text_head.startswith("This is a test document to use for unit tests.\n\n    Doyle")
+
+    # TODO: this fails because `.text_head` ignores decoding errors on a file open for binary
+    # reading. Probably better if it used chardet in that case as it does for a file-path.
+    @pytest.mark.xfail(reason="WIP", raises=AssertionError, strict=True)
+    def and_it_accommodates_a_utf_32_encoded_file_like_object(self):
+        with open(example_doc_path("fake-text-utf-32.txt"), "rb") as f:
+            file = io.BytesIO(f.read())
+        ctx = _FileTypeDetectionContext(file=file)
+
+        text_head = ctx.text_head
+
+        assert isinstance(text_head, str)
+        # -- test document is short --
+        assert len(text_head) == 188
+        assert text_head.startswith("This is a test document to use for unit tests.\n\n    Doyle")
+
+    # -- ._validate() -------------------------------------------
+
+    def it_raises_when_no_file_exists_at_the_specified_file_path(self):
+        with pytest.raises(FileNotFoundError, match="no such file a/b/c.foo"):
+            _FileTypeDetectionContext(file_path="a/b/c.foo")._validate()
+
+    def it_raises_when_neither_file_path_nor_file_is_provided(self):
+        with pytest.raises(ValueError, match="either `file_path` or `file` argument must be pro"):
+            _FileTypeDetectionContext()._validate()
+
+    # -- fixtures --------------------------------------------------------------------------------
+
+    @pytest.fixture
+    def mime_type_prop_(self, request: FixtureRequest):
+        return property_mock(request, _FileTypeDetectionContext, "mime_type")
+
+
+class Describe_TextFileDifferentiator:
+    """Unit-test suite for `unstructured.file_utils.filetype._TextFileDifferentiator`."""
+
+    # -- .applies() ---------------------------------------------
+
+    def it_provides_a_qualifying_alternate_constructor_which_constructs_when_applicable(self):
+        """The constructor determines whether this differentiator is applicable.
+
+        It returns an instance only when differentiating a text file-type is required, which it can
+        judge from the context (`ctx`).
+        """
+        ctx = _FileTypeDetectionContext(example_doc_path("norwich-city.txt"))
+
+        differentiator = _TextFileDifferentiator.applies(ctx)
+
+        assert isinstance(differentiator, _TextFileDifferentiator)
+
+    def and_it_returns_None_when_text_differentiation_does_not_apply_to_the_detection_context(self):
+        ctx = _FileTypeDetectionContext(example_doc_path("simple.docx"))
+        assert _TextFileDifferentiator.applies(ctx) is None
+
+    # -- ._is_csv -----------------------------------------------
+
+    @pytest.mark.parametrize(
+        ("content", "expected_value"),
+        [
+            # -- no commas, too few lines --
+            (b"d\xe2\x80", False),
+            (b'[{"key": "value"}]', False),
+            # -- at least a header and one data row, at least two columns --
+            (b"column1,column2,column3\nvalue1,value2,value3\n", True),
+            # -- no content --
+            (b"", False),
+        ],
+    )
+    def it_distinguishes_a_CSV_file_from_other_text_files(
+        self, content: bytes, expected_value: bool
+    ):
+        ctx = _FileTypeDetectionContext(file=io.BytesIO(content))
+        differentiator = _TextFileDifferentiator(ctx)
+
+        assert differentiator._is_csv is expected_value
+
+    # -- ._is_eml -----------------------------------------------
+
+    @pytest.mark.parametrize(
+        ("file_name", "expected_value"), [("fake-email.eml", True), ("norwich-city.txt", False)]
+    )
+    def it_distinguishes_an_EML_file_from_other_text_files(
+        self, file_name: str, expected_value: bool
+    ):
+        ctx = _FileTypeDetectionContext(example_doc_path(file_name))
+        assert _TextFileDifferentiator(ctx)._is_eml is expected_value
+
+    # -- ._is_json ----------------------------------------------
+
+    @pytest.mark.parametrize(
+        ("content", "expected_value"),
+        [
+            (b"d\xe2\x80", False),
+            (b'[{"key": "value"}]', True),
+            (b"", False),
+            # -- valid JSON, but not for our purposes --
+            (b'"This is not a JSON"', False),
+        ],
+    )
+    def it_distinguishes_a_JSON_file_from_other_text_files(
+        self, content: bytes, expected_value: bool
+    ):
+        ctx = _FileTypeDetectionContext(file=io.BytesIO(content))
+        differentiator = _TextFileDifferentiator(ctx)
+
+        assert differentiator._is_json is expected_value
+
+
+class Describe_ZipFileDifferentiator:
+    """Unit-test suite for `unstructured.file_utils.filetype._ZipFileDifferentiator`."""
+
+    # -- .applies() ---------------------------------------------
+
+    def it_provides_a_qualifying_alternate_constructor_which_constructs_when_applicable(self):
+        """The constructor determines whether this differentiator is applicable.
+
+        It returns an instance only when differentiating a zip file-type is required, which it can
+        judge from the mime-type provided by the context (`ctx`).
+        """
+        ctx = _FileTypeDetectionContext(example_doc_path("simple.docx"))
+
+        differentiator = _ZipFileDifferentiator.applies(ctx, "application/zip")
+
+        assert isinstance(differentiator, _ZipFileDifferentiator)
+
+    def and_it_returns_None_when_zip_differentiation_does_not_apply_to_the_detection_context(self):
+        ctx = _FileTypeDetectionContext(example_doc_path("norwich-city.txt"))
+        assert _ZipFileDifferentiator.applies(ctx, "application/epub") is None
+
+    # -- .file_type ---------------------------------------------
+
+    @pytest.mark.parametrize(
+        ("file_name", "expected_value"),
+        [
+            ("simple.docx", FileType.DOCX),
+            ("picture.pptx", FileType.PPTX),
+            ("vodafone.xlsx", FileType.XLSX),
+            ("simple.zip", FileType.ZIP),
+            ("README.org", None),
+        ],
+    )
+    def it_distinguishes_the_file_type_of_applicable_zip_files(
+        self, file_name: str, expected_value: FileType | None
+    ):
+        ctx = _FileTypeDetectionContext(example_doc_path(file_name))
+        differentiator = _ZipFileDifferentiator(ctx)
+
+        assert differentiator.file_type is expected_value
diff --git a/test_unstructured/file_utils/test_model.py b/test_unstructured/file_utils/test_model.py
index 91d2b8bec..98088ee75 100644
--- a/test_unstructured/file_utils/test_model.py
+++ b/test_unstructured/file_utils/test_model.py
@@ -10,6 +10,14 @@ from unstructured.file_utils.model import FileType
 class DescribeFileType:
     """Unit-test suite for `unstructured.file_utils.model.Filetype`."""
 
+    # -- .__lt__() ----------------------------------------------
+
+    def it_is_a_collection_ordered_by_name_and_can_be_sorted(self):
+        """FileType is a total order on name, e.g. FileType.A < FileType.B."""
+        assert FileType.EML < FileType.HTML < FileType.XML
+
+    # -- .from_extension() --------------------------------------
+
     @pytest.mark.parametrize(
         ("ext", "file_type"),
         [
@@ -23,10 +31,12 @@ class DescribeFileType:
     def it_can_recognize_a_file_type_from_an_extension(self, ext: str, file_type: FileType | None):
         assert FileType.from_extension(ext) is file_type
 
-    @pytest.mark.parametrize("ext", [".foobar", ".xyz", ".mdx", "", "."])
-    def but_not_when_that_extension_is_empty_or_not_registered(self, ext: str):
+    @pytest.mark.parametrize("ext", [".foobar", ".xyz", ".mdx", "", ".", None])
+    def but_not_when_that_extension_is_empty_or_None_or_not_registered(self, ext: str | None):
         assert FileType.from_extension(ext) is None
 
+    # -- .from_mime_type() --------------------------------------
+
     @pytest.mark.parametrize(
         ("mime_type", "file_type"),
         [
@@ -46,29 +56,13 @@ class DescribeFileType:
     ):
         assert FileType.from_mime_type(mime_type) is file_type
 
-    @pytest.mark.parametrize("mime_type", ["text/css", "image/gif", "audio/mpeg", "foo/bar"])
-    def but_not_when_that_mime_type_is_not_registered_by_a_file_type(self, mime_type: str):
+    @pytest.mark.parametrize("mime_type", ["text/css", "image/gif", "audio/mpeg", "foo/bar", None])
+    def but_not_when_that_mime_type_is_not_registered_by_a_file_type_or_None(
+        self, mime_type: str | None
+    ):
         assert FileType.from_mime_type(mime_type) is None
 
-    @pytest.mark.parametrize(
-        ("file_type", "expected_value"),
-        [
-            (FileType.BMP, ("unstructured_inference",)),
-            (FileType.CSV, ("pandas",)),
-            (FileType.DOC, ("docx",)),
-            (FileType.EMPTY, ()),
-            (FileType.HTML, ()),
-            (FileType.ODT, ("docx", "pypandoc")),
-            (FileType.PDF, ("pdf2image", "pdfminer", "PIL")),
-            (FileType.UNK, ()),
-            (FileType.WAV, ()),
-            (FileType.ZIP, ()),
-        ],
-    )
-    def it_knows_which_importable_packages_its_partitioner_depends_on(
-        self, file_type: FileType, expected_value: tuple[str, ...]
-    ):
-        assert file_type.importable_package_dependencies == expected_value
+    # -- .extra_name --------------------------------------------
 
     @pytest.mark.parametrize(
         ("file_type", "expected_value"),
@@ -91,6 +85,30 @@ class DescribeFileType:
     ):
         assert file_type.extra_name == expected_value
 
+    # -- .importable_package_dependencies -----------------------
+
+    @pytest.mark.parametrize(
+        ("file_type", "expected_value"),
+        [
+            (FileType.BMP, ("unstructured_inference",)),
+            (FileType.CSV, ("pandas",)),
+            (FileType.DOC, ("docx",)),
+            (FileType.EMPTY, ()),
+            (FileType.HTML, ()),
+            (FileType.ODT, ("docx", "pypandoc")),
+            (FileType.PDF, ("pdf2image", "pdfminer", "PIL")),
+            (FileType.UNK, ()),
+            (FileType.WAV, ()),
+            (FileType.ZIP, ()),
+        ],
+    )
+    def it_knows_which_importable_packages_its_partitioner_depends_on(
+        self, file_type: FileType, expected_value: tuple[str, ...]
+    ):
+        assert file_type.importable_package_dependencies == expected_value
+
+    # -- .is_partitionable --------------------------------------
+
     @pytest.mark.parametrize(
         ("file_type", "expected_value"),
         [
@@ -112,6 +130,8 @@ class DescribeFileType:
     ):
         assert file_type.is_partitionable is expected_value
 
+    # -- .mime_type ---------------------------------------------
+
     @pytest.mark.parametrize(
         ("file_type", "mime_type"),
         [
@@ -131,6 +151,8 @@ class DescribeFileType:
     def it_knows_its_canonical_MIME_type(self, file_type: FileType, mime_type: str):
         assert file_type.mime_type == mime_type
 
+    # -- .partitioner_function_name -----------------------------
+
     @pytest.mark.parametrize(
         ("file_type", "expected_value"),
         [
@@ -155,6 +177,8 @@ class DescribeFileType:
         with pytest.raises(ValueError, match="`.partitioner_function_name` is undefined because "):
             file_type.partitioner_function_name
 
+    # -- .partitioner_module_qname ------------------------------
+
     @pytest.mark.parametrize(
         ("file_type", "expected_value"),
         [
@@ -181,6 +205,8 @@ class DescribeFileType:
         with pytest.raises(ValueError, match="`.partitioner_module_qname` is undefined because "):
             file_type.partitioner_module_qname
 
+    # -- .partitioner_shortname ---------------------------------
+
     @pytest.mark.parametrize(
         ("file_type", "expected_value"),
         [
diff --git a/test_unstructured/metrics/test_element_type.py b/test_unstructured/metrics/test_element_type.py
index 54939af0b..183efb8c6 100644
--- a/test_unstructured/metrics/test_element_type.py
+++ b/test_unstructured/metrics/test_element_type.py
@@ -1,6 +1,10 @@
+from __future__ import annotations
+
 import pytest
 
+from test_unstructured.unit_utils import example_doc_path
 from unstructured.metrics.element_type import (
+    FrequencyDict,
     calculate_element_type_percent_match,
     get_element_type_frequency,
 )
@@ -14,10 +18,9 @@ from unstructured.staging.base import elements_to_json
         (
             "fake-email.txt",
             {
-                ("UncategorizedText", None): 6,
+                ("NarrativeText", None): 1,
+                ("Title", None): 1,
                 ("ListItem", None): 2,
-                ("Title", None): 5,
-                ("NarrativeText", None): 2,
             },
         ),
         (
@@ -34,8 +37,8 @@ from unstructured.staging.base import elements_to_json
         ),
     ],
 )
-def test_get_element_type_frequency(filename, frequency):
-    elements = partition(filename=f"example-docs/{filename}")
+def test_get_element_type_frequency(filename: str, frequency: dict[tuple[str, int | None], int]):
+    elements = partition(example_doc_path(filename))
     elements_freq = get_element_type_frequency(elements_to_json(elements))
     assert elements_freq == frequency
 
@@ -46,11 +49,11 @@ def test_get_element_type_frequency(filename, frequency):
         (
             "fake-email.txt",
             {
-                ("UncategorizedText", None): 14,
+                ("Title", None): 1,
                 ("ListItem", None): 2,
                 ("NarrativeText", None): 2,
             },
-            (0.56, 0.56, 0.56),
+            (0.8, 0.8, 0.80),
         ),
         (
             "sample-presentation.pptx",
@@ -92,8 +95,10 @@ def test_get_element_type_frequency(filename, frequency):
         ),
     ],
 )
-def test_calculate_element_type_percent_match(filename, expected_frequency, percent_matched):
-    elements = partition(filename=f"example-docs/{filename}")
+def test_calculate_element_type_percent_match(
+    filename: str, expected_frequency: FrequencyDict, percent_matched: tuple[float, float, float]
+):
+    elements = partition(example_doc_path(filename))
     elements_frequency = get_element_type_frequency(elements_to_json(elements))
     assert (
         round(calculate_element_type_percent_match(elements_frequency, expected_frequency), 2)
diff --git a/test_unstructured/partition/test_auto.py b/test_unstructured/partition/test_auto.py
index a09d45f2d..3e3d4c6b9 100644
--- a/test_unstructured/partition/test_auto.py
+++ b/test_unstructured/partition/test_auto.py
@@ -929,7 +929,11 @@ def test_auto_partition_raises_with_bad_type(request: FixtureRequest):
         partition(filename="made-up.fake", strategy=PartitionStrategy.HI_RES)
 
     detect_filetype_.assert_called_once_with(
-        content_type=None, encoding=None, file=None, file_filename=None, filename="made-up.fake"
+        file_path="made-up.fake",
+        file=None,
+        encoding=None,
+        content_type=None,
+        metadata_file_path=None,
     )
 
 
@@ -1305,7 +1309,7 @@ def test_auto_partition_that_requires_extras_raises_when_dependencies_are_not_in
     )
     match = r"partition_pdf\(\) is not available because one or more dependencies are not installed"
     with pytest.raises(ImportError, match=match):
-        partition(example_doc_path("layout-parser-paper-fast.pdf"))
+        partition(example_doc_path("pdf/layout-parser-paper-fast.pdf"))
 
     dependency_exists_.assert_called_once_with("pdf2image")
 
diff --git a/test_unstructured/partition/test_json.py b/test_unstructured/partition/test_json.py
index 5b08a23e8..f5a5e0b56 100644
--- a/test_unstructured/partition/test_json.py
+++ b/test_unstructured/partition/test_json.py
@@ -9,8 +9,8 @@ import tempfile
 import pytest
 from pytest_mock import MockFixture
 
+from test_unstructured.unit_utils import example_doc_path
 from unstructured.documents.elements import CompositeElement
-from unstructured.file_utils.filetype import detect_filetype
 from unstructured.file_utils.model import FileType
 from unstructured.partition.email import partition_email
 from unstructured.partition.html import partition_html
@@ -43,9 +43,9 @@ def test_it_chunks_elements_when_a_chunking_strategy_is_specified():
 
 @pytest.mark.parametrize("filename", test_files)
 def test_partition_json_from_filename(filename: str):
-    path = os.path.join(DIRECTORY, "..", "..", "example-docs", filename)
+    path = example_doc_path(filename)
     elements = []
-    filetype = detect_filetype(filename=path)
+    filetype = FileType.from_extension(os.path.splitext(path)[1])
     if filetype == FileType.TXT:
         elements = partition_text(filename=path)
     if filetype == FileType.HTML:
@@ -72,9 +72,9 @@ def test_partition_json_from_filename(filename: str):
 
 @pytest.mark.parametrize("filename", test_files)
 def test_partition_json_from_filename_with_metadata_filename(filename: str):
-    path = os.path.join(DIRECTORY, "..", "..", "example-docs", filename)
+    path = example_doc_path(filename)
     elements = []
-    filetype = detect_filetype(filename=path)
+    filetype = FileType.from_extension(os.path.splitext(path)[1])
     if filetype == FileType.TXT:
         elements = partition_text(filename=path)
     if filetype == FileType.HTML:
@@ -97,9 +97,9 @@ def test_partition_json_from_filename_with_metadata_filename(filename: str):
 
 @pytest.mark.parametrize("filename", test_files)
 def test_partition_json_from_file(filename: str):
-    path = os.path.join(DIRECTORY, "..", "..", "example-docs", filename)
+    path = example_doc_path(filename)
     elements = []
-    filetype = detect_filetype(filename=path)
+    filetype = FileType.from_extension(os.path.splitext(path)[1])
     if filetype == FileType.TXT:
         elements = partition_text(filename=path)
     if filetype == FileType.HTML:
@@ -126,9 +126,9 @@ def test_partition_json_from_file(filename: str):
 
 @pytest.mark.parametrize("filename", test_files)
 def test_partition_json_from_file_with_metadata_filename(filename: str):
-    path = os.path.join(DIRECTORY, "..", "..", "example-docs", filename)
+    path = example_doc_path(filename)
     elements = []
-    filetype = detect_filetype(filename=path)
+    filetype = FileType.from_extension(os.path.splitext(path)[1])
     if filetype == FileType.TXT:
         elements = partition_text(filename=path)
     if filetype == FileType.HTML:
@@ -150,9 +150,9 @@ def test_partition_json_from_file_with_metadata_filename(filename: str):
 
 @pytest.mark.parametrize("filename", test_files)
 def test_partition_json_from_text(filename: str):
-    path = os.path.join(DIRECTORY, "..", "..", "example-docs", filename)
+    path = example_doc_path(filename)
     elements = []
-    filetype = detect_filetype(filename=path)
+    filetype = FileType.from_extension(os.path.splitext(path)[1])
     if filetype == FileType.TXT:
         elements = partition_text(filename=path)
     if filetype == FileType.HTML:
@@ -192,9 +192,9 @@ def test_partition_json_works_with_empty_list():
 
 
 def test_partition_json_raises_with_too_many_specified():
-    path = os.path.join(DIRECTORY, "..", "..", "example-docs", "fake-text.txt")
+    path = example_doc_path("fake-text.txt")
     elements = []
-    filetype = detect_filetype(filename=path)
+    filetype = FileType.from_extension(os.path.splitext(path)[1])
     if filetype == FileType.TXT:
         elements = partition_text(filename=path)
     if filetype == FileType.HTML:
@@ -225,9 +225,9 @@ def test_partition_json_raises_with_too_many_specified():
 
 @pytest.mark.parametrize("filename", test_files)
 def test_partition_json_from_filename_exclude_metadata(filename: str):
-    path = os.path.join(DIRECTORY, "..", "..", "example-docs", filename)
+    path = example_doc_path(filename)
     elements = []
-    filetype = detect_filetype(filename=path)
+    filetype = FileType.from_extension(os.path.splitext(path)[1])
     if filetype == FileType.TXT:
         elements = partition_text(filename=path)
     if filetype == FileType.HTML:
@@ -249,9 +249,9 @@ def test_partition_json_from_filename_exclude_metadata(filename: str):
 
 @pytest.mark.parametrize("filename", test_files)
 def test_partition_json_from_file_exclude_metadata(filename: str):
-    path = os.path.join(DIRECTORY, "..", "..", "example-docs", filename)
+    path = example_doc_path(filename)
     elements = []
-    filetype = detect_filetype(filename=path)
+    filetype = FileType.from_extension(os.path.splitext(path)[1])
     if filetype == FileType.TXT:
         elements = partition_text(filename=path)
     if filetype == FileType.HTML:
@@ -274,9 +274,9 @@ def test_partition_json_from_file_exclude_metadata(filename: str):
 
 @pytest.mark.parametrize("filename", test_files)
 def test_partition_json_from_text_exclude_metadata(filename: str):
-    path = os.path.join(DIRECTORY, "..", "..", "example-docs", filename)
+    path = example_doc_path(filename)
     elements = []
-    filetype = detect_filetype(filename=path)
+    filetype = FileType.from_extension(os.path.splitext(path)[1])
     if filetype == FileType.TXT:
         elements = partition_text(filename=path)
     if filetype == FileType.HTML:
diff --git a/unstructured/__version__.py b/unstructured/__version__.py
index 648e21503..d9462c4cf 100644
--- a/unstructured/__version__.py
+++ b/unstructured/__version__.py
@@ -1 +1 @@
-__version__ = "0.15.1-dev1"  # pragma: no cover
+__version__ = "0.15.1-dev2"  # pragma: no cover
diff --git a/unstructured/file_utils/filetype.py b/unstructured/file_utils/filetype.py
index c11d81c6c..5e930c366 100644
--- a/unstructured/file_utils/filetype.py
+++ b/unstructured/file_utils/filetype.py
@@ -1,18 +1,48 @@
+"""Automatically detect file-type based on inspection of the file's contents.
+
+Auto-detection proceeds via a sequence of strategies. The first strategy to confidently determine a
+file-type returns that value. A strategy that is not applicable, either because it lacks the input
+required or fails to determine a file-type, returns `None` and execution continues with the next
+strategy.
+
+`_FileTypeDetector` is the main object and implements the three strategies.
+
+The three strategies are:
+
+- Use MIME-type asserted by caller in the `content_type` argument.
+- Guess a MIME-type using libmagic, falling back to the `filetype` package when libmagic is
+  unavailable.
+- Map filename-extension to a `FileType` member.
+
+A file that fails all three strategies is assigned the value `FileType.UNK`, for "unknown".
+
+`_FileTypeDetectionContext` encapsulates the various arguments received by `detect_filetype()` and
+provides values derived from them. This object is immutable and can be passed to delegates of
+`_FileTypeDetector` to provide whatever context they need on the current detection instance.
+
+`_FileTypeDetector` delegates to _differentiator_ objects like `_ZipFileDifferentiator` for
+specialized discrimination and/or confirmation of ambiguous or frequently mis-identified
+MIME-types. Additional differentiators are planned, one for `application/x-ole-storage`
+(DOC, PPT, XLS, and MSG file-types) and perhaps others.
+"""
+
 from __future__ import annotations
 
+import contextlib
 import functools
 import importlib.util
 import json
 import os
 import re
 import zipfile
-from typing import IO, Callable, List, Optional
+from typing import IO, Callable, Iterator, Optional
 
+import filetype as ft
 from typing_extensions import ParamSpec
 
 from unstructured.documents.elements import Element
 from unstructured.file_utils.encoding import detect_file_encoding, format_encoding_str
-from unstructured.file_utils.model import PLAIN_TEXT_EXTENSIONS, FileType
+from unstructured.file_utils.model import FileType
 from unstructured.logger import logger
 from unstructured.nlp.patterns import EMAIL_HEAD_RE, LIST_OF_DICTS_PATTERN
 from unstructured.partition.common import (
@@ -21,179 +51,53 @@ from unstructured.partition.common import (
     remove_element_metadata,
     set_element_hierarchy,
 )
-from unstructured.utils import get_call_args_applying_defaults
+from unstructured.utils import get_call_args_applying_defaults, lazyproperty
 
 LIBMAGIC_AVAILABLE = bool(importlib.util.find_spec("magic"))
 
 
 def detect_filetype(
-    filename: Optional[str] = None,
-    content_type: Optional[str] = None,
-    file: Optional[IO[bytes]] = None,
-    file_filename: Optional[str] = None,
-    encoding: Optional[str] = "utf-8",
+    file_path: str | None = None,
+    file: IO[bytes] | None = None,
+    encoding: str | None = None,
+    content_type: str | None = None,
+    metadata_file_path: Optional[str] = None,
 ) -> FileType:
-    """Use libmagic to determine a file's type.
+    """Determine file-type of specified file using libmagic and/or fallback methods.
 
-    Helps determine which partition brick to use for a given file. A return value of None indicates
-    a non-supported file type.
+    One of `file_path` or `file` must be specified. A `file_path` that does not
+    correspond to a file on the filesystem raises `FileNotFoundError`.
+
+    Args:
+        content_type: MIME-type of document-source, when already known. Providing
+          a value for this argument disables auto-detection unless it does not map
+          to a FileType member or is ambiguous, in which case it is ignored.
+        encoding: Only used for textual file-types. When omitted, `utf-8` is
+          assumed. Should generally be omitted except to resolve a problem with
+          textual file-types like HTML.
+        metadata_file_path: Only used when `file` is provided and then only as a
+          source for a filename-extension that may be needed as a secondary
+          content-type indicator. Ignored when the document is specified using
+          `file_path`.
+
+    Returns:
+        A member of the `FileType` enumeration, `FileType.UNK` when the file type
+        could not be determined or is not supported.
+
+    Raises:
+        FileNotFoundError: when `file_path` is specified but does not correspond
+          to a file on the filesystem.
+        ValueError: when neither `file_path` nor `file` was specified, so there
+          is no document source to inspect.
     """
-    mime_type = None
-    exactly_one(filename=filename, file=file)
-
-    # first check (content_type)
-    if content_type:
-        file_type = FileType.from_mime_type(content_type)
-        if file_type:
-            return file_type
-
-    # second check (filename/file_name/file)
-    # continue if successfully define mime_type
-    if filename or file_filename:
-        _filename = filename or file_filename or ""
-        _, extension = os.path.splitext(_filename)
-        extension = extension.lower()
-        if os.path.isfile(_filename) and LIBMAGIC_AVAILABLE:
-            import magic
-
-            mime_type = magic.from_file(_resolve_symlink(_filename), mime=True)
-        elif os.path.isfile(_filename):
-            import filetype as ft
-
-            mime_type = ft.guess_mime(_filename)
-        if mime_type is None:
-            return FileType.from_extension(extension) or FileType.UNK
-
-    elif file is not None:
-        if hasattr(file, "name"):
-            _, extension = os.path.splitext(file.name)
-        else:
-            extension = ""
-        extension = extension.lower()
-        # NOTE(robinson) - the python-magic docs recommend reading at least the first 2048 bytes
-        # Increased to 4096 because otherwise .xlsx files get detected as a zip file
-        # ref: https://github.com/ahupp/python-magic#usage
-        if LIBMAGIC_AVAILABLE:
-            import magic
-
-            mime_type = magic.from_buffer(file.read(4096), mime=True)
-        else:
-            import filetype as ft
-
-            mime_type = ft.guess_mime(file.read(4096))
-        if mime_type is None:
-            logger.warning(
-                "libmagic is unavailable but assists in filetype detection on file-like objects. "
-                "Please consider installing libmagic for better results.",
-            )
-            return FileType.from_extension(extension) or FileType.UNK
-
-    else:
-        raise ValueError("No filename, file, nor file_filename were specified.")
-
-    """Mime type special cases."""
-    # third check (mime_type)
-
-    # NOTE(Crag): older magic lib does not differentiate between xls and doc
-    if mime_type == "application/msword" and extension == ".xls":
-        return FileType.XLS
-
-    elif mime_type.endswith("xml"):
-        if extension == ".html" or extension == ".htm":
-            return FileType.HTML
-        else:
-            return FileType.XML
-
-    # -- ref: https://www.rfc-editor.org/rfc/rfc822 --
-    elif mime_type == "message/rfc822" or mime_type.startswith("text"):
-        if not encoding:
-            encoding = "utf-8"
-        formatted_encoding = format_encoding_str(encoding)
-
-        if extension in [
-            ".eml",
-            ".p7s",
-            ".md",
-            ".rtf",
-            ".html",
-            ".rst",
-            ".org",
-            ".csv",
-            ".tsv",
-            ".json",
-        ]:
-            return FileType.from_extension(extension) or FileType.TXT
-
-        # NOTE(crag): for older versions of the OS libmagic package, such as is currently
-        # installed on the Unstructured docker image, .json files resolve to "text/plain"
-        # rather than "application/json". this corrects for that case.
-        if _is_text_file_a_json(
-            file=file,
-            filename=filename,
-            encoding=formatted_encoding,
-        ):
-            return FileType.JSON
-
-        if _is_text_file_a_csv(
-            file=file,
-            filename=filename,
-            encoding=formatted_encoding,
-        ):
-            return FileType.CSV
-
-        if file and _check_eml_from_buffer(file=file) is True:
-            return FileType.EML
-
-        if extension in PLAIN_TEXT_EXTENSIONS:
-            return FileType.from_extension(extension) or FileType.UNK
-
-        # Safety catch
-        if file_type := FileType.from_mime_type(mime_type):
-            return file_type
-
-        return FileType.TXT
-
-    elif mime_type == "application/octet-stream":
-        if extension == ".docx":
-            return FileType.DOCX
-        elif file:
-            return _detect_filetype_from_octet_stream(file=file)
-        else:
-            return FileType.from_extension(extension) or FileType.UNK
-
-    elif mime_type == "application/zip":
-        file_type = FileType.UNK
-        if file:
-            file_type = _detect_filetype_from_octet_stream(file=file)
-        elif filename is not None:
-            with open(filename, "rb") as f:
-                file_type = _detect_filetype_from_octet_stream(file=f)
-
-        extension = extension if extension else ""
-        return (
-            FileType.ZIP
-            if file_type in (FileType.UNK, FileType.ZIP)
-            else FileType.from_extension(extension) or file_type
-        )
-
-    elif _is_code_mime_type(mime_type):
-        # NOTE(robinson) - we'll treat all code files as plain text for now.
-        # we can update this logic and add filetypes for specific languages
-        # later if needed.
-        return FileType.TXT
-
-    elif mime_type.endswith("empty"):
-        return FileType.EMPTY
-
-    # For everything else
-    elif file_type := FileType.from_mime_type(mime_type):
-        return file_type
-
-    logger.warning(
-        f"The MIME type{f' of {filename!r}' if filename else ''} is {mime_type!r}. "
-        "This file type is not currently supported in unstructured.",
+    ctx = _FileTypeDetectionContext.new(
+        file_path=file_path,
+        file=file,
+        encoding=encoding,
+        content_type=content_type,
+        metadata_file_path=metadata_file_path,
     )
-    return FileType.from_extension(extension) or FileType.UNK
+    return _FileTypeDetector.file_type(ctx)
 
 
 def is_json_processable(
@@ -217,124 +121,476 @@ def is_json_processable(
     return re.match(LIST_OF_DICTS_PATTERN, file_text) is not None
 
 
-def _check_eml_from_buffer(file: IO[bytes] | IO[str]) -> bool:
-    """Checks if a text/plain file is actually a .eml file.
+class _FileTypeDetector:
+    """Determines file type from a variety of possible inputs."""
 
-    Uses a regex pattern to see if the start of the file matches the typical pattern for a .eml
-    file.
+    def __init__(self, ctx: _FileTypeDetectionContext):
+        self._ctx = ctx
+
+    @classmethod
+    def file_type(cls, ctx: _FileTypeDetectionContext) -> FileType:
+        """Detect file-type of document-source described by `ctx`."""
+        return cls(ctx)._file_type
+
+    @property
+    def _file_type(self) -> FileType:
+        """FileType member corresponding to this document source."""
+        # -- strategy 1: use content-type asserted by caller --
+        if file_type := self._file_type_from_content_type:
+            return file_type
+
+        # -- strategy 2: guess MIME-type using libmagic and use that --
+        if file_type := self._file_type_from_guessed_mime_type:
+            return file_type
+
+        # -- strategy 3: use filename-extension, like ".docx" -> FileType.DOCX --
+        if file_type := self._file_type_from_file_extension:
+            return file_type
+
+        # -- strategy 4: give up and report FileType.UNK --
+        return FileType.UNK
+
+    # == STRATEGIES ============================================================
+
+    @property
+    def _file_type_from_content_type(self) -> FileType | None:
+        """Map passed content-type argument to a file-type, subject to certain rules."""
+        content_type = self._ctx.content_type
+
+        # -- when no content-type was asserted by caller, this strategy is not applicable --
+        if not content_type:
+            return None
+
+        # -- otherwise we trust the passed `content_type` as long as `FileType` recognizes it --
+        return FileType.from_mime_type(content_type)
+
+    @property
+    def _file_type_from_guessed_mime_type(self) -> FileType | None:
+        """FileType based on auto-detection of MIME-type by libmagic.
+
+        In some cases refinements are necessary on the magic-derived MIME-types. This process
+        includes applying those rules, most of which are accumulated through practical experience.
+        """
+        mime_type = self._ctx.mime_type
+        extension = self._ctx.extension
+
+        # -- when libmagic is not installed, the `filetype` package is used instead.
+        # -- `filetype.guess()` returns `None` for file-types it does not support, which
+        # -- unfortunately includes all the textual file-types like CSV, EML, HTML, MD, RST, RTF,
+        # -- TSV, and TXT. When we have no guessed MIME-type, this strategy is not applicable.
+        if mime_type is None:
+            return None
+
+        # NOTE(Crag): older magic lib does not differentiate between xls and doc
+        if mime_type == "application/msword" and extension == ".xls":
+            return FileType.XLS
+
+        if mime_type.endswith("xml"):
+            return FileType.HTML if extension in (".html", ".htm") else FileType.XML
+
+        if differentiator := _TextFileDifferentiator.applies(self._ctx):
+            return differentiator.file_type
+
+        # -- applicable to "application/octet-stream", "application/zip", and all Office 2007+
+        # -- document MIME-types, i.e. those for DOCX, PPTX, and XLSX. Note however it does NOT
+        # -- apply to EPUB or ODT documents, even though those are also Zip archives. The zip and
+        # -- octet-stream MIME-types are fed in because they are ambiguous. The MS-Office types are
+        # -- differentiated because they are sometimes mistaken for each other, like DOCX mime-type
+        # -- is actually a PPTX file etc.
+        if differentiator := _ZipFileDifferentiator.applies(self._ctx, mime_type):
+            return differentiator.file_type
+
+        # -- All source-code files (e.g. *.py, *.js) are classified as plain text for the moment --
+        if self._ctx.has_code_mime_type:
+            return FileType.TXT
+
+        if mime_type.endswith("empty"):
+            return FileType.EMPTY
+
+        # -- if no more-specific rules apply, use the MIME-type -> FileType mapping when present --
+        if file_type := FileType.from_mime_type(mime_type):
+            return file_type
+
+        logger.warning(
+            f"The MIME type{f' of {self._ctx.file_path!r}' if self._ctx.file_path else ''} is"
+            f" {mime_type!r}. This file type is not currently supported in unstructured.",
+        )
+        return None
+
+    @lazyproperty
+    def _file_type_from_file_extension(self) -> FileType | None:
+        """Determine file-type from filename extension.
+
+        Returns `None` when no filename is available or when the extension does not map to a
+        supported file-type.
+        """
+        return FileType.from_extension(self._ctx.extension)
+
+
+class _FileTypeDetectionContext:
+    """Provides all arguments to file-type auto-detection and values derived from them.
+
+    This keeps computation of derived values out of the file-detection code but more importantly
+    allows the main filetype-detector to pass the full context to any delegates without coupling
+    itself to which values it might need.
     """
-    file.seek(0)
-    file_content = file.read(4096)
-    if isinstance(file_content, bytes):
-        file_head = file_content.decode("utf-8", errors="ignore")
-    else:
-        file_head = file_content
-    return EMAIL_HEAD_RE.match(file_head) is not None
+
+    def __init__(
+        self,
+        file_path: str | None = None,
+        *,
+        file: IO[bytes] | None = None,
+        encoding: str | None = None,
+        content_type: str | None = None,
+        metadata_file_path: str | None = None,
+    ):
+        self._file_path = file_path
+        self._file_arg = file
+        self._encoding_arg = encoding
+        self._content_type = content_type
+        self._metadata_file_path = metadata_file_path
+
+    @classmethod
+    def new(
+        cls,
+        *,
+        file_path: str | None,
+        file: IO[bytes] | None,
+        encoding: str | None,
+        content_type: str | None,
+        metadata_file_path: str | None,
+    ):
+        self = cls(
+            file_path=file_path,
+            file=file,
+            encoding=encoding,
+            content_type=content_type,
+            metadata_file_path=metadata_file_path,
+        )
+        self._validate()
+        return self
+
+    @lazyproperty
+    def content_type(self) -> str | None:
+        """MIME-type asserted by caller; not based on inspection of file by this process.
+
+        Would commonly occur when the file was downloaded via HTTP and a `Content-Type:` header was
+        present on the response. These values are often ambiguous and sometimes just wrong, so
+        they get some further verification. All lower-case when not `None`.
+        """
+        return self._content_type.lower() if self._content_type else None
+
+    @lazyproperty
+    def encoding(self) -> str:
+        """Character-set used to encode text of this file.
+
+        Relevant for textual file-types only, like HTML, TXT, JSON, etc.
+        """
+        return format_encoding_str(self._encoding_arg or "utf-8")
+
+    @lazyproperty
+    def extension(self) -> str:
+        """Best filename-extension we can muster, "" when there is no available source."""
+        # -- get from file_path, or file when it has a name (path) --
+        with self.open() as file:
+            if hasattr(file, "name") and file.name:
+                return os.path.splitext(file.name)[1].lower()
+
+        # -- otherwise use metadata file-path when provided --
+        if file_path := self._metadata_file_path:
+            return os.path.splitext(file_path)[1].lower()
+
+        # -- otherwise empty str means no extension, same as a path like "a/b/name-no-ext" --
+        return ""
+
+    @lazyproperty
+    def file_head(self) -> bytes:
+        """The initial bytes of the file to be recognized, for use with libmagic detection."""
+        with self.open() as file:
+            return file.read(4096)
+
+    @lazyproperty
+    def file_path(self) -> str | None:
+        """Filesystem path to file to be inspected, when provided on call.
+
+        None when the caller specified the source as a file-like object instead. Useful for user
+        feedback on an error, but users of context should have little use for it otherwise.
+        """
+        return self._file_path
+
+    @lazyproperty
+    def is_zipfile(self) -> bool:
+        """True when file is a Zip archive."""
+        with self.open() as file:
+            return zipfile.is_zipfile(file)
+
+    @lazyproperty
+    def has_code_mime_type(self) -> bool:
+        """True when the detected MIME-type plausibly belongs to a source-code file.
+
+        A MIME-type qualifies when it contains the name of one of a handful of programming
+        languages. `False` when no MIME-type could be determined.
+        """
+        detected = self.mime_type
+        if detected is None:
+            return False
+
+        # -- Go is compared exactly; "go" as a substring would match unrelated MIME-types --
+        language_names = (
+            "c# c++ cpp csharp java javascript php python ruby swift typescript".split()
+        )
+        return detected == "text/x-go" or any(name in detected for name in language_names)
+
+    @lazyproperty
+    def mime_type(self) -> str | None:
+        """MIME-type guessed from the file content, `None` when no guess is available.
+
+        `libmagic` makes the guess when available, the `filetype` package otherwise. A `str`
+        return value is always in lower-case.
+        """
+        file_path = self._file_path
+
+        if LIBMAGIC_AVAILABLE:
+            import magic
+
+            if file_path:
+                guess = magic.from_file(_resolve_symlink(file_path), mime=True)
+            else:
+                guess = magic.from_buffer(self.file_head, mime=True)
+            return guess.lower() if guess else None
+
+        # -- `filetype` accepts either a path or the leading bytes of the file --
+        guess = ft.guess_mime(file_path or self.file_head)
+        if guess is not None:
+            return guess.lower()
+
+        logger.warning(
+            "libmagic is unavailable but assists in filetype detection. Please consider"
+            " installing libmagic for better results."
+        )
+        return None
+
+    @contextlib.contextmanager
+    def open(self) -> Iterator[IO[bytes]]:
+        """Encapsulates complexity of dealing with file-path or file-like-object.
+
+        Provides an `IO[bytes]` object as the "common-denominator" document source, positioned at
+        read position 0. A file opened from `file_path` is closed on exit; a caller-provided
+        file-like object is left open.
+
+        Must be used as a context manager using a `with` statement:
+
+            with self.open() as file:
+                do things with file
+        """
+        if self._file_path:
+            with open(self._file_path, "rb") as f:
+                yield f
+        else:
+            file = self._file_arg
+            assert file is not None  # -- guaranteed by `._validate()` --
+            file.seek(0)
+            yield file
+
+    @lazyproperty
+    def text_head(self) -> str:
+        """The initial text of the file, at most 4096 characters, for text-format differentiation.
+
+        Raises:
+            UnicodeDecodeError if file cannot be read as text.
+        """
+        # TODO: only attempts fallback character-set detection for file-path case, not for
+        # file-like object case. Seems like we should do both.
+
+        if file := self._file_arg:
+            file.seek(0)
+            head = file.read(4096)
+            file.seek(0)
+            # -- `str` from a text-mode file is used as-is; undecodable bytes are dropped --
+            if isinstance(head, str):
+                return head
+            return head.decode(encoding=self.encoding, errors="ignore")
+
+        path = self._file_path
+        assert path is not None  # -- guaranteed by `._validate` --
+
+        try:
+            with open(path, encoding=self.encoding) as f:
+                return f.read(4096)
+        except UnicodeDecodeError:
+            # -- expected encoding didn't work, detect the actual one and try again --
+            detected_encoding, _ = detect_file_encoding(filename=path)
+            with open(path, encoding=detected_encoding) as f:
+                return f.read(4096)
+
+    def _validate(self) -> None:
+        """Raise when `file_path` does not refer to a file or when no source was provided."""
+        if self._file_path and not os.path.isfile(self._file_path):
+            raise FileNotFoundError(f"no such file {self._file_path}")
+        if not (self._file_path or self._file_arg):
+            raise ValueError("either `file_path` or `file` argument must be provided")
 
 
-def _detect_filetype_from_octet_stream(file: IO[bytes]) -> FileType:
-    """Detects the filetype, given a file with an application/octet-stream MIME type."""
-    file.seek(0)
-    if zipfile.is_zipfile(file):
-        file.seek(0)
-        archive = zipfile.ZipFile(file)
+class _TextFileDifferentiator:
+    """Refine a textual file-type that may not be as specific as it could be."""
 
-        # NOTE(robinson) - .docx.xlsx files are actually zip file with a .docx/.xslx extension.
-        # If the MIME type is application/octet-stream, we check if it's a .docx/.xlsx file by
-        # looking for expected filenames within the zip file.
-        archive_filenames = [f.filename for f in archive.filelist]
-        if all(f in archive_filenames for f in ("docProps/core.xml", "word/document.xml")):
-            return FileType.DOCX
-        elif all(f in archive_filenames for f in ("xl/workbook.xml",)):
-            return FileType.XLSX
-        elif all(f in archive_filenames for f in ("docProps/core.xml", "ppt/presentation.xml")):
-            return FileType.PPTX
+    def __init__(self, ctx: _FileTypeDetectionContext) -> None:
+        self._ctx = ctx
 
-    if LIBMAGIC_AVAILABLE:
-        import magic
+    @classmethod
+    def applies(cls, ctx: _FileTypeDetectionContext) -> _TextFileDifferentiator | None:
+        """Provide a differentiator for `ctx` when its MIME-type is textual, `None` otherwise.
+
+        An email ("message/rfc822") counts as textual along with any MIME-type starting "text".
+        """
+        mime_type = ctx.mime_type or ""
+        is_textual = mime_type == "message/rfc822" or mime_type.startswith("text")
+        return cls(ctx) if is_textual else None
 
-        # Infer mime type using magic if octet-stream is not zip file
-        mime_type = magic.from_buffer(file.read(4096), mime=True)
-        return FileType.from_mime_type(mime_type) or FileType.UNK
-    logger.warning(
-        "Could not detect the filetype from application/octet-stream MIME type.",
-    )
-    return FileType.UNK
+    @lazyproperty
+    def file_type(self) -> FileType:
+        """The most specific file-type that can be determined for textual content.
+
+        Always produces a file-type, `FileType.TXT` being the fallback when nothing more specific
+        applies.
+        """
+        ext = self._ctx.extension
+
+        # -- an extension for a recognized textual format is taken at face value --
+        textual_exts = ".csv .eml .html .json .md .org .p7s .rst .rtf .tab .tsv".split()
+        if ext in textual_exts:
+            return FileType.from_extension(ext) or FileType.TXT
+
+        # NOTE(crag): for older versions of the OS libmagic package, such as is currently
+        # installed on the Unstructured docker image, .json files resolve to "text/plain"
+        # rather than "application/json". this corrects for that case.
+        if self._is_json:
+            return FileType.JSON
+
+        if self._is_csv:
+            return FileType.CSV
+
+        if self._is_eml:
+            return FileType.EML
+
+        # -- content was not distinctive, so a plain-text extension settles it --
+        if ext in (".text", ".txt"):
+            return FileType.TXT
+
+        # -- last resort is the file-type for the MIME-type, plain-text when there isn't one --
+        return FileType.from_mime_type(self._ctx.mime_type) or FileType.TXT
+
+    @lazyproperty
+    def _is_csv(self) -> bool:
+        """True when the start of the file plausibly is Comma Separated Values (CSV) content."""
+
+        # -- matches only a comma followed by an even number of double-quotes, which is to say
+        # -- a comma that is not inside a quoted field --
+        unquoted_comma = r"(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$),"
+
+        rows = self._ctx.text_head.strip().splitlines()
+        if len(rows) < 2:
+            return False
+
+        # -- check at most the first 10 lines, any line without a comma disqualifies the file --
+        rows = rows[:10]
+        if not all("," in row for row in rows):
+            return False
+
+        # -- each line must have as many unquoted commas as the first (header) line --
+        comma_counts = [len(re.findall(unquoted_comma, row)) for row in rows]
+        return all(count == comma_counts[0] for count in comma_counts[1:])
+
+    @lazyproperty
+    def _is_eml(self) -> bool:
+        """True when a file identified as textual is actually an email (.eml) file.
+
+        The determination is made by matching the `EMAIL_HEAD_RE` pattern against the start of
+        the file text.
+        """
+        return bool(EMAIL_HEAD_RE.match(self._ctx.text_head))
+
+    @lazyproperty
+    def _is_json(self) -> bool:
+        """True when file is a JSON collection (array or object).
+
+        A JSON file that contains only a string, number, or boolean, while valid JSON, fails this
+        test since it is not partitionable. So does a file that is empty or cannot be decoded.
+        """
+        text_head = self._ctx.text_head.lstrip()
+
+        # -- an empty or whitespace-only file is not JSON (and has no first character to test) --
+        if not text_head:
+            return False
+
+        # -- has to be a list or object, no string, number, or bool --
+        if text_head[0] not in "[{":
+            return False
+
+        try:
+            with self._ctx.open() as file:
+                json.load(file)
+            return True
+        except (json.JSONDecodeError, UnicodeDecodeError):
+            return False
 
 
-def _is_code_mime_type(mime_type: str) -> bool:
-    """True when `mime_type` plausibly indicates a programming language source-code file."""
-    PROGRAMMING_LANGUAGES = [
-        "javascript",
-        "python",
-        "java",
-        "c++",
-        "cpp",
-        "csharp",
-        "c#",
-        "php",
-        "ruby",
-        "swift",
-        "typescript",
-    ]
-    mime_type = mime_type.lower()
-    # NOTE(robinson) - check this one explicitly to avoid conflicts with other
-    # MIME types that contain "go"
-    if mime_type == "text/x-go":
-        return True
-    return any(language in mime_type for language in PROGRAMMING_LANGUAGES)
+class _ZipFileDifferentiator:
+    """Refine a Zip-packaged file-type that may be ambiguous or swapped."""
 
+    def __init__(self, ctx: _FileTypeDetectionContext) -> None:
+        self._ctx = ctx
 
-def _is_text_file_a_csv(
-    filename: Optional[str] = None,
-    file: Optional[IO[bytes]] = None,
-    encoding: Optional[str] = "utf-8",
-):
-    """Detects if a file that has a text/plain MIME type is a CSV file."""
+    @classmethod
+    def applies(
+        cls, ctx: _FileTypeDetectionContext, mime_type: str
+    ) -> _ZipFileDifferentiator | None:
+        """Constructs an instance, but only if this differentiator applies for `mime_type`.
 
-    def count_commas(text: str):
-        """Counts the number of commas in a line, excluding commas in quotes."""
-        pattern = r"(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$),"
-        matches = re.findall(pattern, text)
-        return len(matches)
+        `mime_type` is a parameter rather than read from `ctx` so that either the asserted
+        content-type or the guessed MIME-type can be tested.
+        """
+        # -- a Zip-packaged file can be reported as any of these MIME-types --
+        zip_mime_types = (
+            "application/octet-stream",
+            "application/zip",
+            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+        )
+        if mime_type not in zip_mime_types:
+            return None
+
+        return cls(ctx)
 
-    file_text = _read_file_start_for_type_check(
-        file=file,
-        filename=filename,
-        encoding=encoding,
-    )
-    lines = file_text.strip().splitlines()
-    if len(lines) < 2:
-        return False
-    lines = lines[: len(lines)] if len(lines) < 10 else lines[:10]
-    header_count = count_commas(lines[0])
-    if any("," not in line for line in lines):
-        return False
-    return all(count_commas(line) == header_count for line in lines[1:])
+    @lazyproperty
+    def file_type(self) -> FileType | None:
+        """Differentiated file-type for a Zip archive.
 
+        Returns `None` if the file is not a Zip archive. Otherwise it returns `FileType.DOCX`,
+        `FileType.PPTX`, or `FileType.XLSX` when one of those applies and `FileType.ZIP` otherwise.
+        """
+        if not self._ctx.is_zipfile:
+            return None
 
-def _is_text_file_a_json(
-    filename: Optional[str] = None,
-    file: Optional[IO[bytes]] = None,
-    encoding: Optional[str] = "utf-8",
-):
-    """Detects if a file that has a text/plain MIME type is a JSON file."""
-    file_text = _read_file_start_for_type_check(
-        file=file,
-        filename=filename,
-        encoding=encoding,
-    )
-    try:
-        output = json.loads(file_text)
-        # NOTE(robinson) - Per RFC 4627 which defines the application/json media type,
-        # a string is a valid JSON. For our purposes, however, we want to treat that
-        # as a text file even if it is serializable as json.
-        # References:
-        # https://stackoverflow.com/questions/7487869/is-this-simple-string-considered-valid-json
-        # https://www.ietf.org/rfc/rfc4627.txt
-        return not isinstance(output, str)
-    except json.JSONDecodeError:
-        return False
+        # -- the archive is closed on exit from the `with`, which leaves a caller-provided file
+        # -- open. Naming it `archive` rather than `zip` avoids shadowing the builtin --
+        with self._ctx.open() as file, zipfile.ZipFile(file) as archive:
+            filenames = set(archive.namelist())
+
+        # NOTE(robinson) - .docx, .pptx, and .xlsx files are actually zip files. The MIME type can
+        # be ambiguous (like application/octet-stream), so we identify each of those formats by
+        # looking for the main part of its package among the filenames within the zip file.
+        if "word/document.xml" in filenames:
+            return FileType.DOCX
+
+        if "xl/workbook.xml" in filenames:
+            return FileType.XLSX
+
+        if "ppt/presentation.xml" in filenames:
+            return FileType.PPTX
+
+        return FileType.ZIP
 
 
 def _read_file_start_for_type_check(
@@ -379,9 +635,9 @@ def _resolve_symlink(file_path: str) -> str:
 _P = ParamSpec("_P")
 
 
-def add_metadata(func: Callable[_P, List[Element]]) -> Callable[_P, List[Element]]:
+def add_metadata(func: Callable[_P, list[Element]]) -> Callable[_P, list[Element]]:
     @functools.wraps(func)
-    def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> List[Element]:
+    def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> list[Element]:
         elements = func(*args, **kwargs)
         call_args = get_call_args_applying_defaults(func, *args, **kwargs)
         include_metadata = call_args.get("include_metadata", True)
@@ -412,7 +668,7 @@ def add_metadata(func: Callable[_P, List[Element]]) -> Callable[_P, List[Element
 
 def add_filetype(
     filetype: FileType,
-) -> Callable[[Callable[_P, List[Element]]], Callable[_P, List[Element]]]:
+) -> Callable[[Callable[_P, list[Element]]], Callable[_P, list[Element]]]:
     """Post-process element-metadata for list[Element] from partitioning.
 
     This decorator adds a post-processing step to a document partitioner.
@@ -423,9 +679,9 @@ def add_filetype(
 
     """
 
-    def decorator(func: Callable[_P, List[Element]]) -> Callable[_P, List[Element]]:
+    def decorator(func: Callable[_P, list[Element]]) -> Callable[_P, list[Element]]:
         @functools.wraps(func)
-        def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> List[Element]:
+        def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> list[Element]:
             elements = func(*args, **kwargs)
             params = get_call_args_applying_defaults(func, *args, **kwargs)
             include_metadata = params.get("include_metadata", True)
@@ -447,10 +703,10 @@ def add_filetype(
 
 def add_metadata_with_filetype(
     filetype: FileType,
-) -> Callable[[Callable[_P, List[Element]]], Callable[_P, List[Element]]]:
+) -> Callable[[Callable[_P, list[Element]]], Callable[_P, list[Element]]]:
     """..."""
 
-    def decorator(func: Callable[_P, List[Element]]) -> Callable[_P, List[Element]]:
+    def decorator(func: Callable[_P, list[Element]]) -> Callable[_P, list[Element]]:
         return add_filetype(filetype=filetype)(add_metadata(func))
 
     return decorator
diff --git a/unstructured/file_utils/model.py b/unstructured/file_utils/model.py
index 6c285a704..0fe0caa63 100644
--- a/unstructured/file_utils/model.py
+++ b/unstructured/file_utils/model.py
@@ -76,12 +76,14 @@ class FileType(enum.Enum):
         return None
 
     @classmethod
-    def from_mime_type(cls, mime_type: str) -> FileType | None:
+    def from_mime_type(cls, mime_type: str | None) -> FileType | None:
         """Select a FileType member based on a MIME-type.
 
         Returns `None` when `mime_type` is `None` or does not map to the canonical MIME-type of a
         `FileType` member or one of its alias MIME-types.
         """
+        if mime_type is None:
+            return None
         # -- not super efficient but plenty fast enough for once-or-twice-per-file use and avoids
         # -- limitations on defining a class variable on an Enum.
         for m in cls.__members__.values():
@@ -434,6 +436,3 @@ class FileType(enum.Enum):
         "inode/x-empty",
         cast(list[str], []),
     )
-
-
-PLAIN_TEXT_EXTENSIONS = ".csv .eml .html .json .md .org .p7s .rst .rtf .tab .text .tsv .txt".split()
diff --git a/unstructured/metrics/element_type.py b/unstructured/metrics/element_type.py
index 3e4e8cbf8..6511900a4 100644
--- a/unstructured/metrics/element_type.py
+++ b/unstructured/metrics/element_type.py
@@ -1,10 +1,23 @@
+from __future__ import annotations
+
 import json
-from typing import Dict, Optional, Tuple, Union
+
+from typing_extensions import TypeAlias
+
+FrequencyDict: TypeAlias = "dict[tuple[str, int | None], int]"
+"""Like:
+    {
+        ("ListItem", 0): 2,
+        ("NarrativeText", None): 2,
+        ("Title", 0): 5,
+        ("UncategorizedText", None): 6,
+    }
+"""
 
 
 def get_element_type_frequency(
     elements: str,
-) -> Union[Dict[Tuple[str, Optional[int]], int], Dict]:
+) -> FrequencyDict:
     """
     Calculate the frequency of Element Types from a list of elements.
 
@@ -13,7 +26,7 @@ def get_element_type_frequency(
     Returns:
         Element type and its frequency in dictionary format.
     """
-    frequency: Dict = {}
+    frequency: dict[tuple[str, int | None], int] = {}
     if len(elements) == 0:
         return frequency
     for element in json.loads(elements):
@@ -28,14 +41,14 @@ def get_element_type_frequency(
 
 
 def calculate_element_type_percent_match(
-    output: Dict,
-    source: Dict,
+    output: FrequencyDict,
+    source: FrequencyDict,
     category_depth_weight: float = 0.5,
 ) -> float:
-    """
-    Calculate the percent match between two frequency dictionary. Intended to use with
-    `get_element_type_frequency` function. The function counts the absolute exact match
-    (type and depth), and counts the weighted match (correct type but different depth),
+    """Calculate the percent match between two frequency dictionaries.
+
+    Intended for use with `get_element_type_frequency` function. The function counts the absolute
+    exact match (type and depth), and counts the weighted match (correct type but different depth),
     then normalized with source's total elements.
     """
     if len(output) == 0 or len(source) == 0:
@@ -46,8 +59,8 @@ def calculate_element_type_percent_match(
     total_source_element_count = 0
     total_match_element_count = 0
 
-    unmatched_depth_output = {}
-    unmatched_depth_source = {}
+    unmatched_depth_output: dict[str, int] = {}
+    unmatched_depth_source: dict[str, int] = {}
 
     # loop through the output list to find match with source
     for k, _ in output_copy.items():
@@ -80,12 +93,12 @@ def calculate_element_type_percent_match(
     return min(max(total_match_element_count / total_source_element_count, 0.0), 1.0)
 
 
-def _convert_to_frequency_without_depth(d: Dict) -> Dict:
+def _convert_to_frequency_without_depth(d: FrequencyDict) -> dict[str, int]:
     """
     Takes in element frequency with depth of format (type, depth): value
     and converts to dictionary without depth of format type: value
     """
-    res = {}
+    res: dict[str, int] = {}
     for k, v in d.items():
         element_type = k[0]
         if element_type not in res:
diff --git a/unstructured/partition/auto.py b/unstructured/partition/auto.py
index b7cad8055..21c15d2f4 100644
--- a/unstructured/partition/auto.py
+++ b/unstructured/partition/auto.py
@@ -184,11 +184,11 @@ def partition(
                 "The headers kwarg will be ignored.",
             )
         file_type = detect_filetype(
-            filename=filename,
+            file_path=filename,
             file=file,
-            file_filename=metadata_filename,
-            content_type=content_type,
             encoding=encoding,
+            content_type=content_type,
+            metadata_file_path=metadata_filename,
         )
 
     if file is not None:
@@ -471,12 +471,13 @@ def file_and_type_from_url(
     response = requests.get(url, headers=headers, verify=ssl_verify, timeout=request_timeout)
     file = io.BytesIO(response.content)
 
-    content_type = (
-        content_type or response.headers.get("Content-Type", "").split(";")[0].strip().lower()
-    )
-    encoding = response.headers.get("Content-Encoding", "utf-8")
+    if content_type := content_type or response.headers.get("Content-Type", None):
+        content_type = content_type.split(";")[0].strip().lower()
 
-    filetype = detect_filetype(file=file, content_type=content_type, encoding=encoding)
+    # -- non-None when response is textual --
+    encoding = response.encoding
+
+    filetype = detect_filetype(file=file, encoding=encoding, content_type=content_type)
     return file, filetype
 
 
diff --git a/unstructured/staging/base.py b/unstructured/staging/base.py
index 331c860a9..429195f68 100644
--- a/unstructured/staging/base.py
+++ b/unstructured/staging/base.py
@@ -133,10 +133,12 @@ def elements_to_json(
     filename: Optional[str] = None,
     indent: int = 4,
     encoding: str = "utf-8",
-) -> Optional[str]:
-    """Saves a list of elements to a JSON file if filename is specified.
+) -> str:
+    """Serialize `elements` to a JSON array.
 
-    Otherwise, return the list of elements as a string.
+    Also writes the JSON to `filename` if it is provided, encoded using `encoding`.
+
+    The JSON is returned as a string.
     """
     # -- serialize `elements` as a JSON array (str) --
     precision_adjusted_elements = _fix_metadata_field_precision(elements)
@@ -146,7 +148,6 @@ def elements_to_json(
     if filename is not None:
         with open(filename, "w", encoding=encoding) as f:
             f.write(json_str)
-        return None
 
     return json_str