rfctr(auto): add _PartitionerLoader (#3418)

**Summary** Replace the conditional explicit imports of partitioner modules in `.partition.auto` with the new `_PartitionerLoader` class. This avoids unbound-variable warnings and is much less noisy. `_PartitionerLoader` makes use of the new `FileType` property `.importable_package_dependencies` to determine whether all required packages are importable before dispatching the file to its partitioner. It uses `FileType.extra_name` to form a helpful error message when a dependency is not installed, so the caller knows which `pip install` extra to specify to remedy the error. `_PartitionerLoader` uses the `FileType` properties `.partitioner_module_qname` and `.partitioner_function_name` to load the partitioner once its dependencies are verified. Loaded partitioners are cached with module lifetime scope for efficiency.
2025-12-05 11:32:35 +00:00 · 2024-07-21 23:03:55 -07:00 · 2024-07-21 23:03:55 -07:00 · 49c4bd34be
commit 49c4bd34be
parent ec59abfabc
7 changed files with 626 additions and 284 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -1,3 +1,11 @@
+## 0.15.1-dev0
+
+### Enhancements
+
+### Features
+
+### Fixes
+
 ## 0.15.0

 ### Enhancements
--- a/test_unstructured/file_utils/test_model.py
+++ b/test_unstructured/file_utils/test_model.py
@ -50,6 +50,68 @@ class DescribeFileType:
    def but_not_when_that_mime_type_is_not_registered_by_a_file_type(self, mime_type: str):
        assert FileType.from_mime_type(mime_type) is None

+    @pytest.mark.parametrize(
+        ("file_type", "expected_value"),
+        [
+            (FileType.BMP, ("unstructured_inference",)),
+            (FileType.CSV, ("pandas",)),
+            (FileType.DOC, ("docx",)),
+            (FileType.EMPTY, ()),
+            (FileType.HTML, ()),
+            (FileType.ODT, ("docx", "pypandoc")),
+            (FileType.PDF, ("pdf2image", "pdfminer", "PIL")),
+            (FileType.UNK, ()),
+            (FileType.WAV, ()),
+            (FileType.ZIP, ()),
+        ],
+    )
+    def it_knows_which_importable_packages_its_partitioner_depends_on(
+        self, file_type: FileType, expected_value: tuple[str, ...]
+    ):
+        assert file_type.importable_package_dependencies == expected_value
+
+    @pytest.mark.parametrize(
+        ("file_type", "expected_value"),
+        [
+            (FileType.BMP, "image"),
+            (FileType.DOC, "doc"),
+            (FileType.DOCX, "docx"),
+            (FileType.EML, None),
+            (FileType.EMPTY, None),
+            (FileType.MSG, "msg"),
+            (FileType.PDF, "pdf"),
+            (FileType.XLS, "xlsx"),
+            (FileType.UNK, None),
+            (FileType.WAV, None),
+            (FileType.ZIP, None),
+        ],
+    )
+    def and_it_knows_which_pip_extra_needs_to_be_installed_to_get_those_dependencies(
+        self, file_type: FileType, expected_value: str | None
+    ):
+        assert file_type.extra_name == expected_value
+
+    @pytest.mark.parametrize(
+        ("file_type", "expected_value"),
+        [
+            (FileType.BMP, True),
+            (FileType.CSV, True),
+            (FileType.DOC, True),
+            (FileType.EML, True),
+            (FileType.JPG, True),
+            (FileType.PDF, True),
+            (FileType.PPTX, True),
+            (FileType.WAV, False),
+            (FileType.ZIP, False),
+            (FileType.EMPTY, False),
+            (FileType.UNK, False),
+        ],
+    )
+    def it_knows_whether_files_of_its_type_are_directly_partitionable(
+        self, file_type: FileType, expected_value: str
+    ):
+        assert file_type.is_partitionable is expected_value
+
    @pytest.mark.parametrize(
        ("file_type", "mime_type"),
        [
@ -68,3 +130,72 @@ class DescribeFileType:
    )
    def it_knows_its_canonical_MIME_type(self, file_type: FileType, mime_type: str):
        assert file_type.mime_type == mime_type
+
+    @pytest.mark.parametrize(
+        ("file_type", "expected_value"),
+        [
+            (FileType.BMP, "partition_image"),
+            (FileType.CSV, "partition_csv"),
+            (FileType.DOC, "partition_doc"),
+            (FileType.DOCX, "partition_docx"),
+            (FileType.JPG, "partition_image"),
+            (FileType.PNG, "partition_image"),
+            (FileType.TIFF, "partition_image"),
+        ],
+    )
+    def it_knows_its_partitioner_function_name(self, file_type: FileType, expected_value: str):
+        assert file_type.partitioner_function_name == expected_value
+
+    @pytest.mark.parametrize(
+        "file_type", [FileType.WAV, FileType.ZIP, FileType.EMPTY, FileType.UNK]
+    )
+    def but_it_raises_on_partitioner_function_name_access_when_the_file_type_is_not_partitionable(
+        self, file_type: FileType
+    ):
+        with pytest.raises(ValueError, match="`.partitioner_function_name` is undefined because "):
+            file_type.partitioner_function_name
+
+    @pytest.mark.parametrize(
+        ("file_type", "expected_value"),
+        [
+            (FileType.BMP, "unstructured.partition.image"),
+            (FileType.CSV, "unstructured.partition.csv"),
+            (FileType.DOC, "unstructured.partition.doc"),
+            (FileType.DOCX, "unstructured.partition.docx"),
+            (FileType.JPG, "unstructured.partition.image"),
+            (FileType.PNG, "unstructured.partition.image"),
+            (FileType.TIFF, "unstructured.partition.image"),
+        ],
+    )
+    def it_knows_the_fully_qualified_name_of_its_partitioner_module(
+        self, file_type: FileType, expected_value: str
+    ):
+        assert file_type.partitioner_module_qname == expected_value
+
+    @pytest.mark.parametrize(
+        "file_type", [FileType.WAV, FileType.ZIP, FileType.EMPTY, FileType.UNK]
+    )
+    def but_it_raises_on_partitioner_module_qname_access_when_the_file_type_is_not_partitionable(
+        self, file_type: FileType
+    ):
+        with pytest.raises(ValueError, match="`.partitioner_module_qname` is undefined because "):
+            file_type.partitioner_module_qname
+
+    @pytest.mark.parametrize(
+        ("file_type", "expected_value"),
+        [
+            (FileType.BMP, "image"),
+            (FileType.CSV, "csv"),
+            (FileType.DOC, "doc"),
+            (FileType.DOCX, "docx"),
+            (FileType.JPG, "image"),
+            (FileType.PNG, "image"),
+            (FileType.TIFF, "image"),
+            (FileType.XLS, "xlsx"),
+            (FileType.XLSX, "xlsx"),
+        ],
+    )
+    def it_provides_access_to_the_partitioner_shortname(
+        self, file_type: FileType, expected_value: str
+    ):
+        assert file_type.partitioner_shortname == expected_value
--- a/test_unstructured/partition/test_auto.py
+++ b/test_unstructured/partition/test_auto.py
@ -10,7 +10,7 @@ import sys
 import tempfile
 import warnings
 from importlib import import_module
-from typing import Callable, Iterator, cast
+from typing import Iterator, cast
 from unittest.mock import patch

 import pytest
@ -27,7 +27,6 @@ from test_unstructured.unit_utils import (
    ANY,
    FixtureRequest,
    LogCaptureFixture,
-    MonkeyPatch,
    example_doc_path,
    function_mock,
    method_mock,
@ -46,8 +45,7 @@ from unstructured.documents.elements import (
    Title,
 )
 from unstructured.file_utils.model import FileType
-from unstructured.partition import auto
-from unstructured.partition.auto import IMAGE_FILETYPES, _get_partition_with_extras, partition
+from unstructured.partition.auto import _PartitionerLoader, partition
 from unstructured.partition.utils.constants import PartitionStrategy
 from unstructured.staging.base import elements_from_json, elements_to_dicts, elements_to_json

@ -570,16 +568,21 @@ def test_auto_partition_pdf_from_file(pass_metadata_filename: bool, content_type
    assert e.text.startswith("Zejiang Shen")


-def test_auto_partition_pdf_with_fast_strategy(monkeypatch: MonkeyPatch):
+def test_auto_partition_pdf_with_fast_strategy(request: FixtureRequest):
+    partition_pdf_ = function_mock(
+        request,
+        "unstructured.partition.pdf.partition_pdf",
+        return_value=[NarrativeText("Hello there!")],
+    )
+    partitioner_loader_get_ = method_mock(
+        request, _PartitionerLoader, "get", return_value=partition_pdf_
+    )
    file_path = example_doc_path("pdf/layout-parser-paper-fast.pdf")

-    mock_return = [NarrativeText("Hello there!")]
-    with patch.object(auto, "partition_pdf", return_value=mock_return) as mock_partition:
-        mock_partition_with_extras_map = {"pdf": mock_partition}
-        monkeypatch.setattr(auto, "PARTITION_WITH_EXTRAS_MAP", mock_partition_with_extras_map)
-        partition(filename=file_path, strategy=PartitionStrategy.FAST)
+    partition(file_path, strategy=PartitionStrategy.FAST)

-    mock_partition.assert_called_once_with(
+    partitioner_loader_get_.assert_called_once_with(ANY, FileType.PDF)
+    partition_pdf_.assert_called_once_with(
        filename=file_path,
        file=None,
        url=None,
@ -919,10 +922,10 @@ def test_auto_partition_xml_from_file_with_tags():

 def test_auto_partition_raises_with_bad_type(request: FixtureRequest):
    detect_filetype_ = function_mock(
-        request, "unstructured.partition.auto.detect_filetype", return_value=None
+        request, "unstructured.partition.auto.detect_filetype", return_value=FileType.UNK
    )

-    with pytest.raises(ValueError, match="Invalid file made-up.fake. The None file type is not "):
+    with pytest.raises(ValueError, match="Invalid file made-up.fake. The FileType.UNK file type "):
        partition(filename="made-up.fake", strategy=PartitionStrategy.HI_RES)

    detect_filetype_.assert_called_once_with(
@ -1026,23 +1029,7 @@ def test_auto_partition_respects_detect_language_per_element_arg():


@pytest.mark.parametrize(
-    "file_extension",
-    [
-        "doc",
-        "docx",
-        "eml",
-        "epub",
-        "html",
-        "md",
-        "odt",
-        "org",
-        "ppt",
-        "pptx",
-        "rst",
-        "rtf",
-        "txt",
-        "xml",
-    ],
+    "file_extension", "doc docx eml epub html md odt org ppt pptx rst rtf txt xml".split()
 )
 def test_auto_partition_respects_language_arg(file_extension: str):
    elements = partition(
@ -1167,7 +1154,7 @@ def test_auto_partition_respects_skip_infer_table_types(


@pytest.mark.parametrize(
-    ("content_type", "filetype_shortname", "expected_value"),
+    ("content_type", "shortname", "expected_value"),
    [
        ("text/csv", "csv", "text/csv"),
        ("text/html", "html", "text/html"),
@ -1177,22 +1164,23 @@ def test_auto_partition_respects_skip_infer_table_types(
 def test_auto_partition_adds_filetype_to_metadata(
    request: FixtureRequest,
    content_type: str,
-    filetype_shortname: str,
+    shortname: str,
    expected_value: str | None,
-    monkeypatch: MonkeyPatch,
 ):
    partition_fn_ = function_mock(
        request,
-        f"unstructured.partition.auto.partition_{filetype_shortname}",
+        f"unstructured.partition.{shortname}.partition_{shortname}",
        return_value=[Text("text 1"), Text("text 2")],
    )
-    mock_partition_with_extras_map = {filetype_shortname: partition_fn_}
-    monkeypatch.setattr(auto, "PARTITION_WITH_EXTRAS_MAP", mock_partition_with_extras_map)
+    partitioner_loader_get_ = method_mock(
+        request, _PartitionerLoader, "get", return_value=partition_fn_
+    )

    elements = partition(
        example_doc_path("pdf/layout-parser-paper-fast.pdf"), content_type=content_type
    )

+    partitioner_loader_get_.assert_called_once()
    assert len(elements) == 2
    assert all(e.metadata.filetype == expected_value for e in elements)

@ -1207,20 +1195,23 @@ def test_auto_partition_adds_filetype_to_metadata(
    ],
 )
 def test_auto_partition_overwrites_any_filetype_applied_by_file_specific_partitioner(
-    request: FixtureRequest, content_type: str | None, monkeypatch: MonkeyPatch
+    request: FixtureRequest, content_type: str | None
 ):
    metadata = ElementMetadata(filetype="imapdf")
    partition_pdf_ = function_mock(
        request,
-        "unstructured.partition.auto.partition_pdf",
+        "unstructured.partition.pdf.partition_pdf",
        return_value=[Text("text 1", metadata=metadata), Text("text 2", metadata=metadata)],
    )
-    monkeypatch.setattr(auto, "PARTITION_WITH_EXTRAS_MAP", {"pdf": partition_pdf_})
+    partitioner_loader_get_ = method_mock(
+        request, _PartitionerLoader, "get", return_value=partition_pdf_
+    )

    elements = partition(
        example_doc_path("pdf/layout-parser-paper-fast.pdf"), content_type=content_type
    )

+    partitioner_loader_get_.assert_called_once_with(ANY, FileType.PDF)
    assert len(elements) == 2
    assert all(e.metadata.filetype == "application/pdf" for e in elements)

@ -1231,7 +1222,7 @@ def test_auto_partition_overwrites_any_filetype_applied_by_file_specific_partiti
        t
        for t in FileType
        if t not in (FileType.EMPTY, FileType.UNK, FileType.WAV, FileType.XLS, FileType.ZIP)
-        and t not in IMAGE_FILETYPES
+        and t.partitioner_function_name != "partition_image"
    ],
 )
 def test_auto_partition_applies_the_correct_filetype_for_all_filetypes(filetype: FileType):
@ -1305,10 +1296,18 @@ def test_auto_partition_from_file_works_on_empty_file():
        assert partition(file=f) == []


-def test_auto_partition_requiring_extras_prompts_to_install_missing_dependencies():
-    partition_with_extras_map: dict[str, Callable[..., list[Element]]] = {}
-    with pytest.raises(ImportError, match="partition_pdf is not available. Install the pdf depen"):
-        _get_partition_with_extras("pdf", partition_with_extras_map)
+def test_auto_partition_that_requires_extras_raises_when_dependencies_are_not_installed(
+    request: FixtureRequest,
+):
+    _PartitionerLoader._partitioners.pop(FileType.PDF, None)
+    dependency_exists_ = function_mock(
+        request, "unstructured.partition.auto.dependency_exists", return_value=False
+    )
+    match = r"partition_pdf\(\) is not available because one or more dependencies are not installed"
+    with pytest.raises(ImportError, match=match):
+        partition(example_doc_path("layout-parser-paper-fast.pdf"))
+
+    dependency_exists_.assert_called_once_with("pdf2image")


 # ================================================================================================
--- a/unstructured/version.py
+++ b/unstructured/version.py
@ -1 +1 @@
-__version__ = "0.15.0"  # pragma: no cover
+__version__ = "0.15.1-dev0"  # pragma: no cover
--- a/unstructured/file_utils/filetype.py
+++ b/unstructured/file_utils/filetype.py
@ -32,7 +32,7 @@ def detect_filetype(
    file: Optional[IO[bytes]] = None,
    file_filename: Optional[str] = None,
    encoding: Optional[str] = "utf-8",
-) -> Optional[FileType]:
+) -> FileType:
    """Use libmagic to determine a file's type.

    Helps determine which partition brick to use for a given file. A return value of None indicates
@ -122,7 +122,7 @@ def detect_filetype(
            ".tsv",
            ".json",
        ]:
-            return FileType.from_extension(extension)
+            return FileType.from_extension(extension) or FileType.TXT

        # NOTE(crag): for older versions of the OS libmagic package, such as is currently
        # installed on the Unstructured docker image, .json files resolve to "text/plain"
--- a/unstructured/file_utils/model.py
+++ b/unstructured/file_utils/model.py
@ -12,7 +12,17 @@ class FileType(enum.Enum):
    Note not all of these can be partitioned, e.g. WAV and ZIP have no partitioner.
    """

+    _partitioner_shortname: str | None
+    """Like "docx", from which partitioner module and function-name can be derived via template."""
+
+    _importable_package_dependencies: tuple[str, ...]
+    """Packages that must be available for import for this file-type's partitioner to work."""
+
+    _extra_name: str | None
+    """`pip install` extra that provides package dependencies for this file-type."""
+
    _extensions: tuple[str, ...]
+    """Filename-extensions recognized as this file-type. Use for secondary identification only."""

    _canonical_mime_type: str
    """The MIME-type used as `.metadata.filetype` for this file-type."""
@ -23,12 +33,18 @@ class FileType(enum.Enum):
    def __new__(
        cls,
        value: str,
+        partitioner_shortname: str | None,
+        importable_package_dependencies: Iterable[str],
+        extra_name: str | None,
        extensions: Iterable[str],
        canonical_mime_type: str,
        alias_mime_types: Iterable[str],
    ):
        self = object.__new__(cls)
        self._value_ = value
+        self._partitioner_shortname = partitioner_shortname
+        self._importable_package_dependencies = tuple(importable_package_dependencies)
+        self._extra_name = extra_name
        self._extensions = tuple(extensions)
        self._canonical_mime_type = canonical_mime_type
        self._alias_mime_types = tuple(alias_mime_types)
@ -41,8 +57,150 @@ class FileType(enum.Enum):
        """
        return self.name < other.name

-    BMP = ("bmp", [".bmp"], "image/bmp", cast(list[str], []))
+    @classmethod
+    def from_extension(cls, extension: str | None) -> FileType | None:
+        """Select a FileType member based on an extension.
+
+        `extension` must include the leading period, like `".pdf"`. Extension is suitable as a
+        secondary file-type identification method but is unreliable for primary identification.
+
+        Returns `None` when `extension` is not registered for any supported file-type.
+        """
+        if extension in (None, "", "."):
+            return None
+        # -- not super efficient but plenty fast enough for once-or-twice-per-file use and avoids
+        # -- limitations on defining a class variable on an Enum.
+        for m in cls.__members__.values():
+            if extension in m._extensions:
+                return m
+        return None
+
+    @classmethod
+    def from_mime_type(cls, mime_type: str) -> FileType | None:
+        """Select a FileType member based on a MIME-type.
+
+        Returns `None` when `mime_type` is `None` or does not map to the canonical MIME-type of a
+        `FileType` member or one of its alias MIME-types.
+        """
+        # -- not super efficient but plenty fast enough for once-or-twice-per-file use and avoids
+        # -- limitations on defining a class variable on an Enum.
+        for m in cls.__members__.values():
+            if mime_type == m._canonical_mime_type or mime_type in m._alias_mime_types:
+                return m
+        return None
+
+    @property
+    def extra_name(self) -> str | None:
+        """The `pip` "extra" that must be installed to provide this file-type's dependencies.
+
+        Like "image" for PNG, as in `pip install "unstructured[image]"`.
+
+        `None` when partitioning this file-type requires only the base `unstructured` install.
+        """
+        return self._extra_name
+
+    @property
+    def importable_package_dependencies(self) -> tuple[str, ...]:
+        """Packages that must be importable for this file-type's partitioner to work.
+
+        In general, these are the packages provided by the `pip install` "extra" for this file-type,
+        like `pip install "unstructured[docx]"` loads the `python-docx` package.
+
+        Note that these names are the ones used in an `import` statement, which is not necessarily
+        the same as the _distribution_ package name used by `pip`. For example, the DOCX
+        distribution package name is `"python-docx"` whereas the _importable_ package name is
+        `"docx"`. This latter name as it appears like `import docx` is what is provided by this
+        property.
+
+        The return value is an empty tuple for file-types that do not require optional dependencies.
+
+        Note this property does not complain when accessed on a non-partitionable file-type, it
+        simply returns an empty tuple because file-types that are not partitionable require no
+        optional dependencies.
+        """
+        return self._importable_package_dependencies
+
+    @property
+    def is_partitionable(self) -> bool:
+        """True when there is a partitioner for this file-type.
+
+        Note this does not check whether the dependencies for this file-type are installed so
+        attempting to partition a file of this type may still fail. This is meant for
+        distinguishing file-types like WAV, ZIP, EMPTY, and UNK which are legitimate file-types
+        but have no associated partitioner.
+        """
+        return bool(self._partitioner_shortname)
+
+    @property
+    def mime_type(self) -> str:
+        """The canonical MIME-type for this file-type, suitable for use in metadata.
+
+        This value is used in `.metadata.filetype` for elements partitioned from files of this
+        type. In general it is the "offical", "recommended", or "defacto-standard" MIME-type for
+        files of this type, in that order, as available.
+        """
+        return self._canonical_mime_type
+
+    @property
+    def partitioner_function_name(self) -> str:
+        """Name of partitioner function for this file-type. Like "partition_docx".
+
+        Raises when this property is accessed on a file-type that is not partitionable. Use
+        `.is_partitionable` to avoid exceptions when partitionability is unknown.
+        """
+        # -- Raise when this property is accessed on a FileType member that has no partitioner
+        # -- shortname. This prevents a harder-to-find bug from appearing far away from this call
+        # -- when code would try to `getattr(module, None)` or whatever.
+        if (shortname := self._partitioner_shortname) is None:
+            raise ValueError(
+                f"`.partitioner_function_name` is undefined because FileType.{self.name} is not"
+                f" partitionable. Use `.is_partitionable` to determine whether a `FileType`"
+                f" is partitionable."
+            )
+        return f"partition_{shortname}"
+
+    @property
+    def partitioner_module_qname(self) -> str:
+        """Fully-qualified name of module providing partitioner for this file-type.
+
+        e.g. "unstructured.partition.docx" for FileType.DOCX.
+        """
+        # -- Raise when this property is accessed on a FileType member that has no partitioner
+        # -- shortname. This prevents a harder-to-find bug from appearing far away from this call
+        # -- when code would try to `importlib.import_module(None)` or whatever.
+        if (shortname := self._partitioner_shortname) is None:
+            raise ValueError(
+                f"`.partitioner_module_qname` is undefined because FileType.{self.name} is not"
+                f" partitionable. Use `.is_partitionable` to determine whether a `FileType`"
+                f" is partitionable."
+            )
+        return f"unstructured.partition.{shortname}"
+
+    @property
+    def partitioner_shortname(self) -> str | None:
+        """Familiar name of partitioner, like "image" for file-types that use `partition_image()`.
+
+        One use is to determine whether a file-type is one of the five image types, all of which
+        are processed by `partition_image()`.
+
+        `None` for file-types that are not partitionable, although `.is_partitionable` is the
+        preferred way of discovering that.
+        """
+        return self._partitioner_shortname
+
+    BMP = (
+        "bmp",  # -- value for this Enum member, like BMP = "bmp" in a simple enum --
+        "image",  # -- partitioner_shortname --
+        ["unstructured_inference"],  # -- importable_package_dependencies --
+        "image",  # -- extra_name - like `pip install "unstructured[image]"` in this case --
+        [".bmp"],  # -- extensions - filename extensions that map to this file-type --
+        "image/bmp",  # -- canonical_mime_type -  MIME-type written to `.metadata.filetype` --
+        cast(list[str], []),  # -- alias_mime-types - other MIME-types that map to this file-type --
+    )
    CSV = (
+        "csv",
+        "csv",
+        ["pandas"],
        "csv",
        [".csv"],
        "text/csv",
@ -54,38 +212,143 @@ class FileType(enum.Enum):
            "text/x-csv",
        ],
    )
-    DOC = ("doc", [".doc"], "application/msword", cast(list[str], []))
+    DOC = ("doc", "doc", ["docx"], "doc", [".doc"], "application/msword", cast(list[str], []))
    DOCX = (
+        "docx",
+        "docx",
+        ["docx"],
        "docx",
        [".docx"],
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        cast(list[str], []),
    )
-    EML = ("eml", [".eml", ".p7s"], "message/rfc822", cast(list[str], []))
-    EPUB = ("epub", [".epub"], "application/epub", ["application/epub+zip"])
-    HEIC = ("heic", [".heic"], "image/heic", cast(list[str], []))
-    HTML = ("html", [".html", ".htm"], "text/html", cast(list[str], []))
-    JPG = ("jpg", [".jpeg", ".jpg"], "image/jpeg", cast(list[str], []))
-    JSON = ("json", [".json"], "application/json", cast(list[str], []))
-    MD = ("md", [".md"], "text/markdown", ["text/x-markdown"])
-    MSG = ("msg", [".msg"], "application/vnd.ms-outlook", ["application/x-ole-storage"])
-    ODT = ("odt", [".odt"], "application/vnd.oasis.opendocument.text", cast(list[str], []))
-    ORG = ("org", [".org"], "text/org", cast(list[str], []))
-    PDF = ("pdf", [".pdf"], "application/pdf", cast(list[str], []))
-    PNG = ("png", [".png"], "image/png", cast(list[str], []))
-    PPT = ("ppt", [".ppt"], "application/vnd.ms-powerpoint", cast(list[str], []))
+    EML = (
+        "eml",
+        "email",
+        cast(list[str], []),
+        None,
+        [".eml", ".p7s"],
+        "message/rfc822",
+        cast(list[str], []),
+    )
+    EPUB = (
+        "epub",
+        "epub",
+        ["pypandoc"],
+        "epub",
+        [".epub"],
+        "application/epub",
+        ["application/epub+zip"],
+    )
+    HEIC = (
+        "heic",
+        "image",
+        ["unstructured_inference"],
+        "image",
+        [".heic"],
+        "image/heic",
+        cast(list[str], []),
+    )
+    HTML = (
+        "html",
+        "html",
+        cast(list[str], []),
+        None,
+        [".html", ".htm"],
+        "text/html",
+        cast(list[str], []),
+    )
+    JPG = (
+        "jpg",
+        "image",
+        ["unstructured_inference"],
+        "image",
+        [".jpeg", ".jpg"],
+        "image/jpeg",
+        cast(list[str], []),
+    )
+    JSON = (
+        "json",
+        "json",
+        cast(list[str], []),
+        None,
+        [".json"],
+        "application/json",
+        cast(list[str], []),
+    )
+    MD = ("md", "md", ["markdown"], "md", [".md"], "text/markdown", ["text/x-markdown"])
+    MSG = (
+        "msg",
+        "msg",
+        ["oxmsg"],
+        "msg",
+        [".msg"],
+        "application/vnd.ms-outlook",
+        ["application/x-ole-storage"],
+    )
+    ODT = (
+        "odt",
+        "odt",
+        ["docx", "pypandoc"],
+        "odt",
+        [".odt"],
+        "application/vnd.oasis.opendocument.text",
+        cast(list[str], []),
+    )
+    ORG = ("org", "org", ["pypandoc"], "org", [".org"], "text/org", cast(list[str], []))
+    PDF = (
+        "pdf",
+        "pdf",
+        ["pdf2image", "pdfminer", "PIL"],
+        "pdf",
+        [".pdf"],
+        "application/pdf",
+        cast(list[str], []),
+    )
+    PNG = (
+        "png",
+        "image",
+        ["unstructured_inference"],
+        "image",
+        [".png"],
+        "image/png",
+        cast(list[str], []),
+    )
+    PPT = (
+        "ppt",
+        "ppt",
+        ["pptx"],
+        "ppt",
+        [".ppt"],
+        "application/vnd.ms-powerpoint",
+        cast(list[str], []),
+    )
    PPTX = (
+        "pptx",
+        "pptx",
+        ["pptx"],
        "pptx",
        [".pptx"],
        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        cast(list[str], []),
    )
-    RST = ("rst", [".rst"], "text/x-rst", cast(list[str], []))
-    RTF = ("rtf", [".rtf"], "text/rtf", ["application/rtf"])
-    TIFF = ("tiff", [".tiff"], "image/tiff", cast(list[str], []))
-    TSV = ("tsv", [".tab", ".tsv"], "text/tsv", cast(list[str], []))
+    RST = ("rst", "rst", ["pypandoc"], "rst", [".rst"], "text/x-rst", cast(list[str], []))
+    RTF = ("rtf", "rtf", ["pypandoc"], "rtf", [".rtf"], "text/rtf", ["application/rtf"])
+    TIFF = (
+        "tiff",
+        "image",
+        ["unstructured_inference"],
+        "image",
+        [".tiff"],
+        "image/tiff",
+        cast(list[str], []),
+    )
+    TSV = ("tsv", "tsv", ["pandas"], "tsv", [".tab", ".tsv"], "text/tsv", cast(list[str], []))
    TXT = (
        "txt",
+        "text",
+        cast(list[str], []),
+        None,
        [
            ".txt",
            ".text",
@ -119,6 +382,9 @@ class FileType(enum.Enum):
    )
    WAV = (
        "wav",
+        None,
+        cast(list[str], []),
+        None,
        [".wav"],
        "audio/wav",
        [
@ -129,60 +395,45 @@ class FileType(enum.Enum):
            "audio/x-wav",
        ],
    )
-    XLS = ("xls", [".xls"], "application/vnd.ms-excel", cast(list[str], []))
+    XLS = (
+        "xls",
+        "xlsx",
+        ["pandas", "openpyxl"],
+        "xlsx",
+        [".xls"],
+        "application/vnd.ms-excel",
+        cast(list[str], []),
+    )
    XLSX = (
+        "xlsx",
+        "xlsx",
+        ["pandas", "openpyxl"],
        "xlsx",
        [".xlsx"],
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        cast(list[str], []),
    )
-    XML = ("xml", [".xml"], "application/xml", ["text/xml"])
-    ZIP = ("zip", [".zip"], "application/zip", cast(list[str], []))
+    XML = ("xml", "xml", cast(list[str], []), None, [".xml"], "application/xml", ["text/xml"])
+    ZIP = ("zip", None, cast(list[str], []), None, [".zip"], "application/zip", cast(list[str], []))

-    UNK = ("unk", cast(list[str], []), "application/octet-stream", cast(list[str], []))
-    EMPTY = ("empty", cast(list[str], []), "inode/x-empty", cast(list[str], []))
-
-    @classmethod
-    def from_extension(cls, extension: str | None) -> FileType | None:
-        """Select a FileType member based on an extension.
-
-        `extension` must include the leading period, like `".pdf"`. Extension is suitable as a
-        secondary file-type identification method but is unreliable for primary identification..
-
-        Returns `None` when `extension` is not registered for any supported file-type.
-        """
-        if extension in (None, "", "."):
-            return None
-        # -- not super efficient but plenty fast enough for once-or-twice-per-file use and avoids
-        # -- limitations on defining a class variable on an Enum.
-        for m in cls.__members__.values():
-            if extension in m._extensions:
-                return m
-        return None
-
-    @classmethod
-    def from_mime_type(cls, mime_type: str) -> FileType | None:
-        """Select a FileType member based on a MIME-type.
-
-        `extension` must include the leading period, like `".pdf"`. Extension is suitable as a
-        secondary file-type identification method but is unreliable for primary identification..
-        """
-        # -- not super efficient but plenty fast enough for once-or-twice-per-file use and avoids
-        # -- limitations on defining a class variable on an Enum.
-        for m in cls.__members__.values():
-            if mime_type == m._canonical_mime_type or mime_type in m._alias_mime_types:
-                return m
-        return None
-
-    @property
-    def mime_type(self) -> str:
-        """The canonical MIME-type for this file-type, suitable for use in metadata.
-
-        This value is used in `.metadata.filetype` for elements partitioned from files of this
-        type. In general it is the "offical", "recommended", or "defacto-standard" MIME-type for
-        files of this type, in that order, as available.
-        """
-        return self._canonical_mime_type
+    UNK = (
+        "unk",
+        None,
+        cast(list[str], []),
+        None,
+        cast(list[str], []),
+        "application/octet-stream",
+        cast(list[str], []),
+    )
+    EMPTY = (
+        "empty",
+        None,
+        cast(list[str], []),
+        None,
+        cast(list[str], []),
+        "inode/x-empty",
+        cast(list[str], []),
+    )


 PLAIN_TEXT_EXTENSIONS = ".csv .eml .html .json .md .org .p7s .rst .rtf .tab .text .tsv .txt".split()
--- a/unstructured/partition/auto.py
+++ b/unstructured/partition/auto.py
@ -2,131 +2,28 @@

 from __future__ import annotations

+import importlib
 import io
 from typing import IO, Any, Callable, Literal, Optional

 import requests
+from typing_extensions import TypeAlias

 from unstructured.documents.elements import DataSourceMetadata, Element
 from unstructured.file_utils.filetype import detect_filetype, is_json_processable
 from unstructured.file_utils.model import FileType
 from unstructured.logger import logger
 from unstructured.partition.common import exactly_one
-from unstructured.partition.email import partition_email
-from unstructured.partition.html import partition_html
-from unstructured.partition.json import partition_json
 from unstructured.partition.lang import check_language_args
-from unstructured.partition.text import partition_text
 from unstructured.partition.utils.constants import PartitionStrategy
-from unstructured.partition.xml import partition_xml
 from unstructured.utils import dependency_exists

-PARTITION_WITH_EXTRAS_MAP: dict[str, Callable[..., list[Element]]] = {}
-
-if dependency_exists("pandas"):
-    from unstructured.partition.csv import partition_csv
-    from unstructured.partition.tsv import partition_tsv
-
-    PARTITION_WITH_EXTRAS_MAP["csv"] = partition_csv
-    PARTITION_WITH_EXTRAS_MAP["tsv"] = partition_tsv
-
-
-if dependency_exists("docx"):
-    from unstructured.partition.doc import partition_doc
-    from unstructured.partition.docx import partition_docx
-
-    PARTITION_WITH_EXTRAS_MAP["doc"] = partition_doc
-    PARTITION_WITH_EXTRAS_MAP["docx"] = partition_docx
-
-
-if dependency_exists("docx") and dependency_exists("pypandoc"):
-    from unstructured.partition.odt import partition_odt
-
-    PARTITION_WITH_EXTRAS_MAP["odt"] = partition_odt
-
-
-if dependency_exists("pypandoc"):
-    from unstructured.partition.epub import partition_epub
-
-    PARTITION_WITH_EXTRAS_MAP["epub"] = partition_epub
-
-
-if dependency_exists("pypandoc"):
-    from unstructured.partition.org import partition_org
-    from unstructured.partition.rst import partition_rst
-    from unstructured.partition.rtf import partition_rtf
-
-    PARTITION_WITH_EXTRAS_MAP["org"] = partition_org
-    PARTITION_WITH_EXTRAS_MAP["rst"] = partition_rst
-    PARTITION_WITH_EXTRAS_MAP["rtf"] = partition_rtf
-
-
-if dependency_exists("markdown"):
-    from unstructured.partition.md import partition_md
-
-    PARTITION_WITH_EXTRAS_MAP["md"] = partition_md
-
-
-if dependency_exists("oxmsg"):
-    from unstructured.partition.msg import partition_msg
-
-    PARTITION_WITH_EXTRAS_MAP["msg"] = partition_msg
-
-
-pdf_imports = ["pdf2image", "pdfminer", "PIL"]
-if all(dependency_exists(dep) for dep in pdf_imports):
-    from unstructured.partition.pdf import partition_pdf
-
-    PARTITION_WITH_EXTRAS_MAP["pdf"] = partition_pdf
-
-
-if dependency_exists("unstructured_inference"):
-    from unstructured.partition.image import partition_image
-
-    PARTITION_WITH_EXTRAS_MAP["image"] = partition_image
-
-
-if dependency_exists("pptx"):
-    from unstructured.partition.ppt import partition_ppt
-    from unstructured.partition.pptx import partition_pptx
-
-    PARTITION_WITH_EXTRAS_MAP["ppt"] = partition_ppt
-    PARTITION_WITH_EXTRAS_MAP["pptx"] = partition_pptx
-
-
-if dependency_exists("pandas") and dependency_exists("openpyxl"):
-    from unstructured.partition.xlsx import partition_xlsx
-
-    PARTITION_WITH_EXTRAS_MAP["xlsx"] = partition_xlsx
-
-
-IMAGE_FILETYPES = [
-    FileType.HEIC,
-    FileType.PNG,
-    FileType.JPG,
-    FileType.TIFF,
-    FileType.BMP,
-]
-
-
-def _get_partition_with_extras(
-    doc_type: str,
-    partition_with_extras_map: Optional[dict[str, Callable[..., list[Element]]]] = None,
-):
-    if partition_with_extras_map is None:
-        partition_with_extras_map = PARTITION_WITH_EXTRAS_MAP
-    _partition_func = partition_with_extras_map.get(doc_type)
-    if _partition_func is None:
-        raise ImportError(
-            f"partition_{doc_type} is not available. "
-            f"Install the {doc_type} dependencies with "
-            f'pip install "unstructured[{doc_type}]"',
-        )
-    return _partition_func
+# -- type of a partitioner function: called with any arguments, returns a list of `Element` --
+Partitioner: TypeAlias = Callable[..., list[Element]]


 def partition(
    filename: Optional[str] = None,
+    *,
    content_type: Optional[str] = None,
    file: Optional[IO[bytes]] = None,
    file_filename: Optional[str] = None,
@ -156,10 +53,11 @@ def partition(
    starting_page_number: int = 1,
    **kwargs: Any,
 ):
-    """Partitions a document into its constituent elements. Will use libmagic to determine
-    the file's type and route it to the appropriate partitioning function. Applies the default
-    parameters for each partitioning function. Use the document-type specific partitioning
-    functions if you need access to additional kwarg options.
+    """Partitions a document into its constituent elements.
+
+    Uses libmagic to determine the file's type and route it to the appropriate partitioning
+    function. Applies the default parameters for each partitioning function. Use the document-type
+    specific partitioning functions if you need access to additional kwarg options.

    Parameters
    ----------
@ -272,7 +170,7 @@ def partition(
    languages = check_language_args(languages or [], ocr_languages)

    if url is not None:
-        file, filetype = file_and_type_from_url(
+        file, file_type = file_and_type_from_url(
            url=url,
            content_type=content_type,
            headers=headers,
@ -285,7 +183,7 @@ def partition(
                "The headers kwarg is set but the url kwarg is not. "
                "The headers kwarg will be ignored.",
            )
-        filetype = detect_filetype(
+        file_type = detect_filetype(
            filename=filename,
            file=file,
            file_filename=metadata_filename,
@ -297,14 +195,16 @@ def partition(
        file.seek(0)

    infer_table_structure = decide_table_extraction(
-        filetype,
+        file_type,
        skip_infer_table_types,
        pdf_infer_table_structure,
    )

-    if filetype == FileType.CSV:
-        _partition_csv = _get_partition_with_extras("csv")
-        elements = _partition_csv(
+    partitioner_loader = _PartitionerLoader()
+
+    if file_type == FileType.CSV:
+        partition_csv = partitioner_loader.get(file_type)
+        elements = partition_csv(
            filename=filename,
            file=file,
            infer_table_structure=infer_table_structure,
@ -312,9 +212,9 @@ def partition(
            detect_language_per_element=detect_language_per_element,
            **kwargs,
        )
-    elif filetype == FileType.DOC:
-        _partition_doc = _get_partition_with_extras("doc")
-        elements = _partition_doc(
+    elif file_type == FileType.DOC:
+        partition_doc = partitioner_loader.get(file_type)
+        elements = partition_doc(
            filename=filename,
            file=file,
            infer_table_structure=infer_table_structure,
@ -324,9 +224,9 @@ def partition(
            strategy=strategy,
            **kwargs,
        )
-    elif filetype == FileType.DOCX:
-        _partition_docx = _get_partition_with_extras("docx")
-        elements = _partition_docx(
+    elif file_type == FileType.DOCX:
+        partition_docx = partitioner_loader.get(file_type)
+        elements = partition_docx(
            filename=filename,
            file=file,
            infer_table_structure=infer_table_structure,
@ -336,7 +236,8 @@ def partition(
            strategy=strategy,
            **kwargs,
        )
-    elif filetype == FileType.EML:
+    elif file_type == FileType.EML:
+        partition_email = partitioner_loader.get(file_type)
        elements = partition_email(
            filename=filename,
            file=file,
@ -345,9 +246,9 @@ def partition(
            detect_language_per_element=detect_language_per_element,
            **kwargs,
        )
-    elif filetype == FileType.EPUB:
-        _partition_epub = _get_partition_with_extras("epub")
-        elements = _partition_epub(
+    elif file_type == FileType.EPUB:
+        partition_epub = partitioner_loader.get(file_type)
+        elements = partition_epub(
            filename=filename,
            file=file,
            include_page_breaks=include_page_breaks,
@ -356,7 +257,8 @@ def partition(
            detect_language_per_element=detect_language_per_element,
            **kwargs,
        )
-    elif filetype == FileType.HTML:
+    elif file_type == FileType.HTML:
+        partition_html = partitioner_loader.get(file_type)
        elements = partition_html(
            filename=filename,
            file=file,
@ -366,9 +268,9 @@ def partition(
            detect_language_per_element=detect_language_per_element,
            **kwargs,
        )
-    elif filetype in IMAGE_FILETYPES:
-        _partition_image = _get_partition_with_extras("image")
-        elements = _partition_image(
+    elif file_type.partitioner_shortname == "image":
+        partition_image = partitioner_loader.get(file_type)
+        elements = partition_image(
            filename=filename,
            file=file,
            url=None,
@ -384,16 +286,17 @@ def partition(
            starting_page_number=starting_page_number,
            **kwargs,
        )
-    elif filetype == FileType.JSON:
+    elif file_type == FileType.JSON:
        if not is_json_processable(filename=filename, file=file):
            raise ValueError(
                "Detected a JSON file that does not conform to the Unstructured schema. "
                "partition_json currently only processes serialized Unstructured output.",
            )
+        partition_json = partitioner_loader.get(file_type)
        elements = partition_json(filename=filename, file=file, **kwargs)
-    elif filetype == FileType.MD:
-        _partition_md = _get_partition_with_extras("md")
-        elements = _partition_md(
+    elif file_type == FileType.MD:
+        partition_md = partitioner_loader.get(file_type)
+        elements = partition_md(
            filename=filename,
            file=file,
            include_page_breaks=include_page_breaks,
@ -402,18 +305,18 @@ def partition(
            detect_language_per_element=detect_language_per_element,
            **kwargs,
        )
-    elif filetype == FileType.MSG:
-        _partition_msg = _get_partition_with_extras("msg")
-        elements = _partition_msg(
+    elif file_type == FileType.MSG:
+        partition_msg = partitioner_loader.get(file_type)
+        elements = partition_msg(
            filename=filename,
            file=file,
            languages=languages,
            detect_language_per_element=detect_language_per_element,
            **kwargs,
        )
-    elif filetype == FileType.ODT:
-        _partition_odt = _get_partition_with_extras("odt")
-        elements = _partition_odt(
+    elif file_type == FileType.ODT:
+        partition_odt = partitioner_loader.get(file_type)
+        elements = partition_odt(
            filename=filename,
            file=file,
            infer_table_structure=infer_table_structure,
@ -423,9 +326,9 @@ def partition(
            strategy=strategy,
            **kwargs,
        )
-    elif filetype == FileType.ORG:
-        _partition_org = _get_partition_with_extras("org")
-        elements = _partition_org(
+    elif file_type == FileType.ORG:
+        partition_org = partitioner_loader.get(file_type)
+        elements = partition_org(
            filename=filename,
            file=file,
            include_page_breaks=include_page_breaks,
@ -433,9 +336,9 @@ def partition(
            detect_language_per_element=detect_language_per_element,
            **kwargs,
        )
-    elif filetype == FileType.PDF:
-        _partition_pdf = _get_partition_with_extras("pdf")
-        elements = _partition_pdf(
+    elif file_type == FileType.PDF:
+        partition_pdf = partitioner_loader.get(file_type)
+        elements = partition_pdf(
            filename=filename,
            file=file,
            url=None,
@ -451,9 +354,9 @@ def partition(
            starting_page_number=starting_page_number,
            **kwargs,
        )
-    elif filetype == FileType.PPT:
-        _partition_ppt = _get_partition_with_extras("ppt")
-        elements = _partition_ppt(
+    elif file_type == FileType.PPT:
+        partition_ppt = partitioner_loader.get(file_type)
+        elements = partition_ppt(
            filename=filename,
            file=file,
            include_page_breaks=include_page_breaks,
@ -463,9 +366,9 @@ def partition(
            strategy=strategy,
            **kwargs,
        )
-    elif filetype == FileType.PPTX:
-        _partition_pptx = _get_partition_with_extras("pptx")
-        elements = _partition_pptx(
+    elif file_type == FileType.PPTX:
+        partition_pptx = partitioner_loader.get(file_type)
+        elements = partition_pptx(
            filename=filename,
            file=file,
            include_page_breaks=include_page_breaks,
@ -476,9 +379,9 @@ def partition(
            strategy=strategy,
            **kwargs,
        )
-    elif filetype == FileType.RST:
-        _partition_rst = _get_partition_with_extras("rst")
-        elements = _partition_rst(
+    elif file_type == FileType.RST:
+        partition_rst = partitioner_loader.get(file_type)
+        elements = partition_rst(
            filename=filename,
            file=file,
            include_page_breaks=include_page_breaks,
@ -487,9 +390,9 @@ def partition(
            detect_language_per_element=detect_language_per_element,
            **kwargs,
        )
-    elif filetype == FileType.RTF:
-        _partition_rtf = _get_partition_with_extras("rtf")
-        elements = _partition_rtf(
+    elif file_type == FileType.RTF:
+        partition_rtf = partitioner_loader.get(file_type)
+        elements = partition_rtf(
            filename=filename,
            file=file,
            include_page_breaks=include_page_breaks,
@ -498,16 +401,17 @@ def partition(
            detect_language_per_element=detect_language_per_element,
            **kwargs,
        )
-    elif filetype == FileType.TSV:
-        _partition_tsv = _get_partition_with_extras("tsv")
-        elements = _partition_tsv(
+    elif file_type == FileType.TSV:
+        partition_tsv = partitioner_loader.get(file_type)
+        elements = partition_tsv(
            filename=filename,
            file=file,
            languages=languages,
            detect_language_per_element=detect_language_per_element,
            **kwargs,
        )
-    elif filetype == FileType.TXT:
+    elif file_type == FileType.TXT:
+        partition_text = partitioner_loader.get(file_type)
        elements = partition_text(
            filename=filename,
            file=file,
@ -517,9 +421,9 @@ def partition(
            detect_language_per_element=detect_language_per_element,
            **kwargs,
        )
-    elif filetype in (FileType.XLS, FileType.XLSX):
-        _partition_xlsx = _get_partition_with_extras("xlsx")
-        elements = _partition_xlsx(
+    elif file_type in (FileType.XLS, FileType.XLSX):
+        partition_xlsx = partitioner_loader.get(file_type)
+        elements = partition_xlsx(
            filename=filename,
            file=file,
            infer_table_structure=infer_table_structure,
@ -528,7 +432,8 @@ def partition(
            starting_page_number=starting_page_number,
            **kwargs,
        )
-    elif filetype == FileType.XML:
+    elif file_type == FileType.XML:
+        partition_xml = partitioner_loader.get(file_type)
        elements = partition_xml(
            filename=filename,
            file=file,
@ -538,11 +443,11 @@ def partition(
            detect_language_per_element=detect_language_per_element,
            **kwargs,
        )
-    elif filetype == FileType.EMPTY:
+    elif file_type == FileType.EMPTY:
        elements = []
    else:
        msg = "Invalid file" if not filename else f"Invalid file {filename}"
-        raise ValueError(f"{msg}. The {filetype} file type is not supported in partition.")
+        raise ValueError(f"{msg}. The {file_type} file type is not supported in partition.")

    for element in elements:
        element.metadata.url = url
@ -551,7 +456,7 @@ def partition(
            out_filetype = FileType.from_mime_type(content_type)
            element.metadata.filetype = out_filetype.mime_type if out_filetype is not None else None
        else:
-            element.metadata.filetype = filetype.mime_type
+            element.metadata.filetype = file_type.mime_type

    return elements

@ -562,7 +467,7 @@ def file_and_type_from_url(
    headers: dict[str, str] = {},
    ssl_verify: bool = True,
    request_timeout: Optional[int] = None,
-) -> tuple[io.BytesIO, Optional[FileType]]:
+) -> tuple[io.BytesIO, FileType]:
    response = requests.get(url, headers=headers, verify=ssl_verify, timeout=request_timeout)
    file = io.BytesIO(response.content)

@ -590,3 +495,51 @@ def decide_table_extraction(
        return pdf_infer_table_structure or doc_type not in skip_infer_table_types

    return doc_type not in skip_infer_table_types
+
+
+class _PartitionerLoader:
+    """Maps a file-type to its partitioner function, importing that partitioner on demand.
+
+    `partition()` asks this object for the partitioner of the detected file-type
+    instead of importing every partitioner module itself. The Python environment may
+    not have the packages a given partitioner needs, so before a partitioner module is
+    imported each package named in the file-type's `.importable_package_dependencies`
+    is checked with `dependency_exists()`.
+
+    When a package is missing, `.get()` raises `ImportError` with a message naming the
+    `pip install "unstructured[...]"` extra that supplies it. The same message is used
+    for every partitioner, in place of whatever exception would otherwise surface from
+    deeper in the partitioner the first time the missing package is touched.
+    """
+
+    # -- partitioners loaded so far, keyed by file-type; this is a class attribute, so the
+    # -- same dict is shared by every instance --
+    _partitioners: dict[FileType, Partitioner] = {}
+
+    def get(self, file_type: FileType) -> Partitioner:
+        """Return the partitioner function for `file_type`, loading it on first request.
+
+        Raises `ImportError` when a package that partitioner depends on is not
+        installed.
+        """
+        # -- fast path: the partitioner for this file-type has been loaded before --
+        if file_type in self._partitioners:
+            return self._partitioners[file_type]
+
+        partitioner = self._load_partitioner(file_type)
+        self._partitioners[file_type] = partitioner
+        return partitioner
+
+    def _load_partitioner(self, file_type: FileType) -> Partitioner:
+        """Import and return the partitioner function for `file_type`.
+
+        Dependencies are verified first; `ImportError` is raised if any is missing.
+        """
+        # -- checking stops at the first missing package; one is enough to fail --
+        dependencies = file_type.importable_package_dependencies
+        if not all(dependency_exists(pkg_name) for pkg_name in dependencies):
+            raise ImportError(
+                f"{file_type.partitioner_function_name}() is not available because one or"
+                f" more dependencies are not installed. Use:"
+                f' pip install "unstructured[{file_type.extra_name}]" (including quotes)'
+                f" to install the required dependencies",
+            )
+
+        # -- requesting a partitioner for a file-type that has none would be a programming
+        # -- error in the caller, not a runtime condition --
+        assert file_type.is_partitionable
+        module = importlib.import_module(file_type.partitioner_module_qname)
+        return getattr(module, file_type.partitioner_function_name)