diff --git a/CHANGELOG.md b/CHANGELOG.md index 580b0234c..d57e4a1b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## 0.9.0 + +### Enhancements + +* Dependencies are now split by document type, creating a slimmer base installation. + ## 0.8.8 ### Enhancements @@ -6,6 +12,7 @@ ### Fixes + * Rename "date" field to "last_modified" * Adds Box connector diff --git a/Dockerfile b/Dockerfile index f84cfb6ec..d3ec67f0e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -30,7 +30,15 @@ RUN python3.8 -m pip install pip==${PIP_VERSION} && \ pip install --no-cache -r requirements/ingest-s3.txt && \ pip install --no-cache -r requirements/ingest-slack.txt && \ pip install --no-cache -r requirements/ingest-wikipedia.txt && \ - pip install --no-cache -r requirements/local-inference.txt && \ + pip install --no-cache -r requirements/extra-csv.txt && \ + pip install --no-cache -r requirements/extra-docx.txt && \ + pip install --no-cache -r requirements/extra-markdown.txt && \ + pip install --no-cache -r requirements/extra-msg.txt && \ + pip install --no-cache -r requirements/extra-odt.txt && \ + pip install --no-cache -r requirements/extra-pandoc.txt && \ + pip install --no-cache -r requirements/extra-pdf-image.txt && \ + pip install --no-cache -r requirements/extra-pptx.txt && \ + pip install --no-cache -r requirements/extra-xlsx.txt && \ dnf -y groupremove "Development Tools" && \ dnf clean all diff --git a/Makefile b/Makefile index fd5f73a68..10c77f045 100644 --- a/Makefile +++ b/Makefile @@ -18,10 +18,10 @@ install-base: install-base-pip-packages install-nltk-models ## install: installs all test, dev, and experimental requirements .PHONY: install -install: install-base-pip-packages install-dev install-nltk-models install-test install-huggingface install-unstructured-inference +install: install-base-pip-packages install-dev install-nltk-models install-test install-huggingface install-all-docs .PHONY: install-ci -install-ci: install-base-pip-packages install-nltk-models 
install-huggingface install-unstructured-inference install-test +install-ci: install-base-pip-packages install-nltk-models install-huggingface install-all-docs install-test .PHONY: install-base-pip-packages install-base-pip-packages: @@ -53,6 +53,45 @@ install-dev: install-build: python3 -m pip install -r requirements/build.txt +.PHONY: install-csv +install-csv: + python3 -m pip install -r requirements/extra-csv.txt + +.PHONY: install-docx +install-docx: + python3 -m pip install -r requirements/extra-docx.txt + +.PHONY: install-odt +install-odt: + python3 -m pip install -r requirements/extra-odt.txt + +.PHONY: install-pypandoc +install-pypandoc: + python3 -m pip install -r requirements/extra-pandoc.txt + +.PHONY: install-markdown +install-markdown: + python3 -m pip install -r requirements/extra-markdown.txt + +.PHONY: install-msg +install-msg: + python3 -m pip install -r requirements/extra-msg.txt + +.PHONY: install-pdf-image +install-pdf-image: + python3 -m pip install -r requirements/extra-pdf-image.txt + +.PHONY: install-pptx +install-pptx: + python3 -m pip install -r requirements/extra-pptx.txt + +.PHONY: install-xlsx +install-xlsx: + python3 -m pip install -r requirements/extra-xlsx.txt + +.PHONY: install-all-docs +install-all-docs: install-base install-csv install-docx install-odt install-pypandoc install-markdown install-msg install-pdf-image install-pptx install-xlsx + .PHONY: install-ingest-google-drive install-ingest-google-drive: python3 -m pip install -r requirements/ingest-google-drive.txt @@ -124,7 +163,7 @@ install-unstructured-inference: ## install-local-inference: installs requirements for local inference .PHONY: install-local-inference -install-local-inference: install install-unstructured-inference +install-local-inference: install install-all-docs .PHONY: install-pandoc install-pandoc: @@ -135,12 +174,23 @@ install-pandoc: .PHONY: pip-compile pip-compile: pip-compile --upgrade requirements/base.in + + # Extra requirements that are
specific to document types + pip-compile --upgrade requirements/extra-csv.in + pip-compile --upgrade requirements/extra-docx.in + pip-compile --upgrade requirements/extra-pandoc.in + pip-compile --upgrade requirements/extra-markdown.in + pip-compile --upgrade requirements/extra-msg.in + pip-compile --upgrade requirements/extra-odt.in + pip-compile --upgrade requirements/extra-pdf-image.in + pip-compile --upgrade requirements/extra-pptx.in + pip-compile --upgrade requirements/extra-xlsx.in + # Extra requirements for huggingface staging functions pip-compile --upgrade requirements/huggingface.in pip-compile --upgrade requirements/test.in pip-compile --upgrade requirements/dev.in pip-compile --upgrade requirements/build.in - pip-compile --upgrade requirements/local-inference.in # NOTE(robinson) - doc/requirements.txt is where the GitHub action for building # sphinx docs looks for additional requirements cp requirements/build.txt docs/requirements.txt @@ -158,6 +208,7 @@ pip-compile: pip-compile --upgrade requirements/ingest-google-drive.in pip-compile --upgrade requirements/ingest-elasticsearch.in pip-compile --upgrade requirements/ingest-onedrive.in + pip-compile --upgrade requirements/ingest-outlook.in pip-compile --upgrade requirements/ingest-confluence.in ## install-project-local: install unstructured into your local python environment diff --git a/docs/requirements.txt b/docs/requirements.txt index f831436ff..958e379c9 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # # pip-compile requirements/build.in @@ -12,10 +12,14 @@ beautifulsoup4==4.12.2 # via furo certifi==2023.7.22 # via + # -c requirements/base.txt + # -c requirements/constraints.in # -r requirements/build.in # requests charset-normalizer==3.2.0 - # via requests + # via + # -c requirements/base.txt + # requests 
docutils==0.18.1 # via # sphinx @@ -23,11 +27,11 @@ docutils==0.18.1 furo==2023.7.26 # via -r requirements/build.in idna==3.4 - # via requests + # via + # -c requirements/base.txt + # requests imagesize==1.4.1 # via sphinx -importlib-metadata==6.8.0 - # via sphinx jinja2==3.1.2 # via sphinx markupsafe==2.1.3 @@ -38,10 +42,10 @@ pygments==2.15.1 # via # furo # sphinx -pytz==2023.3 - # via babel requests==2.31.0 - # via sphinx + # via + # -c requirements/base.txt + # sphinx snowballstemmer==2.2.0 # via sphinx soupsieve==2.4.1 @@ -71,7 +75,8 @@ sphinxcontrib-qthelp==1.0.3 # via sphinx sphinxcontrib-serializinghtml==1.1.5 # via sphinx -urllib3==2.0.4 - # via requests -zipp==3.16.2 - # via importlib-metadata +urllib3==1.26.16 + # via + # -c requirements/base.txt + # -c requirements/constraints.in + # requests diff --git a/docs/source/installing.rst b/docs/source/installing.rst index 49d465078..5171ebed9 100644 --- a/docs/source/installing.rst +++ b/docs/source/installing.rst @@ -7,8 +7,15 @@ Quick Start Use the following instructions to get up and running with ``unstructured`` and test your installation. -* Install the Python SDK with ``pip install "unstructured[local-inference]"`` - * If you do not need to process PDFs or images, you can run ``pip install unstructured`` +* Install the Python SDK with ``pip install unstructured`` + * Plain text files, HTML, XML, JSON and Emails do not require any extra dependencies. + * If you need to process other document types, you can install the extras required for those documents + with ``pip install "unstructured[docx,pptx]"``. + * To install the extras for every document type, use ``pip install "unstructured[all-docs]"``. + * For ``unstructured<0.9.0``, you can install the extras for all document types with + ``pip install "unstructured[local-inference]"``. The ``local-inference`` extra is still + supported in newer versions for backward compatibility, but may be deprecated in a future version. 
+ The ``all-docs`` extra is the officially supported installation pattern. * Install the following system dependencies if they are not already available on your system. Depending on what document types you're parsing, you may not need all of these. * ``libmagic-dev`` (filetype detection) diff --git a/requirements/base.in b/requirements/base.in index 5ac5957e3..acc32d982 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -1,19 +1,8 @@ -c "constraints.in" chardet filetype -lxml -msg_parser -nltk -openpyxl -pandas -pdf2image -pdfminer.six -pillow -pypandoc -python-docx -python-pptx python-magic -markdown -requests +lxml +nltk tabulate -xlrd +requests diff --git a/requirements/base.txt b/requirements/base.txt index 03125fc3c..d4f27a422 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # # pip-compile requirements/base.in @@ -8,89 +8,33 @@ certifi==2023.7.22 # via # -c requirements/constraints.in # requests -cffi==1.15.1 - # via cryptography chardet==5.1.0 # via -r requirements/base.in charset-normalizer==3.2.0 - # via - # pdfminer-six - # requests + # via requests click==8.1.6 # via nltk -cryptography==41.0.2 - # via pdfminer-six -et-xmlfile==1.1.0 - # via openpyxl filetype==1.2.0 # via -r requirements/base.in idna==3.4 # via requests -importlib-metadata==6.8.0 - # via markdown joblib==1.3.1 # via nltk lxml==4.9.3 - # via - # -r requirements/base.in - # python-docx - # python-pptx -markdown==3.4.4 - # via -r requirements/base.in -msg-parser==1.2.0 # via -r requirements/base.in nltk==3.8.1 # via -r requirements/base.in -numpy==1.24.4 - # via pandas -olefile==0.46 - # via msg-parser -openpyxl==3.1.2 - # via -r requirements/base.in -pandas==2.0.3 - # via -r requirements/base.in -pdf2image==1.16.3 - # via -r requirements/base.in -pdfminer-six==20221105 - # via -r 
requirements/base.in -pillow==10.0.0 - # via - # -r requirements/base.in - # pdf2image - # python-pptx -pycparser==2.21 - # via cffi -pypandoc==1.11 - # via -r requirements/base.in -python-dateutil==2.8.2 - # via pandas -python-docx==0.8.11 - # via -r requirements/base.in python-magic==0.4.27 # via -r requirements/base.in -python-pptx==0.6.21 - # via -r requirements/base.in -pytz==2023.3 - # via pandas regex==2023.6.3 # via nltk requests==2.31.0 # via -r requirements/base.in -six==1.16.0 - # via python-dateutil tabulate==0.9.0 # via -r requirements/base.in tqdm==4.65.0 # via nltk -tzdata==2023.3 - # via pandas urllib3==1.26.16 # via # -c requirements/constraints.in # requests -xlrd==2.0.1 - # via -r requirements/base.in -xlsxwriter==3.1.2 - # via python-pptx -zipp==3.16.2 - # via importlib-metadata diff --git a/requirements/build.in b/requirements/build.in index 0ba653ba8..dba8fb1f4 100644 --- a/requirements/build.in +++ b/requirements/build.in @@ -1,3 +1,6 @@ +-c base.txt +-c constraints.in + sphinx # NOTE(alan) - Pinning to resolve a conflict with sphinx. We can unpin on next sphinx_rtd_theme release. 
sphinx_rtd_theme==1.2.2 diff --git a/requirements/build.txt b/requirements/build.txt index f831436ff..958e379c9 100644 --- a/requirements/build.txt +++ b/requirements/build.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # # pip-compile requirements/build.in @@ -12,10 +12,14 @@ beautifulsoup4==4.12.2 # via furo certifi==2023.7.22 # via + # -c requirements/base.txt + # -c requirements/constraints.in # -r requirements/build.in # requests charset-normalizer==3.2.0 - # via requests + # via + # -c requirements/base.txt + # requests docutils==0.18.1 # via # sphinx @@ -23,11 +27,11 @@ docutils==0.18.1 furo==2023.7.26 # via -r requirements/build.in idna==3.4 - # via requests + # via + # -c requirements/base.txt + # requests imagesize==1.4.1 # via sphinx -importlib-metadata==6.8.0 - # via sphinx jinja2==3.1.2 # via sphinx markupsafe==2.1.3 @@ -38,10 +42,10 @@ pygments==2.15.1 # via # furo # sphinx -pytz==2023.3 - # via babel requests==2.31.0 - # via sphinx + # via + # -c requirements/base.txt + # sphinx snowballstemmer==2.2.0 # via sphinx soupsieve==2.4.1 @@ -71,7 +75,8 @@ sphinxcontrib-qthelp==1.0.3 # via sphinx sphinxcontrib-serializinghtml==1.1.5 # via sphinx -urllib3==2.0.4 - # via requests -zipp==3.16.2 - # via importlib-metadata +urllib3==1.26.16 + # via + # -c requirements/base.txt + # -c requirements/constraints.in + # requests diff --git a/requirements/constraints.in b/requirements/constraints.in index 65faee62a..fbe60d75f 100644 --- a/requirements/constraints.in +++ b/requirements/constraints.in @@ -12,6 +12,13 @@ jupyter-core>=4.11.2 wheel>=0.38.1 # NOTE(robinson) - The following pins are to address # vulnerabilities in dependency scans -certifi>=2022.12.07 +certifi>=2023.7.22 # From pycocotools in local-inference pyparsing<3.1.0 +# NOTE(robinson) - Numpy dropped Python 3.8 support in 1.25.0 +numpy<1.25.0 +scipy<1.11.0 +IPython<8.13 +# 
NOTE(robinson) - See this issue here +# https://github.com/facebookresearch/detectron2/issues/5010 +Pillow<10.0.0 diff --git a/requirements/dev.txt b/requirements/dev.txt index 89f19697d..c976f81ba 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # # pip-compile requirements/dev.in @@ -41,9 +41,7 @@ certifi==2023.7.22 # -c requirements/test.txt # requests cffi==1.15.1 - # via - # -c requirements/base.txt - # argon2-cffi-bindings + # via argon2-cffi-bindings cfgv==3.3.1 # via pre-commit charset-normalizer==3.2.0 @@ -57,7 +55,9 @@ click==8.1.6 # -c requirements/test.txt # pip-tools comm==0.1.3 - # via ipykernel + # via + # ipykernel + # ipywidgets debugpy==1.6.7 # via ipykernel decorator==5.1.1 @@ -66,10 +66,6 @@ defusedxml==0.7.1 # via nbconvert distlib==0.3.7 # via virtualenv -exceptiongroup==1.1.2 - # via - # -c requirements/test.txt - # anyio executing==1.2.0 # via stack-data fastjsonschema==2.18.0 @@ -87,40 +83,26 @@ idna==3.4 # anyio # jsonschema # requests -importlib-metadata==6.8.0 - # via - # -c requirements/base.txt - # jupyter-client - # jupyter-lsp - # jupyterlab - # jupyterlab-server - # nbconvert -importlib-resources==6.0.0 - # via - # jsonschema - # jsonschema-specifications - # jupyterlab - # notebook ipykernel==6.25.0 # via - # ipywidgets # jupyter # jupyter-console # jupyterlab # qtconsole ipython==8.12.2 # via + # -c requirements/constraints.in # -r requirements/dev.in # ipykernel # ipywidgets # jupyter-console ipython-genutils==0.2.0 # via qtconsole -ipywidgets==8.0.7 +ipywidgets==8.1.0 # via jupyter isoduration==20.11.0 # via jsonschema -jedi==0.18.2 +jedi==0.19.0 # via ipython jinja2==3.1.2 # via @@ -162,7 +144,7 @@ jupyter-core==5.3.1 # nbconvert # nbformat # qtconsole -jupyter-events==0.6.3 +jupyter-events==0.7.0 # via jupyter-server jupyter-lsp==2.2.0 # via 
jupyterlab @@ -201,16 +183,16 @@ nbconvert==7.7.3 # via # jupyter # jupyter-server -nbformat==5.9.1 +nbformat==5.9.2 # via # jupyter-server # nbclient # nbconvert -nest-asyncio==1.5.6 +nest-asyncio==1.5.7 # via ipykernel nodeenv==1.8.0 # via pre-commit -notebook==7.0.0 +notebook==7.0.1 # via jupyter notebook-shim==0.2.3 # via @@ -239,9 +221,7 @@ pickleshare==0.7.5 # via ipython pip-tools==7.1.0 # via -r requirements/dev.in -pkgutil-resolve-name==1.3.10 - # via jsonschema -platformdirs==3.9.1 +platformdirs==3.10.0 # via # -c requirements/test.txt # jupyter-core @@ -263,9 +243,7 @@ ptyprocess==0.7.0 pure-eval==0.2.2 # via stack-data pycparser==2.21 - # via - # -c requirements/base.txt - # cffi + # via cffi pygments==2.15.1 # via # ipython @@ -276,16 +254,11 @@ pyproject-hooks==1.0.0 # via build python-dateutil==2.8.2 # via - # -c requirements/base.txt # -c requirements/test.txt # arrow # jupyter-client python-json-logger==2.0.7 # via jupyter-events -pytz==2023.3 - # via - # -c requirements/base.txt - # babel pyyaml==6.0.1 # via # -c requirements/test.txt @@ -306,6 +279,7 @@ referencing==0.30.0 # via # jsonschema # jsonschema-specifications + # jupyter-events requests==2.31.0 # via # -c requirements/base.txt @@ -327,7 +301,6 @@ send2trash==1.8.2 # via jupyter-server six==1.16.0 # via - # -c requirements/base.txt # -c requirements/test.txt # asttokens # bleach @@ -345,13 +318,6 @@ terminado==0.17.1 # jupyter-server-terminals tinycss2==1.2.1 # via nbconvert -tomli==2.0.1 - # via - # -c requirements/test.txt - # build - # jupyterlab - # pip-tools - # pyproject-hooks tornado==6.3.2 # via # ipykernel @@ -377,11 +343,6 @@ traitlets==5.9.0 # nbconvert # nbformat # qtconsole -typing-extensions==4.7.1 - # via - # -c requirements/test.txt - # async-lru - # ipython uri-template==1.3.0 # via jsonschema urllib3==1.26.16 @@ -408,11 +369,6 @@ wheel==0.41.0 # pip-tools widgetsnbextension==4.0.8 # via ipywidgets -zipp==3.16.2 - # via - # -c requirements/base.txt - # importlib-metadata 
- # importlib-resources # The following packages are considered to be unsafe in a requirements file: # pip diff --git a/requirements/local-inference.in b/requirements/extra-csv.in similarity index 50% rename from requirements/local-inference.in rename to requirements/extra-csv.in index 9ac43c9a7..fed74c14e 100644 --- a/requirements/local-inference.in +++ b/requirements/extra-csv.in @@ -1,3 +1,4 @@ -c constraints.in -c base.txt -unstructured-inference==0.5.7 + +pandas diff --git a/requirements/extra-csv.txt b/requirements/extra-csv.txt new file mode 100644 index 000000000..74886420a --- /dev/null +++ b/requirements/extra-csv.txt @@ -0,0 +1,20 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile requirements/extra-csv.in +# +numpy==1.24.4 + # via + # -c requirements/constraints.in + # pandas +pandas==2.0.3 + # via -r requirements/extra-csv.in +python-dateutil==2.8.2 + # via pandas +pytz==2023.3 + # via pandas +six==1.16.0 + # via python-dateutil +tzdata==2023.3 + # via pandas diff --git a/requirements/extra-docx.in b/requirements/extra-docx.in new file mode 100644 index 000000000..46569e09c --- /dev/null +++ b/requirements/extra-docx.in @@ -0,0 +1,4 @@ +-c constraints.in +-c base.txt + +python-docx diff --git a/requirements/extra-docx.txt b/requirements/extra-docx.txt new file mode 100644 index 000000000..c5ddbc39d --- /dev/null +++ b/requirements/extra-docx.txt @@ -0,0 +1,12 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile requirements/extra-docx.in +# +lxml==4.9.3 + # via + # -c requirements/base.txt + # python-docx +python-docx==0.8.11 + # via -r requirements/extra-docx.in diff --git a/requirements/extra-markdown.in b/requirements/extra-markdown.in new file mode 100644 index 000000000..44e817488 --- /dev/null +++ b/requirements/extra-markdown.in @@ -0,0 +1,4 @@ +-c "constraints.in" +-c "base.txt" + +markdown diff --git 
a/requirements/extra-markdown.txt b/requirements/extra-markdown.txt new file mode 100644 index 000000000..e09266a16 --- /dev/null +++ b/requirements/extra-markdown.txt @@ -0,0 +1,8 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile requirements/extra-markdown.in +# +markdown==3.4.4 + # via -r requirements/extra-markdown.in diff --git a/requirements/extra-msg.in b/requirements/extra-msg.in new file mode 100644 index 000000000..01021471b --- /dev/null +++ b/requirements/extra-msg.in @@ -0,0 +1,4 @@ +-c constraints.in +-c base.txt + +msg_parser diff --git a/requirements/extra-msg.txt b/requirements/extra-msg.txt new file mode 100644 index 000000000..722b0980e --- /dev/null +++ b/requirements/extra-msg.txt @@ -0,0 +1,10 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile requirements/extra-msg.in +# +msg-parser==1.2.0 + # via -r requirements/extra-msg.in +olefile==0.46 + # via msg-parser diff --git a/requirements/extra-odt.in b/requirements/extra-odt.in new file mode 100644 index 000000000..6076a76a1 --- /dev/null +++ b/requirements/extra-odt.in @@ -0,0 +1,5 @@ +-c constraints.in +-c base.txt + +python-docx +pypandoc diff --git a/requirements/extra-odt.txt b/requirements/extra-odt.txt new file mode 100644 index 000000000..dea5ebb68 --- /dev/null +++ b/requirements/extra-odt.txt @@ -0,0 +1,14 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile requirements/extra-odt.in +# +lxml==4.9.3 + # via + # -c requirements/base.txt + # python-docx +pypandoc==1.11 + # via -r requirements/extra-odt.in +python-docx==0.8.11 + # via -r requirements/extra-odt.in diff --git a/requirements/extra-pandoc.in b/requirements/extra-pandoc.in new file mode 100644 index 000000000..21720efda --- /dev/null +++ b/requirements/extra-pandoc.in @@ -0,0 +1,4 @@ +-c constraints.in +-c base.txt + +pypandoc diff --git 
a/requirements/extra-pandoc.txt b/requirements/extra-pandoc.txt new file mode 100644 index 000000000..b0804f16d --- /dev/null +++ b/requirements/extra-pandoc.txt @@ -0,0 +1,8 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile requirements/extra-pandoc.in +# +pypandoc==1.11 + # via -r requirements/extra-pandoc.in diff --git a/requirements/extra-pdf-image.in b/requirements/extra-pdf-image.in new file mode 100644 index 000000000..812d902ce --- /dev/null +++ b/requirements/extra-pdf-image.in @@ -0,0 +1,9 @@ +-c constraints.in +-c base.txt + +pdf2image +pdfminer.six +# NOTE(robinson) - See this issue here +# https://github.com/facebookresearch/detectron2/issues/5010 +Pillow<10 +unstructured-inference==0.5.7 diff --git a/requirements/local-inference.txt b/requirements/extra-pdf-image.txt similarity index 80% rename from requirements/local-inference.txt rename to requirements/extra-pdf-image.txt index 110e2d0a3..130b774c9 100644 --- a/requirements/local-inference.txt +++ b/requirements/extra-pdf-image.txt @@ -1,8 +1,8 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile requirements/local-inference.in +# pip-compile requirements/extra-pdf-image.in # antlr4-python3-runtime==4.9.3 # via omegaconf @@ -12,9 +12,7 @@ certifi==2023.7.22 # -c requirements/constraints.in # requests cffi==1.15.1 - # via - # -c requirements/base.txt - # cryptography + # via cryptography charset-normalizer==3.2.0 # via # -c requirements/base.txt @@ -25,9 +23,7 @@ coloredlogs==15.0.1 contourpy==1.1.0 # via matplotlib cryptography==41.0.2 - # via - # -c requirements/base.txt - # pdfminer-six + # via pdfminer-six cycler==0.11.0 # via matplotlib effdet==0.4.1 @@ -54,8 +50,6 @@ idna==3.4 # via # -c requirements/base.txt # requests -importlib-resources==6.0.0 - # via matplotlib iopath==0.1.10 # via layoutparser jinja2==3.1.2 
@@ -74,7 +68,7 @@ networkx==3.1 # via torch numpy==1.24.4 # via - # -c requirements/base.txt + # -c requirements/constraints.in # contourpy # layoutparser # matplotlib @@ -101,22 +95,21 @@ packaging==23.1 # pytesseract # transformers pandas==2.0.3 - # via - # -c requirements/base.txt - # layoutparser + # via layoutparser pdf2image==1.16.3 # via - # -c requirements/base.txt + # -r requirements/extra-pdf-image.in # layoutparser pdfminer-six==20221105 # via - # -c requirements/base.txt + # -r requirements/extra-pdf-image.in # pdfplumber -pdfplumber==0.10.1 +pdfplumber==0.10.2 # via layoutparser -pillow==10.0.0 +pillow==9.5.0 # via - # -c requirements/base.txt + # -c requirements/constraints.in + # -r requirements/extra-pdf-image.in # layoutparser # matplotlib # pdf2image @@ -132,9 +125,7 @@ protobuf==4.23.4 pycocotools==2.0.6 # via effdet pycparser==2.21 - # via - # -c requirements/base.txt - # cffi + # via cffi pyparsing==3.0.9 # via # -c requirements/constraints.in @@ -145,15 +136,12 @@ pytesseract==0.3.10 # via layoutparser python-dateutil==2.8.2 # via - # -c requirements/base.txt # matplotlib # pandas python-multipart==0.0.6 # via unstructured-inference pytz==2023.3 - # via - # -c requirements/base.txt - # pandas + # via pandas pyyaml==6.0.1 # via # huggingface-hub @@ -176,11 +164,11 @@ safetensors==0.3.1 # timm # transformers scipy==1.10.1 - # via layoutparser -six==1.16.0 # via - # -c requirements/base.txt - # python-dateutil + # -c requirements/constraints.in + # layoutparser +six==1.16.0 + # via python-dateutil sympy==1.12 # via # onnxruntime @@ -214,17 +202,11 @@ typing-extensions==4.7.1 # iopath # torch tzdata==2023.3 - # via - # -c requirements/base.txt - # pandas + # via pandas unstructured-inference==0.5.7 - # via -r requirements/local-inference.in + # via -r requirements/extra-pdf-image.in urllib3==1.26.16 # via # -c requirements/base.txt # -c requirements/constraints.in # requests -zipp==3.16.2 - # via - # -c requirements/base.txt - # 
importlib-resources diff --git a/requirements/extra-pptx.in b/requirements/extra-pptx.in new file mode 100644 index 000000000..9f5499c68 --- /dev/null +++ b/requirements/extra-pptx.in @@ -0,0 +1,3 @@ +-c "constraints.in" + +python-pptx diff --git a/requirements/extra-pptx.txt b/requirements/extra-pptx.txt new file mode 100644 index 000000000..550afe1a4 --- /dev/null +++ b/requirements/extra-pptx.txt @@ -0,0 +1,16 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile requirements/extra-pptx.in +# +lxml==4.9.3 + # via python-pptx +pillow==9.5.0 + # via + # -c requirements/constraints.in + # python-pptx +python-pptx==0.6.21 + # via -r requirements/extra-pptx.in +xlsxwriter==3.1.2 + # via python-pptx diff --git a/requirements/extra-xlsx.in b/requirements/extra-xlsx.in new file mode 100644 index 000000000..5e296abf8 --- /dev/null +++ b/requirements/extra-xlsx.in @@ -0,0 +1,6 @@ +-c constraints.in +-c base.txt + +openpyxl +pandas +xlrd diff --git a/requirements/extra-xlsx.txt b/requirements/extra-xlsx.txt new file mode 100644 index 000000000..b9be1f037 --- /dev/null +++ b/requirements/extra-xlsx.txt @@ -0,0 +1,26 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile requirements/extra-xlsx.in +# +et-xmlfile==1.1.0 + # via openpyxl +numpy==1.24.4 + # via + # -c requirements/constraints.in + # pandas +openpyxl==3.1.2 + # via -r requirements/extra-xlsx.in +pandas==2.0.3 + # via -r requirements/extra-xlsx.in +python-dateutil==2.8.2 + # via pandas +pytz==2023.3 + # via pandas +six==1.16.0 + # via python-dateutil +tzdata==2023.3 + # via pandas +xlrd==2.0.1 + # via -r requirements/extra-xlsx.in diff --git a/requirements/huggingface.txt b/requirements/huggingface.txt index 21686e2a3..b5ffcacfd 100644 --- a/requirements/huggingface.txt +++ b/requirements/huggingface.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file 
is autogenerated by pip-compile with Python 3.11 # by the following command: # # pip-compile requirements/huggingface.in @@ -46,7 +46,7 @@ networkx==3.1 # via torch numpy==1.24.4 # via - # -c requirements/base.txt + # -c requirements/constraints.in # transformers packaging==23.1 # via @@ -74,7 +74,6 @@ sentencepiece==0.1.99 # via -r requirements/huggingface.in six==1.16.0 # via - # -c requirements/base.txt # langdetect # sacremoses sympy==1.12 diff --git a/requirements/ingest-azure.txt b/requirements/ingest-azure.txt index feba108e9..1d3d50436 100644 --- a/requirements/ingest-azure.txt +++ b/requirements/ingest-azure.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # # pip-compile requirements/ingest-azure.in @@ -32,7 +32,6 @@ certifi==2023.7.22 # requests cffi==1.15.1 # via - # -c requirements/base.txt # azure-datalake-store # cryptography charset-normalizer==3.2.0 @@ -42,7 +41,6 @@ charset-normalizer==3.2.0 # requests cryptography==41.0.2 # via - # -c requirements/base.txt # azure-identity # azure-storage-blob # msal @@ -76,9 +74,7 @@ multidict==6.0.4 portalocker==2.7.0 # via msal-extensions pycparser==2.21 - # via - # -c requirements/base.txt - # cffi + # via cffi pyjwt[crypto]==2.8.0 # via msal requests==2.31.0 @@ -89,7 +85,6 @@ requests==2.31.0 # msal six==1.16.0 # via - # -c requirements/base.txt # azure-core # azure-identity # isodate diff --git a/requirements/ingest-box.txt b/requirements/ingest-box.txt index 81f70b2fa..a00a671c2 100644 --- a/requirements/ingest-box.txt +++ b/requirements/ingest-box.txt @@ -8,7 +8,7 @@ attrs==23.1.0 # via boxsdk boxfs==0.2.0 # via -r requirements/ingest-box.in -boxsdk[jwt]==3.8.0 +boxsdk[jwt]==3.8.1 # via boxfs certifi==2023.7.22 # via @@ -16,17 +16,13 @@ certifi==2023.7.22 # -c requirements/constraints.in # requests cffi==1.15.1 - # via - # -c requirements/base.txt - # cryptography + # via 
cryptography charset-normalizer==3.2.0 # via # -c requirements/base.txt # requests cryptography==41.0.2 - # via - # -c requirements/base.txt - # boxsdk + # via boxsdk fsspec==2023.6.0 # via # -r requirements/ingest-box.in @@ -36,15 +32,11 @@ idna==3.4 # -c requirements/base.txt # requests pycparser==2.21 - # via - # -c requirements/base.txt - # cffi + # via cffi pyjwt==2.8.0 # via boxsdk python-dateutil==2.8.2 - # via - # -c requirements/base.txt - # boxsdk + # via boxsdk requests==2.31.0 # via # -c requirements/base.txt @@ -53,9 +45,7 @@ requests==2.31.0 requests-toolbelt==1.0.0 # via boxsdk six==1.16.0 - # via - # -c requirements/base.txt - # python-dateutil + # via python-dateutil urllib3==1.26.16 # via # -c requirements/base.txt diff --git a/requirements/ingest-confluence.txt b/requirements/ingest-confluence.txt index 31a80b3f8..6239278e5 100644 --- a/requirements/ingest-confluence.txt +++ b/requirements/ingest-confluence.txt @@ -1,10 +1,10 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # # pip-compile requirements/ingest-confluence.in # -atlassian-python-api==3.39.0 +atlassian-python-api==3.40.0 # via -r requirements/ingest-confluence.in certifi==2023.7.22 # via @@ -33,9 +33,7 @@ requests==2.31.0 requests-oauthlib==1.3.1 # via atlassian-python-api six==1.16.0 - # via - # -c requirements/base.txt - # atlassian-python-api + # via atlassian-python-api urllib3==1.26.16 # via # -c requirements/base.txt diff --git a/requirements/ingest-discord.txt b/requirements/ingest-discord.txt index 544398b4c..3f68d17af 100644 --- a/requirements/ingest-discord.txt +++ b/requirements/ingest-discord.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # # pip-compile requirements/ingest-discord.in diff --git a/requirements/ingest-dropbox.txt 
b/requirements/ingest-dropbox.txt index 1bd06426e..42afe8a64 100644 --- a/requirements/ingest-dropbox.txt +++ b/requirements/ingest-dropbox.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # # pip-compile requirements/ingest-dropbox.in @@ -34,7 +34,6 @@ requests==2.31.0 # dropboxdrivefs six==1.16.0 # via - # -c requirements/base.txt # dropbox # stone stone==3.3.1 diff --git a/requirements/ingest-elasticsearch.txt b/requirements/ingest-elasticsearch.txt index 78b7d9623..ccd9575c5 100644 --- a/requirements/ingest-elasticsearch.txt +++ b/requirements/ingest-elasticsearch.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # # pip-compile requirements/ingest-elasticsearch.in @@ -11,7 +11,7 @@ certifi==2023.7.22 # elastic-transport elastic-transport==8.4.0 # via elasticsearch -elasticsearch==8.8.2 +elasticsearch==8.9.0 # via -r requirements/ingest-elasticsearch.in jq==1.4.1 # via -r requirements/ingest-elasticsearch.in diff --git a/requirements/ingest-gcs.txt b/requirements/ingest-gcs.txt index 129eae20a..eba10abe5 100644 --- a/requirements/ingest-gcs.txt +++ b/requirements/ingest-gcs.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # # pip-compile requirements/ingest-gcs.in @@ -57,7 +57,7 @@ google-crc32c==1.5.0 # via google-resumable-media google-resumable-media==2.5.0 # via google-cloud-storage -googleapis-common-protos==1.59.1 +googleapis-common-protos==1.60.0 # via google-api-core idna==3.4 # via @@ -74,7 +74,6 @@ protobuf==4.23.4 # via # -c requirements/constraints.in # google-api-core - # googleapis-common-protos pyasn1==0.5.0 # via # pyasn1-modules @@ -93,9 +92,7 @@ requests-oauthlib==1.3.1 rsa==4.9 
# via google-auth six==1.16.0 - # via - # -c requirements/base.txt - # google-auth + # via google-auth urllib3==1.26.16 # via # -c requirements/base.txt diff --git a/requirements/ingest-github.txt b/requirements/ingest-github.txt index 3c1504b9c..6f580e3a2 100644 --- a/requirements/ingest-github.txt +++ b/requirements/ingest-github.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # # pip-compile requirements/ingest-github.in @@ -11,7 +11,6 @@ certifi==2023.7.22 # requests cffi==1.15.1 # via - # -c requirements/base.txt # cryptography # pynacl charset-normalizer==3.2.0 @@ -19,9 +18,7 @@ charset-normalizer==3.2.0 # -c requirements/base.txt # requests cryptography==41.0.2 - # via - # -c requirements/base.txt - # pyjwt + # via pyjwt deprecated==1.2.14 # via pygithub idna==3.4 @@ -29,9 +26,7 @@ idna==3.4 # -c requirements/base.txt # requests pycparser==2.21 - # via - # -c requirements/base.txt - # cffi + # via cffi pygithub==1.58.2 # via -r requirements/ingest-github.in pyjwt[crypto]==2.8.0 diff --git a/requirements/ingest-gitlab.txt b/requirements/ingest-gitlab.txt index dbff64042..b0f34d769 100644 --- a/requirements/ingest-gitlab.txt +++ b/requirements/ingest-gitlab.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # # pip-compile requirements/ingest-gitlab.in diff --git a/requirements/ingest-google-drive.txt b/requirements/ingest-google-drive.txt index fc48a89ca..8400e3597 100644 --- a/requirements/ingest-google-drive.txt +++ b/requirements/ingest-google-drive.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # # pip-compile requirements/ingest-google-drive.in @@ -26,7 +26,7 @@ google-auth==2.22.0 # 
google-auth-httplib2 google-auth-httplib2==0.1.0 # via google-api-python-client -googleapis-common-protos==1.59.1 +googleapis-common-protos==1.60.0 # via google-api-core httplib2==0.22.0 # via @@ -59,7 +59,6 @@ rsa==4.9 # via google-auth six==1.16.0 # via - # -c requirements/base.txt # google-auth # google-auth-httplib2 uritemplate==4.1.1 diff --git a/requirements/ingest-onedrive.txt b/requirements/ingest-onedrive.txt index 7502a3bdc..c87c1494e 100644 --- a/requirements/ingest-onedrive.txt +++ b/requirements/ingest-onedrive.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # # pip-compile requirements/ingest-onedrive.in @@ -10,16 +10,13 @@ certifi==2023.7.22 # -c requirements/constraints.in # requests cffi==1.15.1 - # via - # -c requirements/base.txt - # cryptography + # via cryptography charset-normalizer==3.2.0 # via # -c requirements/base.txt # requests cryptography==41.0.2 # via - # -c requirements/base.txt # -r requirements/ingest-onedrive.in # msal # pyjwt @@ -34,15 +31,11 @@ msal==1.23.0 office365-rest-python-client==2.4.2 # via -r requirements/ingest-onedrive.in pycparser==2.21 - # via - # -c requirements/base.txt - # cffi + # via cffi pyjwt[crypto]==2.8.0 # via msal pytz==2023.3 - # via - # -c requirements/base.txt - # office365-rest-python-client + # via office365-rest-python-client requests==2.31.0 # via # -c requirements/base.txt diff --git a/requirements/ingest-outlook.txt b/requirements/ingest-outlook.txt index f8984bf4a..fd89eab61 100644 --- a/requirements/ingest-outlook.txt +++ b/requirements/ingest-outlook.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # # pip-compile requirements/ingest-outlook.in @@ -10,16 +10,13 @@ certifi==2023.7.22 # -c requirements/constraints.in # requests cffi==1.15.1 - # via - # 
-c requirements/base.txt - # cryptography + # via cryptography charset-normalizer==3.2.0 # via # -c requirements/base.txt # requests cryptography==41.0.2 # via - # -c requirements/base.txt # -r requirements/ingest-outlook.in # msal # pyjwt @@ -34,15 +31,11 @@ msal==1.23.0 office365-rest-python-client==2.4.2 # via -r requirements/ingest-outlook.in pycparser==2.21 - # via - # -c requirements/base.txt - # cffi + # via cffi pyjwt[crypto]==2.8.0 # via msal pytz==2023.3 - # via - # -c requirements/base.txt - # office365-rest-python-client + # via office365-rest-python-client requests==2.31.0 # via # -c requirements/base.txt diff --git a/requirements/ingest-reddit.txt b/requirements/ingest-reddit.txt index 42bffeadd..c7f364fc2 100644 --- a/requirements/ingest-reddit.txt +++ b/requirements/ingest-reddit.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # # pip-compile requirements/ingest-reddit.in diff --git a/requirements/ingest-s3.txt b/requirements/ingest-s3.txt index e9801a500..26c6ee4fd 100644 --- a/requirements/ingest-s3.txt +++ b/requirements/ingest-s3.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # # pip-compile requirements/ingest-s3.in @@ -43,17 +43,11 @@ multidict==6.0.4 # aiohttp # yarl python-dateutil==2.8.2 - # via - # -c requirements/base.txt - # botocore + # via botocore s3fs==2023.6.0 # via -r requirements/ingest-s3.in six==1.16.0 - # via - # -c requirements/base.txt - # python-dateutil -typing-extensions==4.7.1 - # via aioitertools + # via python-dateutil urllib3==1.26.16 # via # -c requirements/base.txt diff --git a/requirements/ingest-slack.txt b/requirements/ingest-slack.txt index b8c94147b..f88d94e18 100644 --- a/requirements/ingest-slack.txt +++ b/requirements/ingest-slack.txt @@ -1,5 +1,5 @@ # -# This 
file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # # pip-compile requirements/ingest-slack.in diff --git a/requirements/ingest-wikipedia.txt b/requirements/ingest-wikipedia.txt index 7455fb82c..e25272b66 100644 --- a/requirements/ingest-wikipedia.txt +++ b/requirements/ingest-wikipedia.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # # pip-compile requirements/ingest-wikipedia.in diff --git a/requirements/test.txt b/requirements/test.txt index 1e35908d7..11d41d6da 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # # pip-compile requirements/test.in @@ -26,9 +26,7 @@ coverage[toml]==7.2.7 # via # -r requirements/test.in # pytest-cov -exceptiongroup==1.1.2 - # via pytest -flake8==6.0.0 +flake8==6.1.0 # via -r requirements/test.in freezegun==1.2.2 # via -r requirements/test.in @@ -64,17 +62,17 @@ packaging==23.1 # via # black # pytest -pathspec==0.11.1 +pathspec==0.11.2 # via black -platformdirs==3.9.1 +platformdirs==3.10.0 # via black pluggy==1.2.0 # via pytest -pycodestyle==2.10.0 +pycodestyle==2.11.0 # via flake8 pydantic==1.10.12 # via label-studio-sdk -pyflakes==3.0.1 +pyflakes==3.1.0 # via flake8 pytest==7.4.0 # via @@ -85,28 +83,17 @@ pytest-cov==4.1.0 pytest-mock==3.11.1 # via -r requirements/test.in python-dateutil==2.8.2 - # via - # -c requirements/base.txt - # freezegun + # via freezegun pyyaml==6.0.1 # via vcrpy requests==2.31.0 # via # -c requirements/base.txt # label-studio-sdk -ruff==0.0.280 +ruff==0.0.281 # via -r requirements/test.in six==1.16.0 - # via - # -c requirements/base.txt - # python-dateutil - # vcrpy -tomli==2.0.1 - # via - # black - # coverage 
- # mypy - # pytest + # via python-dateutil types-click==7.1.8 # via -r requirements/test.in types-markdown==3.4.2.10 @@ -119,7 +106,6 @@ types-urllib3==1.26.25.14 # via types-requests typing-extensions==4.7.1 # via - # black # mypy # pydantic urllib3==1.26.16 @@ -127,8 +113,7 @@ urllib3==1.26.16 # -c requirements/base.txt # -c requirements/constraints.in # requests - # vcrpy -vcrpy==5.0.0 +vcrpy==5.1.0 # via -r requirements/test.in wrapt==1.15.0 # via vcrpy diff --git a/setup.py b/setup.py index c05f592d4..eaaec0ce1 100644 --- a/setup.py +++ b/setup.py @@ -34,11 +34,48 @@ def load_requirements(file_list: Optional[Union[str, List[str]]] = None) -> List with open(file, encoding="utf-8") as f: requirements.extend(f.readlines()) requirements = [ - req for req in requirements if not req.startswith("#") and not req.startswith("-") + req + for req in requirements + if not req.startswith("#") and not req.startswith("-") ] return requirements +csv_reqs = load_requirements("requirements/extra-csv.in") +docx_reqs = load_requirements("requirements/extra-docx.in") +epub_reqs = load_requirements("requirements/extra-pandoc.in") +image_reqs = load_requirements("requirements/extra-pdf-image.in") +markdown_reqs = load_requirements("requirements/extra-markdown.in") +msg_reqs = load_requirements("requirements/extra-msg.in") +odt_reqs = load_requirements("requirements/extra-odt.in") +org_reqs = load_requirements("requirements/extra-pandoc.in") +pdf_reqs = load_requirements("requirements/extra-pdf-image.in") +pptx_reqs = load_requirements("requirements/extra-pptx.in") +rtf_reqs = load_requirements("requirements/extra-pandoc.in") +rst_reqs = load_requirements("requirements/extra-pandoc.in") +tsv_reqs = load_requirements("requirements/extra-csv.in") +xlsx_reqs = load_requirements("requirements/extra-xlsx.in") + +all_doc_reqs = list( + set( + csv_reqs + + docx_reqs + + epub_reqs + + image_reqs + + markdown_reqs + + msg_reqs + + odt_reqs + + org_reqs + + pdf_reqs + + pptx_reqs + + rtf_reqs 
+ + rst_reqs + + tsv_reqs + + xlsx_reqs, + ), +) + + setup( name="unstructured", description="A library that prepares raw documents for downstream ML tasks.", @@ -71,8 +108,23 @@ setup( }, install_requires=load_requirements(), extras_require={ - "huggingface": load_requirements("requirements/huggingface.in"), - "local-inference": load_requirements("requirements/local-inference.in"), + # Document specific extra requirements + "all-docs": all_doc_reqs, + "csv": csv_reqs, + "docx": docx_reqs, + "epub": epub_reqs, + "image": image_reqs, + "md": markdown_reqs, + "msg": msg_reqs, + "odt": odt_reqs, + "org": org_reqs, + "pdf": pdf_reqs, + "pptx": pptx_reqs, + "rtf": rtf_reqs, + "rst": rst_reqs, + "tsv": tsv_reqs, + "xlsx": xlsx_reqs, + # Extra requirements for data connectors "s3": load_requirements("requirements/ingest-s3.in"), "azure": load_requirements("requirements/ingest-azure.in"), "discord": load_requirements("requirements/ingest-discord.in"), @@ -89,6 +141,9 @@ setup( "onedrive": load_requirements("requirements/ingest-onedrive.in"), "outlook": load_requirements("requirements/ingest-outlook.in"), "confluence": load_requirements("requirements/ingest-confluence.in"), + # Legacy extra requirements + "huggingface": load_requirements("requirements/huggingface.in"), + "local-inference": all_doc_reqs, }, package_dir={"unstructured": "unstructured"}, package_data={"unstructured": ["nlp/*.txt"]}, diff --git a/test_unstructured/file_utils/test_filetype.py b/test_unstructured/file_utils/test_filetype.py index 6aee31a73..d96401b65 100644 --- a/test_unstructured/file_utils/test_filetype.py +++ b/test_unstructured/file_utils/test_filetype.py @@ -478,4 +478,4 @@ def test_document_to_element_list_omits_coord_system_when_coord_points_absent(): def test_get_page_image_metadata_and_coordinate_system(): doc = MockDocumentLayout() metadata = _get_page_image_metadata(doc.pages[0]) - assert type(metadata) == dict + assert isinstance(metadata, dict) diff --git 
a/test_unstructured/partition/test_doc.py b/test_unstructured/partition/test_doc.py index 271b17ff7..bd47749e0 100644 --- a/test_unstructured/partition/test_doc.py +++ b/test_unstructured/partition/test_doc.py @@ -267,6 +267,6 @@ def test_partition_doc_from_file_without_metadata_date( sf = SpooledTemporaryFile() sf.write(f.read()) sf.seek(0) - elements = partition_doc(file=sf, metadata_date="2020-07-05") + elements = partition_doc(file=sf, metadata_last_modified=None) - assert elements[0].metadata.date == "2020-07-05" + assert elements[0].metadata.last_modified is None diff --git a/test_unstructured_ingest/unit/test_interfaces.py b/test_unstructured_ingest/unit/test_interfaces.py index 1fd948f33..2dacd4161 100644 --- a/test_unstructured_ingest/unit/test_interfaces.py +++ b/test_unstructured_ingest/unit/test_interfaces.py @@ -216,8 +216,8 @@ def test_process_file_metadata_exclude_filename_pagenum(mocker, partition_test_r isd_elems = test_ingest_doc.process_file() assert len(isd_elems) for elem in isd_elems: - assert "filename" not in elem["metadata"].keys() - assert "page_number" not in elem["metadata"].keys() + assert "filename" not in elem["metadata"] + assert "page_number" not in elem["metadata"] def test_process_file_flatten_metadata(mocker, partition_test_results): diff --git a/unstructured/__version__.py b/unstructured/__version__.py index 844274171..e0b78aab5 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.8.8" # pragma: no cover +__version__ = "0.9.0" # pragma: no cover diff --git a/unstructured/file_utils/file_conversion.py b/unstructured/file_utils/file_conversion.py index e92b47043..23b803ecf 100644 --- a/unstructured/file_utils/file_conversion.py +++ b/unstructured/file_utils/file_conversion.py @@ -1,11 +1,14 @@ import tempfile from typing import IO, Optional -import pypandoc - from unstructured.partition.common import exactly_one +from unstructured.utils import dependency_exists,
requires_dependencies + +if dependency_exists("pypandoc"): + import pypandoc +@requires_dependencies(["pypandoc"]) def convert_file_to_text(filename: str, source_format: str, target_format: str) -> str: """Uses pandoc to convert the source document to a raw text string.""" try: diff --git a/unstructured/ingest/connector/google_drive.py b/unstructured/ingest/connector/google_drive.py index 67e905e60..02b5a07be 100644 --- a/unstructured/ingest/connector/google_drive.py +++ b/unstructured/ingest/connector/google_drive.py @@ -76,7 +76,7 @@ class SimpleGoogleDriveConfig(BaseConnectorConfig): recursive: bool = False def __post_init__(self): - if self.extension and self.extension not in EXT_TO_FILETYPE.keys(): + if self.extension and self.extension not in EXT_TO_FILETYPE: raise ValueError( f"Extension not supported. " f"Value MUST be one of {', '.join([k for k in EXT_TO_FILETYPE if k is not None])}.", diff --git a/unstructured/ingest/connector/onedrive.py b/unstructured/ingest/connector/onedrive.py index 313ebeafd..24d0e5edc 100644 --- a/unstructured/ingest/connector/onedrive.py +++ b/unstructured/ingest/connector/onedrive.py @@ -65,7 +65,7 @@ class OneDriveIngestDoc(IngestDocCleanupMixin, BaseIngestDoc): if not self.ext: raise ValueError("Unsupported file without extension.") - if self.ext not in EXT_TO_FILETYPE.keys(): + if self.ext not in EXT_TO_FILETYPE: raise ValueError( f"Extension not supported. 
" f"Value MUST be one of {', '.join([k for k in EXT_TO_FILETYPE if k is not None])}.", diff --git a/unstructured/partition/auto.py b/unstructured/partition/auto.py index ea56b0ee0..9383256d1 100644 --- a/unstructured/partition/auto.py +++ b/unstructured/partition/auto.py @@ -13,27 +13,58 @@ from unstructured.file_utils.filetype import ( ) from unstructured.logger import logger from unstructured.partition.common import exactly_one -from unstructured.partition.csv import partition_csv -from unstructured.partition.doc import partition_doc -from unstructured.partition.docx import partition_docx from unstructured.partition.email import partition_email -from unstructured.partition.epub import partition_epub from unstructured.partition.html import partition_html -from unstructured.partition.image import partition_image from unstructured.partition.json import partition_json -from unstructured.partition.md import partition_md -from unstructured.partition.msg import partition_msg -from unstructured.partition.odt import partition_odt -from unstructured.partition.org import partition_org -from unstructured.partition.pdf import partition_pdf -from unstructured.partition.ppt import partition_ppt -from unstructured.partition.pptx import partition_pptx -from unstructured.partition.rst import partition_rst -from unstructured.partition.rtf import partition_rtf from unstructured.partition.text import partition_text -from unstructured.partition.tsv import partition_tsv -from unstructured.partition.xlsx import partition_xlsx from unstructured.partition.xml import partition_xml +from unstructured.utils import dependency_exists + +if dependency_exists("pandas"): + from unstructured.partition.csv import partition_csv + from unstructured.partition.tsv import partition_tsv + + +if dependency_exists("docx"): + from unstructured.partition.doc import partition_doc + from unstructured.partition.docx import partition_docx + + +if dependency_exists("docx") and dependency_exists("pypandoc"): + 
from unstructured.partition.odt import partition_odt + + +if dependency_exists("pypandoc"): + from unstructured.partition.epub import partition_epub + from unstructured.partition.org import partition_org + from unstructured.partition.rst import partition_rst + from unstructured.partition.rtf import partition_rtf + + +if dependency_exists("markdown"): + from unstructured.partition.md import partition_md + + +if dependency_exists("msg_parser"): + from unstructured.partition.msg import partition_msg + + +pdf_imports = ["pdf2image", "pdfminer", "PIL"] +if all(dependency_exists(dep) for dep in pdf_imports): + from unstructured.partition.pdf import partition_pdf + + +if dependency_exists("unstructured_inference"): + from unstructured.partition.image import partition_image + + +if dependency_exists("pptx"): + from unstructured.partition.ppt import partition_ppt + from unstructured.partition.pptx import partition_pptx + + +if dependency_exists("pandas") and dependency_exists("openpyxl"): + from unstructured.partition.xlsx import partition_xlsx def partition( diff --git a/unstructured/partition/common.py b/unstructured/partition/common.py index 2b38a5d67..5e75387e1 100644 --- a/unstructured/partition/common.py +++ b/unstructured/partition/common.py @@ -7,7 +7,6 @@ from io import BufferedReader, BytesIO, TextIOWrapper from tempfile import SpooledTemporaryFile from typing import IO, TYPE_CHECKING, Any, BinaryIO, Dict, List, Optional, Tuple, Union -from docx import table as docxtable from tabulate import tabulate from unstructured.documents.coordinates import CoordinateSystem @@ -23,6 +22,10 @@ from unstructured.documents.elements import ( ) from unstructured.logger import logger from unstructured.nlp.patterns import ENUMERATED_BULLETS_RE, UNICODE_BULLETS_RE +from unstructured.utils import dependency_exists + +if dependency_exists("docx"): + import docx.table as docxtable if TYPE_CHECKING: from unstructured_inference.inference.layoutelement import ( @@ -303,12 +306,12 @@ def 
convert_to_bytes( return f_bytes -def convert_ms_office_table_to_text(table: docxtable.Table, as_html: bool = True): +def convert_ms_office_table_to_text(table: "docxtable.Table", as_html: bool = True) -> str: """ Convert a table object from a Word document to an HTML table string using the tabulate library. Args: - table (Table): A Table object. + table (Table): A docx.table.Table object. as_html (bool): Whether to return the table as an HTML string (True) or a plain text string (False) diff --git a/unstructured/partition/docx.py b/unstructured/partition/docx.py index fadee72d7..26ae804d7 100644 --- a/unstructured/partition/docx.py +++ b/unstructured/partition/docx.py @@ -4,7 +4,6 @@ from tempfile import SpooledTemporaryFile from typing import IO, BinaryIO, List, Optional, Tuple, Union, cast import docx -import pypandoc from docx.oxml.shared import qn from docx.text.paragraph import Paragraph from docx.text.run import Run @@ -38,6 +37,10 @@ from unstructured.partition.text_type import ( is_possible_title, is_us_city_state_zip, ) +from unstructured.utils import dependency_exists + +if dependency_exists("pypandoc"): + import pypandoc # NOTE(robinson) - documentation on built in styles can be found at the link below # ref: https://python-docx.readthedocs.io/en/latest/user/ @@ -314,7 +317,7 @@ def convert_and_partition_docx( metadata_filename: Optional[str] = None, metadata_last_modified: Optional[str] = None, ) -> List[Element]: - """Converts a document to DOCX and then partitions it using partition_html. Works with + """Converts a document to DOCX and then partitions it using partition_docx. Works with any file format support by pandoc. 
Parameters diff --git a/unstructured/staging/base.py b/unstructured/staging/base.py index 5c1a691bc..6dd5667f9 100644 --- a/unstructured/staging/base.py +++ b/unstructured/staging/base.py @@ -3,8 +3,6 @@ import io import json from typing import Any, Dict, List, Optional -import pandas as pd - from unstructured.documents.elements import ( TYPE_TO_TEXT_ELEMENT_MAP, CheckBox, @@ -13,6 +11,10 @@ from unstructured.documents.elements import ( NoID, ) from unstructured.partition.common import exactly_one +from unstructured.utils import dependency_exists, requires_dependencies + +if dependency_exists("pandas"): + import pandas as pd def _get_metadata_table_fieldnames(): @@ -161,7 +163,7 @@ def convert_to_isd_csv(elements: List[Element]) -> str: if row.get("sent_from"): row["sender"] = row.get("sent_from") - if type(row["sender"]) == list: + if isinstance(row["sender"], list): row["sender"] = row["sender"][0] with io.StringIO() as buffer: @@ -176,11 +178,14 @@ def convert_to_csv(elements: List[Element]) -> str: return convert_to_isd_csv(elements) -def convert_to_dataframe(elements: List[Element], drop_empty_cols: bool = True) -> pd.DataFrame: +@requires_dependencies(["pandas"]) +def convert_to_dataframe(elements: List[Element], drop_empty_cols: bool = True) -> "pd.DataFrame": """Converts document elements to a pandas DataFrame. The dataframe contains the following columns: text: the element text type: the text type (NarrativeText, Title, etc) + + Output is pd.DataFrame """ csv_string = convert_to_isd_csv(elements) csv_string_io = io.StringIO(csv_string)