diff --git a/.github/actions/base-ingest-cache/action.yml b/.github/actions/base-ingest-cache/action.yml index b83a833cf..f29d86764 100644 --- a/.github/actions/base-ingest-cache/action.yml +++ b/.github/actions/base-ingest-cache/action.yml @@ -39,7 +39,7 @@ runs: python -m pip install --upgrade setuptools fi make install-ci - make install-all-ingest + make install-ingest - name: Save Ingest Cache if: steps.ingest-virtualenv-cache-restore.outputs.cache-hit != 'true' id: ingest-virtualenv-cache-save diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 142578885..12c261ffb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -72,7 +72,6 @@ jobs: - name: Install all doc and test dependencies run: | make install-ci - make install-all-ingest make check-licenses lint: @@ -273,37 +272,6 @@ jobs: python-version: ${{ matrix.python-version }} check-only: 'true' - test_ingest_unit: - strategy: - matrix: - python-version: [ "3.9","3.10" ] - runs-on: ubuntu-latest - needs: [ setup_ingest, lint ] - steps: - # actions/checkout MUST come before auth - - uses: 'actions/checkout@v4' - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Get full Python version - id: full-python-version - run: echo version=$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") >> $GITHUB_OUTPUT - - name: Setup virtual environment - uses: ./.github/actions/base-ingest-cache - with: - python-version: ${{ matrix.python-version }} - - name: Test Ingest (unit) - env: - NLTK_DATA: ${{ github.workspace }}/nltk_data - PYTHON: python${{ matrix.python-version }} - run: | - source .venv/bin/activate - make install-ci - make install-all-ingest - PYTHONPATH=. 
${PYTHON} -m pytest test_unstructured_ingest/unit - - test_ingest_src: strategy: matrix: @@ -378,8 +346,6 @@ jobs: PYTHON: python${{ matrix.python-version }} run: | source .venv/bin/activate - make install-ci - make install-all-ingest sudo apt-get update sudo apt-get install -y libmagic-dev poppler-utils libreoffice make install-pandoc @@ -392,103 +358,6 @@ jobs: ./test_unstructured_ingest/test-ingest-src.sh - test_ingest_dest: - environment: ci - strategy: - matrix: - python-version: ["3.9","3.10"] - runs-on: ubuntu-latest-m - needs: [setup_ingest, lint] - steps: - # actions/checkout MUST come before auth - - uses: 'actions/checkout@v4' - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Get full Python version - id: full-python-version - run: echo version=$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") >> $GITHUB_OUTPUT - - name: Setup virtual environment - uses: ./.github/actions/base-ingest-cache - with: - python-version: ${{ matrix.python-version }} - - name: Setup docker-compose - uses: KengoTODA/actions-setup-docker-compose@v1 - with: - version: '2.22.0' - - name: Test (end-to-end) - env: - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - S3_INGEST_TEST_ACCESS_KEY: ${{ secrets.S3_INGEST_TEST_ACCESS_KEY }} - S3_INGEST_TEST_SECRET_KEY: ${{ secrets.S3_INGEST_TEST_SECRET_KEY }} - AZURE_SEARCH_ENDPOINT: ${{ secrets.AZURE_SEARCH_ENDPOINT }} - AZURE_SEARCH_API_KEY: ${{ secrets.AZURE_SEARCH_API_KEY }} - BOX_APP_CONFIG: ${{ secrets.BOX_APP_CONFIG }} - DROPBOX_APP_KEY: ${{ secrets.DROPBOX_APP_KEY }} - DROPBOX_APP_SECRET: ${{ secrets.DROPBOX_APP_SECRET }} - DROPBOX_REFRESH_TOKEN: ${{ secrets.DROPBOX_REFRESH_TOKEN }} - GCP_INGEST_SERVICE_KEY: ${{ secrets.GCP_INGEST_SERVICE_KEY }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - MONGODB_URI: ${{ secrets.MONGODB_URI }} - MONGODB_DATABASE_NAME: ${{ secrets.MONGODB_DATABASE_NAME }} - AZURE_DEST_CONNECTION_STR: ${{ secrets.AZURE_DEST_CONNECTION_STR }} - PINECONE_API_KEY: ${{secrets.PINECONE_API_KEY}} - VECTARA_OAUTH_CLIENT_ID: ${{secrets.VECTARA_OAUTH_CLIENT_ID}} - VECTARA_OAUTH_SECRET: ${{secrets.VECTARA_OAUTH_SECRET}} - VECTARA_CUSTOMER_ID: ${{secrets.VECTARA_CUSTOMER_ID}} - ASTRA_DB_APPLICATION_TOKEN: ${{secrets.ASTRA_DB_TOKEN}} - ASTRA_DB_API_ENDPOINT: ${{secrets.ASTRA_DB_ENDPOINT}} - CLARIFAI_API_KEY: ${{secrets.CLARIFAI_API_KEY}} - DATABRICKS_HOST: ${{secrets.DATABRICKS_HOST}} - DATABRICKS_USERNAME: ${{secrets.DATABRICKS_USERNAME}} - DATABRICKS_PASSWORD: ${{secrets.DATABRICKS_PASSWORD}} - DATABRICKS_CATALOG: ${{secrets.DATABRICKS_CATALOG}} - OCR_AGENT: "unstructured.partition.utils.ocr_models.tesseract_ocr.OCRAgentTesseract" - CI: "true" - NLTK_DATA: ${{ github.workspace }}/nltk_data - PYTHON: python${{ matrix.python-version }} - run: | - source .venv/bin/activate - make install-ci - make install-all-ingest - sudo apt-get update - sudo apt-get install -y libmagic-dev poppler-utils libreoffice - make install-pandoc - sudo add-apt-repository -y ppa:alex-p/tesseract-ocr5 - sudo apt-get update - sudo apt-get install -y tesseract-ocr - sudo apt-get install -y tesseract-ocr-kor - sudo apt-get install diffstat - tesseract --version - ./test_unstructured_ingest/test-ingest-dest.sh - - test_ingest_help: - environment: ci - strategy: - matrix: - python-version: ["3.9","3.10","3.11", "3.12"] - runs-on: ubuntu-latest - needs: [setup_ingest, lint] - steps: - - 
uses: 'actions/checkout@v4' - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Setup virtual environment - uses: ./.github/actions/base-ingest-cache - with: - python-version: ${{ matrix.python-version }} - - name: Validate --help - run: | - source .venv/bin/activate - make install-ci - make install-all-ingest - ./test_unstructured_ingest/test-help.sh - - test_unstructured_api_unit: strategy: matrix: diff --git a/.github/workflows/ingest-test-fixtures-update-pr.yml b/.github/workflows/ingest-test-fixtures-update-pr.yml index d22a5aab9..f724e8dfc 100644 --- a/.github/workflows/ingest-test-fixtures-update-pr.yml +++ b/.github/workflows/ingest-test-fixtures-update-pr.yml @@ -105,6 +105,7 @@ jobs: sudo add-apt-repository -y ppa:alex-p/tesseract-ocr5 sudo apt-get install -y tesseract-ocr sudo apt-get install -y tesseract-ocr-kor + sudo apt-get install diffstat tesseract --version ./test_unstructured_ingest/test-ingest-src.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 293ffe581..68ccf02a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,12 @@ -## 0.15.15-dev0 +## 0.16.0 ### Enhancements +* **Remove ingest implementation.** The deprecated ingest functionality has been removed, as it is now maintained in the separate [unstructured-ingest](https://github.com/Unstructured-IO/unstructured-ingest) repository. + * Replace extras in `requirements/ingest` directory with a new `ingest.txt` extra for installing the `unstructured-ingest` library. + * Remove the `unstructured.ingest` submodule. + * Delete all shell scripts previously used for destination ingest tests. + ### Features ### Fixes diff --git a/MANIFEST.in b/MANIFEST.in index e5c39fc29..e4c7d4da5 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -15,45 +15,3 @@ include requirements/extra-pptx.in include requirements/extra-xlsx.in include requirements/huggingface.in -# Ingest extras -include requirements/ingest/airtable.in -include requirements/ingest/astradb.in -include requirements/ingest/azure-cognitive-search.in -include requirements/ingest/azure.in -include requirements/ingest/biomed.in -include requirements/ingest/box.in -include requirements/ingest/chroma.in -include requirements/ingest/confluence.in -include requirements/ingest/databricks-volumes.in -include requirements/ingest/delta-table.in -include requirements/ingest/discord.in -include requirements/ingest/dropbox.in -include requirements/ingest/elasticsearch.in -include requirements/ingest/embed-aws-bedrock.in -include requirements/ingest/embed-huggingface.in -include requirements/ingest/embed-mixedbreadai.in -include requirements/ingest/embed-openai.in -include requirements/ingest/gcs.in -include requirements/ingest/github.in -include requirements/ingest/gitlab.in -include requirements/ingest/google-drive.in -include requirements/ingest/hubspot.in -include requirements/ingest/jira.in -include requirements/ingest/kafka.in -include requirements/ingest/mongodb.in -include requirements/ingest/notion.in -include requirements/ingest/onedrive.in -include requirements/ingest/opensearch.in -include requirements/ingest/outlook.in -include requirements/ingest/pinecone.in -include requirements/ingest/postgres.in -include requirements/ingest/qdrant.in -include requirements/ingest/reddit.in -include requirements/ingest/s3.in -include requirements/ingest/salesforce.in -include requirements/ingest/sftp.in -include requirements/ingest/sharepoint.in -include requirements/ingest/slack.in -include 
requirements/ingest/singlestore.in -include requirements/ingest/weaviate.in -include requirements/ingest/wikipedia.in diff --git a/Makefile b/Makefile index d9a3e1803..714992a83 100644 --- a/Makefile +++ b/Makefile @@ -99,171 +99,9 @@ install-xlsx: .PHONY: install-all-docs install-all-docs: install-base install-csv install-docx install-epub install-odt install-pypandoc install-markdown install-pdf-image install-pptx install-xlsx -.PHONY: install-all-ingest -install-all-ingest: - find requirements/ingest -type f -name "*.txt" -exec ${PYTHON} -m pip install -r '{}' ';' - - -.PHONY: install-ingest-google-drive -install-ingest-google-drive: - ${PYTHON} -m pip install -r requirements/ingest/google-drive.txt - -## install-ingest-s3: install requirements for the s3 connector -.PHONY: install-ingest-s3 -install-ingest-s3: - ${PYTHON} -m pip install -r requirements/ingest/s3.txt - -.PHONY: install-ingest-gcs -install-ingest-gcs: - ${PYTHON} -m pip install -r requirements/ingest/gcs.txt - -.PHONY: install-ingest-dropbox -install-ingest-dropbox: - ${PYTHON} -m pip install -r requirements/ingest/dropbox.txt - -.PHONY: install-ingest-azure -install-ingest-azure: - ${PYTHON} -m pip install -r requirements/ingest/azure.txt - -.PHONY: install-ingest-box -install-ingest-box: - ${PYTHON} -m pip install -r requirements/ingest/box.txt - -.PHONY: install-ingest-delta-table -install-ingest-delta-table: - ${PYTHON} -m pip install -r requirements/ingest/delta-table.txt - -.PHONY: install-ingest-discord -install-ingest-discord: - ${PYTHON} -m pip install -r requirements/ingest/discord.txt - -.PHONY: install-ingest-github -install-ingest-github: - ${PYTHON} -m pip install -r requirements/ingest/github.txt - -.PHONY: install-ingest-biomed -install-ingest-biomed: - ${PYTHON} -m pip install -r requirements/ingest/biomed.txt - -.PHONY: install-ingest-gitlab -install-ingest-gitlab: - ${PYTHON} -m pip install -r requirements/ingest/gitlab.txt - -.PHONY: install-ingest-onedrive -install-ingest-onedrive: - ${PYTHON} -m pip install -r requirements/ingest/onedrive.txt - -.PHONY: install-ingest-outlook -install-ingest-outlook: - ${PYTHON} -m pip install -r requirements/ingest/outlook.txt - -.PHONY: install-ingest-reddit -install-ingest-reddit: - ${PYTHON} -m pip install -r requirements/ingest/reddit.txt - -.PHONY: install-ingest-slack -install-ingest-slack: - ${PYTHON} -m pip install -r requirements/ingest/slack.txt - -.PHONY: install-ingest-kafka -install-ingest-kafka: - ${PYTHON} -m pip install -r requirements/ingest/kafka.txt - -.PHONY: install-ingest-wikipedia -install-ingest-wikipedia: - ${PYTHON} -m pip install -r requirements/ingest/wikipedia.txt - -.PHONY: install-ingest-elasticsearch -install-ingest-elasticsearch: - ${PYTHON} -m pip install -r requirements/ingest/elasticsearch.txt - -.PHONY: install-ingest-opensearch -install-ingest-opensearch: - ${PYTHON} -m pip install -r requirements/ingest/opensearch.txt - -.PHONY: install-ingest-confluence -install-ingest-confluence: - ${PYTHON} -m pip install -r requirements/ingest/confluence.txt - -.PHONY: install-ingest-airtable -install-ingest-airtable: - ${PYTHON} -m pip install -r requirements/ingest/airtable.txt - -.PHONY: install-ingest-sharepoint -install-ingest-sharepoint: - ${PYTHON} -m pip install -r requirements/ingest/sharepoint.txt - -.PHONY: install-ingest-singlestore -install-ingest-singlestore: - ${PYTHON} -m pip install -r requirements/ingest/singlestore.txt - -.PHONY: install-ingest-weaviate -install-ingest-weaviate: - ${PYTHON} -m pip install -r 
requirements/ingest/weaviate.txt - -.PHONY: install-ingest-local -install-ingest-local: - echo "no unique dependencies for local connector" - -.PHONY: install-ingest-notion -install-ingest-notion: - ${PYTHON} -m pip install -r requirements/ingest/notion.txt - -.PHONY: install-ingest-salesforce -install-ingest-salesforce: - ${PYTHON} -m pip install -r requirements/ingest/salesforce.txt - -.PHONY: install-ingest-jira -install-ingest-jira: - ${PYTHON} -m pip install -r requirements/ingest/jira.txt - -.PHONY: install-ingest-hubspot -install-ingest-hubspot: - ${PYTHON} -m pip install -r requirements/ingest/hubspot.txt - -.PHONY: install-ingest-sftp -install-ingest-sftp: - ${PYTHON} -m pip install -r requirements/ingest/sftp.txt - -.PHONY: install-ingest-pinecone -install-ingest-pinecone: - ${PYTHON} -m pip install -r requirements/ingest/pinecone.txt - -.PHONY: install-ingest-qdrant -install-ingest-qdrant: - ${PYTHON} -m pip install -r requirements/ingest/qdrant.txt - -.PHONY: install-ingest-chroma -install-ingest-chroma: - ${PYTHON} -m pip install -r requirements/ingest/chroma.txt - -.PHONY: install-ingest-postgres -install-ingest-postgres: - ${PYTHON} -m pip install -r requirements/ingest/postgres.txt - -.PHONY: install-ingest-mongodb -install-ingest-mongodb: - ${PYTHON} -m pip install -r requirements/ingest/mongodb.txt - -.PHONY: install-ingest-databricks-volumes -install-ingest-databricks-volumes: - ${PYTHON} -m pip install -r requirements/ingest/databricks-volumes.txt - -.PHONY: install-ingest-astradb -install-ingest-astradb: - ${PYTHON} -m pip install -r requirements/ingest/astradb.txt - -.PHONY: install-ingest-clarifai -install-ingest-clarifai: - ${PYTHON} -m pip install -r requirements/ingest/clarifai.txt - -.PHONY: install-embed-huggingface -install-embed-huggingface: - ${PYTHON} -m pip install -r requirements/ingest/embed-huggingface.txt - -.PHONY: install-unstructured-inference -install-unstructured-inference: - ${PYTHON} -m pip install -r requirements/ingest/local-inference.txt +.PHONY: install-ingest +install-ingest: + python3 -m pip install -r requirements/ingest/ingest.txt ## install-local-inference: installs requirements for local inference .PHONY: install-local-inference @@ -367,7 +205,7 @@ test-extra-xlsx: ## check: runs linters (includes tests) .PHONY: check -check: check-ruff check-black check-flake8 check-version check-flake8-print +check: check-ruff check-black check-flake8 check-version .PHONY: check-shfmt check-shfmt: @@ -385,12 +223,6 @@ check-flake8: check-licenses: @scripts/check-licenses.sh -# Check for print statements in ingest since anything going to console should be using the ingest logger -# as it has a built in filter to redact sensitive information -.PHONY: check-flake8-print -check-flake8-print: - flake8 --per-file-ignores "" ./unstructured/ingest - .PHONY: check-ruff check-ruff: # -- ruff options are determined by pyproject.toml -- diff --git a/docs/requirements.in b/docs/requirements.in index 27a82d80c..46b71caac 100644 --- a/docs/requirements.in +++ b/docs/requirements.in @@ -22,4 +22,4 @@ furo==2023.7.26 certifi>=2022.12.07 # NOTE(ronny) - Added to suppress Sphinx warnings -myst-parser \ No newline at end of file +myst-parser diff --git a/docs/requirements.txt b/docs/requirements.txt index ee5fdd1d2..e20c1267e 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -10,17 +10,17 @@ babel==2.13.1 # via sphinx beautifulsoup4==4.12.2 # via - # -c base.txt + # -c ./deps/base.txt # furo certifi==2023.11.17 # via - # -c base.txt + # -c ./deps/base.txt 
# -c constraints.in # -r build.in # requests charset-normalizer==3.3.2 # via - # -c base.txt + # -c ./deps/base.txt # requests docutils==0.18.1 # via @@ -32,7 +32,7 @@ furo==2023.7.26 # via -r build.in idna==3.6 # via - # -c base.txt + # -c ./deps/base.txt # requests imagesize==1.4.1 # via sphinx @@ -56,7 +56,7 @@ myst-parser==2.0.0 # via -r build.in packaging==23.2 # via - # -c base.txt + # -c ./deps/base.txt # sphinx pygments==2.17.2 # via @@ -69,13 +69,13 @@ pyyaml==6.0.1 # via myst-parser requests==2.31.0 # via - # -c base.txt + # -c ./deps/base.txt # sphinx snowballstemmer==2.2.0 # via sphinx soupsieve==2.5 # via - # -c base.txt + # -c ./deps/base.txt # beautifulsoup4 sphinx==6.2.1 # via @@ -118,7 +118,7 @@ sphinxcontrib-serializinghtml==1.1.5 # sphinx urllib3==1.26.18 # via - # -c base.txt + # -c ./deps/base.txt # -c constraints.in # requests zipp==3.17.0 diff --git a/requirements/Makefile b/requirements/Makefile index 9c4175401..9e6b685fc 100644 --- a/requirements/Makefile +++ b/requirements/Makefile @@ -3,12 +3,8 @@ SHELL := /bin/bash BASE_REQUIREMENTS := $(shell ls ./*.in) BASE_REQUIREMENTSTXT := $(patsubst %.in,%.txt,$(BASE_REQUIREMENTS)) -INGEST_REQUIREMENTS := $(shell ls ./ingest/*.in) -INGEST_REQUIREMENTSTXT := $(patsubst %.in,%.txt,$(INGEST_REQUIREMENTS)) - - .PHONY: all -all: compile-all-base compile-ingest +all: compile-all-base .PHONY: compile-test compile-test: @@ -26,18 +22,9 @@ compile-base: compile-all-base: compile-base compile-test compile-dev @$(foreach file,$(BASE_REQUIREMENTS),echo -e "\n\ncompiling: $(file)" && pip-compile --no-strip-extras --upgrade $(file) || exit;) -.PHONY: compile-ingest -compile-ingest: - @$(foreach file,$(INGEST_REQUIREMENTS),echo -e "\n\ncompiling: $(file)" && pip-compile --no-strip-extras --upgrade $(file) || exit;) - .PHONY: clean -clean: clean-base clean-ingest +clean: clean-base .PHONY: clean-base clean-base: - rm $(BASE_REQUIREMENTSTXT) - -.PHONY: clean-ingest -clean-ingest: - rm $(INGEST_REQUIREMENTSTXT) - + rm $(BASE_REQUIREMENTSTXT) \ No newline at end of file diff --git a/requirements/base.txt b/requirements/base.txt index b0f454c0f..5ff129c06 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -4,7 +4,7 @@ # # pip-compile ./base.in # -anyio==4.5.0 +anyio==4.6.0 # via httpx backoff==2.2.1 # via -r ./base.in @@ -36,7 +36,7 @@ dataclasses-json==0.6.7 # unstructured-client deepdiff==8.0.1 # via unstructured-client -emoji==2.13.0 +emoji==2.14.0 # via -r ./base.in exceptiongroup==1.2.2 # via anyio @@ -44,7 +44,7 @@ filetype==1.2.0 # via -r ./base.in h11==0.14.0 # via httpcore -httpcore==1.0.5 +httpcore==1.0.6 # via httpx httpx==0.27.2 # via unstructured-client @@ -88,7 +88,7 @@ psutil==6.0.0 # via -r ./base.in pycparser==2.22 # via cffi -pypdf==5.0.0 +pypdf==5.0.1 # via unstructured-client python-dateutil==2.9.0.post0 # via unstructured-client @@ -98,7 +98,7 @@ python-magic==0.4.27 # via -r ./base.in python-oxmsg==0.0.1 # via -r ./base.in -rapidfuzz==3.9.7 +rapidfuzz==3.10.0 # via -r ./base.in regex==2024.9.11 # via nltk @@ -130,7 +130,6 @@ typing-extensions==4.12.2 # via # -r ./base.in # anyio - # emoji # pypdf # python-oxmsg # typing-inspect @@ -140,7 +139,9 @@ typing-inspect==0.9.0 # dataclasses-json # unstructured-client unstructured-client==0.25.9 - # via -r ./base.in + # via + # -c ././deps/constraints.txt + # -r ./base.in urllib3==1.26.20 # via # -c ././deps/constraints.txt diff --git a/requirements/cache.txt b/requirements/cache.txt deleted file mode 100644 index d229daaec..000000000 --- a/requirements/cache.txt 
+++ /dev/null @@ -1 +0,0 @@ -# a \ No newline at end of file diff --git a/requirements/dev.txt b/requirements/dev.txt index 4df21d1ab..3ce9e87d6 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -38,7 +38,7 @@ platformdirs==4.3.6 # virtualenv pre-commit==3.8.0 # via -r ./dev.in -pyproject-hooks==1.1.0 +pyproject-hooks==1.2.0 # via # build # pip-tools @@ -46,12 +46,12 @@ pyyaml==6.0.2 # via # -c ./test.txt # pre-commit -tomli==2.0.1 +tomli==2.0.2 # via # -c ./test.txt # build # pip-tools -virtualenv==20.26.5 +virtualenv==20.26.6 # via pre-commit wheel==0.44.0 # via pip-tools diff --git a/requirements/extra-csv.txt b/requirements/extra-csv.txt index f606a04a4..1896204fb 100644 --- a/requirements/extra-csv.txt +++ b/requirements/extra-csv.txt @@ -8,7 +8,7 @@ numpy==1.26.4 # via # -c ./base.txt # pandas -pandas==2.2.2 +pandas==2.2.3 # via -r ./extra-csv.in python-dateutil==2.9.0.post0 # via @@ -20,5 +20,5 @@ six==1.16.0 # via # -c ./base.txt # python-dateutil -tzdata==2024.1 +tzdata==2024.2 # via pandas diff --git a/requirements/extra-paddleocr.txt b/requirements/extra-paddleocr.txt index 048822000..db0079f9f 100644 --- a/requirements/extra-paddleocr.txt +++ b/requirements/extra-paddleocr.txt @@ -4,7 +4,7 @@ # # pip-compile ./extra-paddleocr.in # -anyio==4.5.0 +anyio==4.6.0 # via # -c ./base.txt # httpx @@ -32,13 +32,13 @@ exceptiongroup==1.2.2 # via # -c ./base.txt # anyio -fonttools==4.53.1 +fonttools==4.54.1 # via matplotlib h11==0.14.0 # via # -c ./base.txt # httpcore -httpcore==1.0.5 +httpcore==1.0.6 # via # -c ./base.txt # httpx @@ -127,7 +127,7 @@ python-dateutil==2.9.0.post0 # matplotlib pyyaml==6.0.2 # via unstructured-paddleocr -rapidfuzz==3.9.7 +rapidfuzz==3.10.0 # via # -c ./base.txt # unstructured-paddleocr diff --git a/requirements/extra-pdf-image.txt b/requirements/extra-pdf-image.txt index 29ace44ca..a7d3ce8cf 100644 --- a/requirements/extra-pdf-image.txt +++ b/requirements/extra-pdf-image.txt @@ -42,7 +42,7 @@ filelock==3.16.1 # transformers flatbuffers==24.3.25 # via onnxruntime -fonttools==4.53.1 +fonttools==4.54.1 # via matplotlib fsspec==2024.9.0 # via @@ -60,14 +60,14 @@ googleapis-common-protos==1.65.0 # via # google-api-core # grpcio-status -grpcio==1.66.1 +grpcio==1.66.2 # via # -c ././deps/constraints.txt # google-api-core # grpcio-status grpcio-status==1.62.3 # via google-api-core -huggingface-hub==0.25.0 +huggingface-hub==0.25.1 # via # timm # tokenizers @@ -119,7 +119,7 @@ numpy==1.26.4 # transformers omegaconf==2.3.0 # via effdet -onnx==1.16.2 +onnx==1.17.0 # via # -r ./extra-pdf-image.in # unstructured-inference @@ -138,7 +138,7 @@ packaging==24.1 # pikepdf # transformers # unstructured-pytesseract -pandas==2.2.2 +pandas==2.2.3 # via layoutparser pdf2image==1.17.0 # via @@ -152,7 +152,7 @@ pdfplumber==0.11.4 # via layoutparser pi-heif==0.18.0 # via -r ./extra-pdf-image.in -pikepdf==9.2.1 +pikepdf==9.3.0 # via -r ./extra-pdf-image.in pillow==10.4.0 # via @@ -194,7 +194,7 @@ pycparser==2.22 # cffi pyparsing==3.1.4 # via matplotlib -pypdf==5.0.0 +pypdf==5.0.1 # via # -c ./base.txt # -r ./extra-pdf-image.in @@ -205,7 +205,7 @@ python-dateutil==2.9.0.post0 # -c ./base.txt # matplotlib # pandas -python-multipart==0.0.9 +python-multipart==0.0.12 # via unstructured-inference pytz==2024.2 # via pandas @@ -216,7 +216,7 @@ pyyaml==6.0.2 # omegaconf # timm # transformers -rapidfuzz==3.9.7 +rapidfuzz==3.10.0 # via # -c ./base.txt # unstructured-inference @@ -279,7 +279,7 @@ typing-extensions==4.12.2 # iopath # pypdf # torch -tzdata==2024.1 +tzdata==2024.2 # via 
pandas unstructured-inference==0.7.36 # via -r ./extra-pdf-image.in diff --git a/requirements/extra-xlsx.txt b/requirements/extra-xlsx.txt index bd931000a..ff08577a7 100644 --- a/requirements/extra-xlsx.txt +++ b/requirements/extra-xlsx.txt @@ -14,7 +14,7 @@ numpy==1.26.4 # pandas openpyxl==3.1.5 # via -r ./extra-xlsx.in -pandas==2.2.2 +pandas==2.2.3 # via -r ./extra-xlsx.in python-dateutil==2.9.0.post0 # via @@ -26,7 +26,7 @@ six==1.16.0 # via # -c ./base.txt # python-dateutil -tzdata==2024.1 +tzdata==2024.2 # via pandas xlrd==2.0.1 # via -r ./extra-xlsx.in diff --git a/requirements/huggingface.txt b/requirements/huggingface.txt index daa466d3d..7b2e04bde 100644 --- a/requirements/huggingface.txt +++ b/requirements/huggingface.txt @@ -25,7 +25,7 @@ fsspec==2024.9.0 # via # huggingface-hub # torch -huggingface-hub==0.25.0 +huggingface-hub==0.25.1 # via # tokenizers # transformers diff --git a/requirements/ingest/airtable.in b/requirements/ingest/airtable.in deleted file mode 100644 index e6e85c3c6..000000000 --- a/requirements/ingest/airtable.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -pyairtable diff --git a/requirements/ingest/airtable.txt b/requirements/ingest/airtable.txt deleted file mode 100644 index e45acd598..000000000 --- a/requirements/ingest/airtable.txt +++ /dev/null @@ -1,44 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/airtable.in -# -annotated-types==0.7.0 - # via pydantic -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -inflection==0.5.1 - # via pyairtable -pyairtable==2.3.3 - # via -r ./ingest/airtable.in -pydantic==2.9.2 - # via pyairtable -pydantic-core==2.23.4 - # via pydantic -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # pyairtable -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # pyairtable - # pydantic - # pydantic-core -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # pyairtable - # requests diff --git a/requirements/ingest/astradb.in b/requirements/ingest/astradb.in deleted file mode 100644 index 0c99a4c93..000000000 --- a/requirements/ingest/astradb.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -astrapy diff --git a/requirements/ingest/astradb.txt b/requirements/ingest/astradb.txt deleted file mode 100644 index 46553972a..000000000 --- a/requirements/ingest/astradb.txt +++ /dev/null @@ -1,100 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/astradb.in -# -anyio==4.5.0 - # via - # -c ./ingest/../base.txt - # httpx -astrapy==1.4.2 - # via -r ./ingest/astradb.in -cassandra-driver==3.29.2 - # via cassio -cassio==0.1.9 - # via astrapy -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # httpcore - # httpx - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -click==8.1.7 - # via - # -c ./ingest/../base.txt - # geomet -deprecation==2.1.0 - # via astrapy -dnspython==2.6.1 - # via pymongo -exceptiongroup==1.2.2 - # via - # -c ./ingest/../base.txt - # anyio -geomet==0.2.1.post1 - # via cassandra-driver -h11==0.14.0 - # via - # -c ./ingest/../base.txt - # httpcore -h2==4.1.0 - # via httpx -hpack==4.0.0 - # via h2 -httpcore==1.0.5 - # via - # -c ./ingest/../base.txt - # httpx 
-httpx[http2]==0.27.2 - # via - # -c ./ingest/../base.txt - # astrapy -hyperframe==6.0.1 - # via h2 -idna==3.10 - # via - # -c ./ingest/../base.txt - # anyio - # httpx - # requests -numpy==1.26.4 - # via - # -c ./ingest/../base.txt - # cassio -packaging==24.1 - # via - # -c ./ingest/../base.txt - # deprecation -pymongo==4.9.1 - # via astrapy -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # cassio -six==1.16.0 - # via - # -c ./ingest/../base.txt - # geomet -sniffio==1.3.1 - # via - # -c ./ingest/../base.txt - # anyio - # httpx -toml==0.10.2 - # via astrapy -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # anyio -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests -uuid6==2024.7.10 - # via astrapy diff --git a/requirements/ingest/azure-cognitive-search.in b/requirements/ingest/azure-cognitive-search.in deleted file mode 100644 index 226649fb3..000000000 --- a/requirements/ingest/azure-cognitive-search.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -azure-search-documents diff --git a/requirements/ingest/azure-cognitive-search.txt b/requirements/ingest/azure-cognitive-search.txt deleted file mode 100644 index ef220fca5..000000000 --- a/requirements/ingest/azure-cognitive-search.txt +++ /dev/null @@ -1,45 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/azure-cognitive-search.in -# -azure-common==1.1.28 - # via azure-search-documents -azure-core==1.31.0 - # via azure-search-documents -azure-search-documents==11.5.1 - # via -r ./ingest/azure-cognitive-search.in -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -isodate==0.6.1 - # via azure-search-documents -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # azure-core -six==1.16.0 - # via - # -c ./ingest/../base.txt - # azure-core - # isodate -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # azure-core - # azure-search-documents -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests diff --git a/requirements/ingest/azure.in b/requirements/ingest/azure.in deleted file mode 100644 index e90750100..000000000 --- a/requirements/ingest/azure.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -adlfs -fsspec diff --git a/requirements/ingest/azure.txt b/requirements/ingest/azure.txt deleted file mode 100644 index b02308977..000000000 --- a/requirements/ingest/azure.txt +++ /dev/null @@ -1,108 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/azure.in -# -adlfs==2024.7.0 - # via -r ./ingest/azure.in -aiohappyeyeballs==2.4.0 - # via aiohttp -aiohttp==3.10.5 - # via adlfs -aiosignal==1.3.1 - # via aiohttp -async-timeout==4.0.3 - # via aiohttp -attrs==24.2.0 - # via aiohttp -azure-core==1.31.0 - # via - # adlfs - # azure-identity - # azure-storage-blob -azure-datalake-store==0.0.53 - # via adlfs -azure-identity==1.18.0 - # via adlfs -azure-storage-blob==12.23.0 - # via adlfs -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -cffi==1.17.1 - # via - # -c ./ingest/../base.txt - # azure-datalake-store - # cryptography -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -cryptography==43.0.1 - # via - # 
-c ./ingest/../base.txt - # azure-identity - # azure-storage-blob - # msal - # pyjwt -frozenlist==1.4.1 - # via - # aiohttp - # aiosignal -fsspec==2024.9.0 - # via - # -r ./ingest/azure.in - # adlfs -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests - # yarl -isodate==0.6.1 - # via azure-storage-blob -msal==1.31.0 - # via - # azure-datalake-store - # azure-identity - # msal-extensions -msal-extensions==1.2.0 - # via azure-identity -multidict==6.1.0 - # via - # aiohttp - # yarl -portalocker==2.10.1 - # via msal-extensions -pycparser==2.22 - # via - # -c ./ingest/../base.txt - # cffi -pyjwt[crypto]==2.9.0 - # via msal -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # azure-core - # azure-datalake-store - # msal -six==1.16.0 - # via - # -c ./ingest/../base.txt - # azure-core - # isodate -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # azure-core - # azure-identity - # azure-storage-blob - # multidict -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests -yarl==1.11.1 - # via aiohttp diff --git a/requirements/ingest/biomed.in b/requirements/ingest/biomed.in deleted file mode 100644 index 7a231f44f..000000000 --- a/requirements/ingest/biomed.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -bs4 diff --git a/requirements/ingest/biomed.txt b/requirements/ingest/biomed.txt deleted file mode 100644 index 770ec68a4..000000000 --- a/requirements/ingest/biomed.txt +++ /dev/null @@ -1,16 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/biomed.in -# -beautifulsoup4==4.12.3 - # via - # -c ./ingest/../base.txt - # bs4 -bs4==0.0.2 - # via -r ./ingest/biomed.in -soupsieve==2.6 - # via - # -c ./ingest/../base.txt - # beautifulsoup4 diff --git a/requirements/ingest/box.in b/requirements/ingest/box.in deleted file mode 100644 index 3b123f814..000000000 --- a/requirements/ingest/box.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -boxfs -fsspec diff --git a/requirements/ingest/box.txt b/requirements/ingest/box.txt deleted file mode 100644 index 297f02410..000000000 --- a/requirements/ingest/box.txt +++ /dev/null @@ -1,65 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/box.in -# -attrs==24.2.0 - # via boxsdk -boxfs==0.3.0 - # via -r ./ingest/box.in -boxsdk[jwt]==3.13.0 - # via boxfs -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -cffi==1.17.1 - # via - # -c ./ingest/../base.txt - # cryptography -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -cryptography==43.0.1 - # via - # -c ./ingest/../base.txt - # boxsdk -fsspec==2024.9.0 - # via - # -r ./ingest/box.in - # boxfs -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -pycparser==2.22 - # via - # -c ./ingest/../base.txt - # cffi -pyjwt==2.9.0 - # via boxsdk -python-dateutil==2.9.0.post0 - # via - # -c ./ingest/../base.txt - # boxsdk -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # boxsdk - # requests-toolbelt -requests-toolbelt==1.0.0 - # via - # -c ./ingest/../base.txt - # boxsdk -six==1.16.0 - # via - # -c ./ingest/../base.txt - # python-dateutil -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # boxsdk - # requests diff --git a/requirements/ingest/chroma.in b/requirements/ingest/chroma.in deleted file mode 100644 index b94a6b462..000000000 --- 
a/requirements/ingest/chroma.in +++ /dev/null @@ -1,10 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -chromadb>0.4.14 -importlib-metadata>=8.2.0 -# Future releases adds in typer-cli which breaks the resolution of typer as a library -typer<=0.9.0 -# tenacity 9.0.0 is being installed via chroma, but other dependencies (langchain) restrict tenacity -# to <9.0.0 and resolve to 8.5.0. -# The original langchain pin: https://github.com/langchain-ai/langchain/pull/849/ -tenacity==8.5.0 diff --git a/requirements/ingest/chroma.txt b/requirements/ingest/chroma.txt deleted file mode 100644 index e0bd8c909..000000000 --- a/requirements/ingest/chroma.txt +++ /dev/null @@ -1,256 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/chroma.in -# -annotated-types==0.7.0 - # via pydantic -anyio==4.5.0 - # via - # -c ./ingest/../base.txt - # httpx - # starlette - # watchfiles -backoff==2.2.1 - # via - # -c ./ingest/../base.txt - # opentelemetry-exporter-otlp-proto-grpc - # posthog -bcrypt==4.2.0 - # via chromadb -cachetools==5.5.0 - # via google-auth -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # httpx - # kubernetes - # pulsar-client - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -chroma-hnswlib==0.7.3 - # via chromadb -chromadb==0.4.17 - # via -r ./ingest/chroma.in -click==8.1.7 - # via - # -c ./ingest/../base.txt - # typer - # uvicorn -coloredlogs==15.0.1 - # via onnxruntime -deprecated==1.2.14 - # via opentelemetry-api -durationpy==0.7 - # via kubernetes -exceptiongroup==1.2.2 - # via - # -c ./ingest/../base.txt - # anyio -fastapi==0.115.0 - # via chromadb -filelock==3.16.1 - # via huggingface-hub -flatbuffers==24.3.25 - # via onnxruntime -fsspec==2024.9.0 - # via huggingface-hub -google-auth==2.35.0 - # via kubernetes -googleapis-common-protos==1.65.0 - # via opentelemetry-exporter-otlp-proto-grpc -grpcio==1.66.1 - # via - # -c ./ingest/../deps/constraints.txt - # chromadb - # opentelemetry-exporter-otlp-proto-grpc -h11==0.14.0 - # via - # -c ./ingest/../base.txt - # httpcore - # uvicorn -httptools==0.6.1 - # via uvicorn -huggingface-hub==0.25.0 - # via tokenizers -humanfriendly==10.0 - # via coloredlogs -idna==3.10 - # via - # -c ./ingest/../base.txt - # anyio - # httpx - # requests -importlib-metadata==8.5.0 - # via - # -c ./ingest/../deps/constraints.txt - # -r ./ingest/chroma.in -importlib-resources==6.4.5 - # via chromadb -kubernetes==31.0.0 - # via chromadb -monotonic==1.6 - # via posthog -mpmath==1.3.0 - # via sympy -numpy==1.26.4 - # via - # -c ./ingest/../base.txt - # chroma-hnswlib - # chromadb - # onnxruntime -oauthlib==3.2.2 - # via - # kubernetes - # requests-oauthlib -onnxruntime==1.19.2 - # via chromadb -opentelemetry-api==1.16.0 - # via - # chromadb - # opentelemetry-exporter-otlp-proto-grpc - # opentelemetry-sdk -opentelemetry-exporter-otlp-proto-grpc==1.16.0 - # via chromadb -opentelemetry-proto==1.16.0 - # via opentelemetry-exporter-otlp-proto-grpc -opentelemetry-sdk==1.16.0 - # via - # chromadb - # opentelemetry-exporter-otlp-proto-grpc -opentelemetry-semantic-conventions==0.37b0 - # via opentelemetry-sdk -overrides==7.7.0 - # via chromadb -packaging==24.1 - # via - # -c ./ingest/../base.txt - # build - # huggingface-hub - # onnxruntime -posthog==3.6.6 - # via chromadb -protobuf==4.25.5 - # via - # -c ./ingest/../deps/constraints.txt - # googleapis-common-protos - # onnxruntime - # opentelemetry-proto -pulsar-client==3.5.0 - # via chromadb 
-pyasn1==0.6.1 - # via - # pyasn1-modules - # rsa -pyasn1-modules==0.4.1 - # via google-auth -pydantic==2.9.2 - # via - # chromadb - # fastapi -pydantic-core==2.23.4 - # via pydantic -pypika==0.48.9 - # via chromadb -python-dateutil==2.9.0.post0 - # via - # -c ./ingest/../base.txt - # kubernetes - # posthog -python-dotenv==1.0.1 - # via uvicorn -pyyaml==6.0.2 - # via - # chromadb - # huggingface-hub - # kubernetes - # uvicorn -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # chromadb - # huggingface-hub - # kubernetes - # posthog - # requests-oauthlib -requests-oauthlib==2.0.0 - # via kubernetes -rsa==4.9 - # via google-auth -six==1.16.0 - # via - # -c ./ingest/../base.txt - # kubernetes - # posthog - # python-dateutil -sniffio==1.3.1 - # via - # -c ./ingest/../base.txt - # anyio - # httpx -starlette==0.38.5 - # via fastapi -sympy==1.13.3 - # via onnxruntime -tenacity==8.5.0 - # via - # -r ./ingest/chroma.in - # chromadb -tokenizers==0.19.1 - # via - # -c ./ingest/../deps/constraints.txt - # chromadb -tqdm==4.66.5 - # via - # -c ./ingest/../base.txt - # chromadb - # huggingface-hub -typer==0.9.0 - # via - # -r ./ingest/chroma.in - # chromadb -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # anyio - # chromadb - # fastapi - # huggingface-hub - # opentelemetry-sdk - # pydantic - # pydantic-core - # starlette - # typer - # uvicorn -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # kubernetes - # requests -uvicorn[standard]==0.30.6 - # via chromadb -uvloop==0.20.0 - # via uvicorn -watchfiles==0.24.0 - # via uvicorn -websocket-client==1.8.0 - # via kubernetes -websockets==13.0.1 - # via uvicorn -wrapt==1.16.0 - # via - # -c ./ingest/../base.txt - # deprecated - # opentelemetry-instrumentation -zipp==3.20.2 - # via - # importlib-metadata - # importlib-resources - -# The following packages are considered to be unsafe in a requirements file: -# setuptools diff --git a/requirements/ingest/clarifai.in b/requirements/ingest/clarifai.in deleted file mode 100644 index becc852ac..000000000 --- a/requirements/ingest/clarifai.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -clarifai diff --git a/requirements/ingest/clarifai.txt b/requirements/ingest/clarifai.txt deleted file mode 100644 index 15c99bf72..000000000 --- a/requirements/ingest/clarifai.txt +++ /dev/null @@ -1,83 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/clarifai.in -# -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -clarifai==10.7.0 - # via -r ./ingest/clarifai.in -clarifai-grpc==10.7.1 - # via clarifai -contextlib2==21.6.0 - # via schema -googleapis-common-protos==1.65.0 - # via clarifai-grpc -grpcio==1.66.1 - # via - # -c ./ingest/../deps/constraints.txt - # clarifai-grpc -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -inquirerpy==0.3.4 - # via clarifai -markdown-it-py==3.0.0 - # via rich -mdurl==0.1.2 - # via markdown-it-py -numpy==1.26.4 - # via - # -c ./ingest/../base.txt - # clarifai - # tritonclient -pfzy==0.3.4 - # via inquirerpy -pillow==10.4.0 - # via clarifai -prompt-toolkit==3.0.47 - # via inquirerpy -protobuf==4.25.5 - # via - # -c ./ingest/../deps/constraints.txt - # clarifai-grpc - # googleapis-common-protos -pygments==2.18.0 - # via rich -python-rapidjson==1.20 - # via tritonclient -pyyaml==6.0.2 - # via clarifai -requests==2.32.3 - # 
via - # -c ./ingest/../base.txt - # clarifai-grpc -rich==13.8.1 - # via clarifai -schema==0.7.5 - # via clarifai -tabulate==0.9.0 - # via - # -c ./ingest/../base.txt - # clarifai -tqdm==4.66.5 - # via - # -c ./ingest/../base.txt - # clarifai -tritonclient==2.41.1 - # via clarifai -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests -wcwidth==0.2.13 - # via prompt-toolkit diff --git a/requirements/ingest/confluence.in b/requirements/ingest/confluence.in deleted file mode 100644 index 37f92cb8c..000000000 --- a/requirements/ingest/confluence.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -atlassian-python-api diff --git a/requirements/ingest/confluence.txt b/requirements/ingest/confluence.txt deleted file mode 100644 index a54fa71d8..000000000 --- a/requirements/ingest/confluence.txt +++ /dev/null @@ -1,56 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/confluence.in -# -atlassian-python-api==3.41.16 - # via -r ./ingest/confluence.in -beautifulsoup4==4.12.3 - # via - # -c ./ingest/../base.txt - # atlassian-python-api -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -deprecated==1.2.14 - # via atlassian-python-api -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -jmespath==1.0.1 - # via atlassian-python-api -oauthlib==3.2.2 - # via - # atlassian-python-api - # requests-oauthlib -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # atlassian-python-api - # requests-oauthlib -requests-oauthlib==2.0.0 - # via atlassian-python-api -six==1.16.0 - # via - # -c ./ingest/../base.txt - # atlassian-python-api -soupsieve==2.6 - # via - # -c ./ingest/../base.txt - # beautifulsoup4 -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests -wrapt==1.16.0 - # via - # -c ./ingest/../base.txt - # deprecated diff --git a/requirements/ingest/databricks-volumes.in b/requirements/ingest/databricks-volumes.in deleted file mode 100644 index 8bad8aec3..000000000 --- a/requirements/ingest/databricks-volumes.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -databricks-sdk diff --git a/requirements/ingest/databricks-volumes.txt b/requirements/ingest/databricks-volumes.txt deleted file mode 100644 index ac6f34cc5..000000000 --- a/requirements/ingest/databricks-volumes.txt +++ /dev/null @@ -1,41 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/databricks-volumes.in -# -cachetools==5.5.0 - # via google-auth -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -databricks-sdk==0.32.3 - # via -r ./ingest/databricks-volumes.in -google-auth==2.35.0 - # via databricks-sdk -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -pyasn1==0.6.1 - # via - # pyasn1-modules - # rsa -pyasn1-modules==0.4.1 - # via google-auth -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # databricks-sdk -rsa==4.9 - # via google-auth -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests diff --git a/requirements/ingest/delta-table.in b/requirements/ingest/delta-table.in deleted file mode 100644 index 47d4079bd..000000000 --- a/requirements/ingest/delta-table.in +++ 
/dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -deltalake<=0.19.1 -fsspec diff --git a/requirements/ingest/delta-table.txt b/requirements/ingest/delta-table.txt deleted file mode 100644 index 68f8dfae7..000000000 --- a/requirements/ingest/delta-table.txt +++ /dev/null @@ -1,16 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/delta-table.in -# -deltalake==0.19.1 - # via -r ./ingest/delta-table.in -fsspec==2024.9.0 - # via -r ./ingest/delta-table.in -numpy==1.26.4 - # via - # -c ./ingest/../base.txt - # pyarrow -pyarrow==17.0.0 - # via deltalake diff --git a/requirements/ingest/discord.in b/requirements/ingest/discord.in deleted file mode 100644 index 83bbeed43..000000000 --- a/requirements/ingest/discord.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -discord-py diff --git a/requirements/ingest/discord.txt b/requirements/ingest/discord.txt deleted file mode 100644 index 6368195ea..000000000 --- a/requirements/ingest/discord.txt +++ /dev/null @@ -1,36 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/discord.in -# -aiohappyeyeballs==2.4.0 - # via aiohttp -aiohttp==3.10.5 - # via discord-py -aiosignal==1.3.1 - # via aiohttp -async-timeout==4.0.3 - # via aiohttp -attrs==24.2.0 - # via aiohttp -discord-py==2.4.0 - # via -r ./ingest/discord.in -frozenlist==1.4.1 - # via - # aiohttp - # aiosignal -idna==3.10 - # via - # -c ./ingest/../base.txt - # yarl -multidict==6.1.0 - # via - # aiohttp - # yarl -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # multidict -yarl==1.11.1 - # via aiohttp diff --git a/requirements/ingest/dropbox.in b/requirements/ingest/dropbox.in deleted file mode 100644 index b9b0fe1d3..000000000 --- a/requirements/ingest/dropbox.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -dropboxdrivefs -fsspec diff --git a/requirements/ingest/dropbox.txt b/requirements/ingest/dropbox.txt deleted file mode 100644 index bbba1ba1b..000000000 --- a/requirements/ingest/dropbox.txt +++ /dev/null @@ -1,45 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/dropbox.in -# -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -dropbox==12.0.2 - # via dropboxdrivefs -dropboxdrivefs==1.4.1 - # via -r ./ingest/dropbox.in -fsspec==2024.9.0 - # via - # -r ./ingest/dropbox.in - # dropboxdrivefs -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -ply==3.11 - # via stone -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # dropbox - # dropboxdrivefs -six==1.16.0 - # via - # -c ./ingest/../base.txt - # dropbox - # stone -stone==3.3.1 - # via dropbox -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests diff --git a/requirements/ingest/elasticsearch.in b/requirements/ingest/elasticsearch.in deleted file mode 100644 index 5b6d0db36..000000000 --- a/requirements/ingest/elasticsearch.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -elasticsearch[async] diff --git a/requirements/ingest/elasticsearch.txt b/requirements/ingest/elasticsearch.txt deleted file mode 100644 index b23d77117..000000000 --- a/requirements/ingest/elasticsearch.txt +++ /dev/null @@ -1,47 +0,0 @@ -# -# This file is autogenerated by 
pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/elasticsearch.in -# -aiohappyeyeballs==2.4.0 - # via aiohttp -aiohttp==3.10.5 - # via elasticsearch -aiosignal==1.3.1 - # via aiohttp -async-timeout==4.0.3 - # via aiohttp -attrs==24.2.0 - # via aiohttp -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # elastic-transport -elastic-transport==8.15.0 - # via elasticsearch -elasticsearch[async]==8.15.1 - # via -r ./ingest/elasticsearch.in -frozenlist==1.4.1 - # via - # aiohttp - # aiosignal -idna==3.10 - # via - # -c ./ingest/../base.txt - # yarl -multidict==6.1.0 - # via - # aiohttp - # yarl -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # multidict -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # elastic-transport -yarl==1.11.1 - # via aiohttp diff --git a/requirements/ingest/embed-aws-bedrock.in b/requirements/ingest/embed-aws-bedrock.in deleted file mode 100644 index dd73b768d..000000000 --- a/requirements/ingest/embed-aws-bedrock.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -boto3 -langchain-community diff --git a/requirements/ingest/embed-aws-bedrock.txt b/requirements/ingest/embed-aws-bedrock.txt deleted file mode 100644 index 69d0e147e..000000000 --- a/requirements/ingest/embed-aws-bedrock.txt +++ /dev/null @@ -1,191 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/embed-aws-bedrock.in -# -aiohappyeyeballs==2.4.0 - # via aiohttp -aiohttp==3.10.5 - # via - # langchain - # langchain-community -aiosignal==1.3.1 - # via aiohttp -annotated-types==0.7.0 - # via pydantic -anyio==4.5.0 - # via - # -c ./ingest/../base.txt - # httpx -async-timeout==4.0.3 - # via - # aiohttp - # langchain -attrs==24.2.0 - # via aiohttp -boto3==1.34.131 - # via -r ./ingest/embed-aws-bedrock.in -botocore==1.34.131 - # via - # -c ./ingest/../deps/constraints.txt - # boto3 - # s3transfer -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # httpcore - # httpx - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -dataclasses-json==0.6.7 - # via - # -c ./ingest/../base.txt - # langchain-community -exceptiongroup==1.2.2 - # via - # -c ./ingest/../base.txt - # anyio -frozenlist==1.4.1 - # via - # aiohttp - # aiosignal -h11==0.14.0 - # via - # -c ./ingest/../base.txt - # httpcore -httpcore==1.0.5 - # via - # -c ./ingest/../base.txt - # httpx -httpx==0.27.2 - # via - # -c ./ingest/../base.txt - # langsmith -idna==3.10 - # via - # -c ./ingest/../base.txt - # anyio - # httpx - # requests - # yarl -jmespath==1.0.1 - # via - # boto3 - # botocore -jsonpatch==1.33 - # via langchain-core -jsonpointer==3.0.0 - # via jsonpatch -langchain==0.3.0 - # via langchain-community -langchain-community==0.3.0 - # via -r ./ingest/embed-aws-bedrock.in -langchain-core==0.3.2 - # via - # langchain - # langchain-community - # langchain-text-splitters -langchain-text-splitters==0.3.0 - # via langchain -langsmith==0.1.125 - # via - # langchain - # langchain-community - # langchain-core -marshmallow==3.22.0 - # via - # -c ./ingest/../base.txt - # dataclasses-json -multidict==6.1.0 - # via - # aiohttp - # yarl -mypy-extensions==1.0.0 - # via - # -c ./ingest/../base.txt - # typing-inspect -numpy==1.26.4 - # via - # -c ./ingest/../base.txt - # langchain - # langchain-community -orjson==3.10.7 - # via langsmith -packaging==24.1 - # via - # -c ./ingest/../base.txt - # langchain-core - # 
marshmallow -pydantic==2.9.2 - # via - # langchain - # langchain-core - # langsmith - # pydantic-settings -pydantic-core==2.23.4 - # via pydantic -pydantic-settings==2.5.2 - # via langchain-community -python-dateutil==2.9.0.post0 - # via - # -c ./ingest/../base.txt - # botocore -python-dotenv==1.0.1 - # via pydantic-settings -pyyaml==6.0.2 - # via - # langchain - # langchain-community - # langchain-core -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # langchain - # langchain-community - # langsmith -s3transfer==0.10.2 - # via boto3 -six==1.16.0 - # via - # -c ./ingest/../base.txt - # python-dateutil -sniffio==1.3.1 - # via - # -c ./ingest/../base.txt - # anyio - # httpx -sqlalchemy==2.0.35 - # via - # langchain - # langchain-community -tenacity==8.5.0 - # via - # langchain - # langchain-community - # langchain-core -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # anyio - # langchain-core - # multidict - # pydantic - # pydantic-core - # sqlalchemy - # typing-inspect -typing-inspect==0.9.0 - # via - # -c ./ingest/../base.txt - # dataclasses-json -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # botocore - # requests -yarl==1.11.1 - # via aiohttp diff --git a/requirements/ingest/embed-huggingface.in b/requirements/ingest/embed-huggingface.in deleted file mode 100644 index 88b7218f8..000000000 --- a/requirements/ingest/embed-huggingface.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt - -langchain-huggingface diff --git a/requirements/ingest/embed-huggingface.txt b/requirements/ingest/embed-huggingface.txt deleted file mode 100644 index 24756b413..000000000 --- a/requirements/ingest/embed-huggingface.txt +++ /dev/null @@ -1,170 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/embed-huggingface.in -# -annotated-types==0.7.0 - # via pydantic -anyio==4.5.0 - # via - # -c ./ingest/../base.txt - # httpx -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # httpcore - # httpx - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -exceptiongroup==1.2.2 - # via - # -c ./ingest/../base.txt - # anyio -filelock==3.16.1 - # via - # huggingface-hub - # torch - # transformers -fsspec==2024.9.0 - # via - # huggingface-hub - # torch -h11==0.14.0 - # via - # -c ./ingest/../base.txt - # httpcore -httpcore==1.0.5 - # via - # -c ./ingest/../base.txt - # httpx -httpx==0.27.2 - # via - # -c ./ingest/../base.txt - # langsmith -huggingface-hub==0.25.0 - # via - # langchain-huggingface - # sentence-transformers - # tokenizers - # transformers -idna==3.10 - # via - # -c ./ingest/../base.txt - # anyio - # httpx - # requests -jinja2==3.1.4 - # via torch -joblib==1.4.2 - # via - # -c ./ingest/../base.txt - # scikit-learn -jsonpatch==1.33 - # via langchain-core -jsonpointer==3.0.0 - # via jsonpatch -langchain-core==0.3.2 - # via langchain-huggingface -langchain-huggingface==0.1.0 - # via -r ./ingest/embed-huggingface.in -langsmith==0.1.125 - # via langchain-core -markupsafe==2.1.5 - # via jinja2 -mpmath==1.3.0 - # via sympy -networkx==3.2.1 - # via torch -numpy==1.26.4 - # via - # -c ./ingest/../base.txt - # scikit-learn - # scipy - # transformers -orjson==3.10.7 - # via langsmith -packaging==24.1 - # via - # -c ./ingest/../base.txt - # huggingface-hub - # langchain-core - # transformers -pillow==10.4.0 - # via sentence-transformers -pydantic==2.9.2 - # via - # langchain-core - # langsmith 
-pydantic-core==2.23.4 - # via pydantic -pyyaml==6.0.2 - # via - # huggingface-hub - # langchain-core - # transformers -regex==2024.9.11 - # via - # -c ./ingest/../base.txt - # transformers -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # huggingface-hub - # langsmith - # transformers -safetensors==0.4.5 - # via transformers -scikit-learn==1.5.2 - # via sentence-transformers -scipy==1.13.1 - # via - # scikit-learn - # sentence-transformers -sentence-transformers==3.1.1 - # via langchain-huggingface -sniffio==1.3.1 - # via - # -c ./ingest/../base.txt - # anyio - # httpx -sympy==1.13.3 - # via torch -tenacity==8.5.0 - # via langchain-core -threadpoolctl==3.5.0 - # via scikit-learn -tokenizers==0.19.1 - # via - # -c ./ingest/../deps/constraints.txt - # langchain-huggingface - # transformers -torch==2.4.1 - # via sentence-transformers -tqdm==4.66.5 - # via - # -c ./ingest/../base.txt - # huggingface-hub - # sentence-transformers - # transformers -transformers==4.44.2 - # via - # langchain-huggingface - # sentence-transformers -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # anyio - # huggingface-hub - # langchain-core - # pydantic - # pydantic-core - # torch -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests diff --git a/requirements/ingest/embed-mixedbreadai.in b/requirements/ingest/embed-mixedbreadai.in deleted file mode 100644 index 929e3f0ae..000000000 --- a/requirements/ingest/embed-mixedbreadai.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -mixedbread-ai \ No newline at end of file diff --git a/requirements/ingest/embed-mixedbreadai.txt b/requirements/ingest/embed-mixedbreadai.txt deleted file mode 100644 index da63dcbe9..000000000 --- a/requirements/ingest/embed-mixedbreadai.txt +++ /dev/null @@ -1,56 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/embed-mixedbreadai.in -# -annotated-types==0.7.0 - # via pydantic -anyio==4.5.0 - # via - # -c ./ingest/../base.txt - # httpx -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # httpcore - # httpx -exceptiongroup==1.2.2 - # via - # -c ./ingest/../base.txt - # anyio -h11==0.14.0 - # via - # -c ./ingest/../base.txt - # httpcore -httpcore==1.0.5 - # via - # -c ./ingest/../base.txt - # httpx -httpx==0.27.2 - # via - # -c ./ingest/../base.txt - # mixedbread-ai -idna==3.10 - # via - # -c ./ingest/../base.txt - # anyio - # httpx -mixedbread-ai==2.2.6 - # via -r ./ingest/embed-mixedbreadai.in -pydantic==2.9.2 - # via mixedbread-ai -pydantic-core==2.23.4 - # via pydantic -sniffio==1.3.1 - # via - # -c ./ingest/../base.txt - # anyio - # httpx -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # anyio - # mixedbread-ai - # pydantic - # pydantic-core diff --git a/requirements/ingest/embed-octoai.in b/requirements/ingest/embed-octoai.in deleted file mode 100644 index ede6c81e8..000000000 --- a/requirements/ingest/embed-octoai.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -openai -tiktoken diff --git a/requirements/ingest/embed-octoai.txt b/requirements/ingest/embed-octoai.txt deleted file mode 100644 index 87d04cc36..000000000 --- a/requirements/ingest/embed-octoai.txt +++ /dev/null @@ -1,87 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/embed-octoai.in -# -annotated-types==0.7.0 - # via pydantic -anyio==4.5.0 - # via - # -c 
./ingest/../base.txt - # httpx - # openai -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # httpcore - # httpx - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -distro==1.9.0 - # via openai -exceptiongroup==1.2.2 - # via - # -c ./ingest/../base.txt - # anyio -h11==0.14.0 - # via - # -c ./ingest/../base.txt - # httpcore -httpcore==1.0.5 - # via - # -c ./ingest/../base.txt - # httpx -httpx==0.27.2 - # via - # -c ./ingest/../base.txt - # openai -idna==3.10 - # via - # -c ./ingest/../base.txt - # anyio - # httpx - # requests -jiter==0.5.0 - # via openai -openai==1.46.1 - # via -r ./ingest/embed-octoai.in -pydantic==2.9.2 - # via openai -pydantic-core==2.23.4 - # via pydantic -regex==2024.9.11 - # via - # -c ./ingest/../base.txt - # tiktoken -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # tiktoken -sniffio==1.3.1 - # via - # -c ./ingest/../base.txt - # anyio - # httpx - # openai -tiktoken==0.7.0 - # via -r ./ingest/embed-octoai.in -tqdm==4.66.5 - # via - # -c ./ingest/../base.txt - # openai -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # anyio - # openai - # pydantic - # pydantic-core -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests diff --git a/requirements/ingest/embed-openai.in b/requirements/ingest/embed-openai.in deleted file mode 100644 index fb130e9cb..000000000 --- a/requirements/ingest/embed-openai.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt - -langchain-openai diff --git a/requirements/ingest/embed-openai.txt b/requirements/ingest/embed-openai.txt deleted file mode 100644 index 7490efc76..000000000 --- a/requirements/ingest/embed-openai.txt +++ /dev/null @@ -1,113 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/embed-openai.in -# -annotated-types==0.7.0 - # via pydantic -anyio==4.5.0 - # via - # -c ./ingest/../base.txt - # httpx - # openai -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # httpcore - # httpx - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -distro==1.9.0 - # via openai -exceptiongroup==1.2.2 - # via - # -c ./ingest/../base.txt - # anyio -h11==0.14.0 - # via - # -c ./ingest/../base.txt - # httpcore -httpcore==1.0.5 - # via - # -c ./ingest/../base.txt - # httpx -httpx==0.27.2 - # via - # -c ./ingest/../base.txt - # langsmith - # openai -idna==3.10 - # via - # -c ./ingest/../base.txt - # anyio - # httpx - # requests -jiter==0.5.0 - # via openai -jsonpatch==1.33 - # via langchain-core -jsonpointer==3.0.0 - # via jsonpatch -langchain-core==0.3.2 - # via langchain-openai -langchain-openai==0.2.0 - # via -r ./ingest/embed-openai.in -langsmith==0.1.125 - # via langchain-core -openai==1.46.1 - # via langchain-openai -orjson==3.10.7 - # via langsmith -packaging==24.1 - # via - # -c ./ingest/../base.txt - # langchain-core -pydantic==2.9.2 - # via - # langchain-core - # langsmith - # openai -pydantic-core==2.23.4 - # via pydantic -pyyaml==6.0.2 - # via langchain-core -regex==2024.9.11 - # via - # -c ./ingest/../base.txt - # tiktoken -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # langsmith - # tiktoken -sniffio==1.3.1 - # via - # -c ./ingest/../base.txt - # anyio - # httpx - # openai -tenacity==8.5.0 - # via langchain-core -tiktoken==0.7.0 - # via langchain-openai -tqdm==4.66.5 - # via - # -c ./ingest/../base.txt - # openai -typing-extensions==4.12.2 - # via - # -c 
./ingest/../base.txt - # anyio - # langchain-core - # openai - # pydantic - # pydantic-core -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests diff --git a/requirements/ingest/embed-vertexai.in b/requirements/ingest/embed-vertexai.in deleted file mode 100644 index ba68465a8..000000000 --- a/requirements/ingest/embed-vertexai.in +++ /dev/null @@ -1,5 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -langchain -langchain-community -langchain-google-vertexai diff --git a/requirements/ingest/embed-vertexai.txt b/requirements/ingest/embed-vertexai.txt deleted file mode 100644 index a912d25cb..000000000 --- a/requirements/ingest/embed-vertexai.txt +++ /dev/null @@ -1,275 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/embed-vertexai.in -# -aiohappyeyeballs==2.4.0 - # via aiohttp -aiohttp==3.10.5 - # via - # langchain - # langchain-community -aiosignal==1.3.1 - # via aiohttp -annotated-types==0.7.0 - # via pydantic -anyio==4.5.0 - # via - # -c ./ingest/../base.txt - # httpx -async-timeout==4.0.3 - # via - # aiohttp - # langchain -attrs==24.2.0 - # via aiohttp -cachetools==5.5.0 - # via google-auth -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # httpcore - # httpx - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -dataclasses-json==0.6.7 - # via - # -c ./ingest/../base.txt - # langchain-community -docstring-parser==0.16 - # via google-cloud-aiplatform -exceptiongroup==1.2.2 - # via - # -c ./ingest/../base.txt - # anyio -frozenlist==1.4.1 - # via - # aiohttp - # aiosignal -google-api-core[grpc]==2.20.0 - # via - # google-cloud-aiplatform - # google-cloud-bigquery - # google-cloud-core - # google-cloud-resource-manager - # google-cloud-storage -google-auth==2.35.0 - # via - # google-api-core - # google-cloud-aiplatform - # google-cloud-bigquery - # google-cloud-core - # google-cloud-resource-manager - # google-cloud-storage -google-cloud-aiplatform==1.67.1 - # via langchain-google-vertexai -google-cloud-bigquery==3.25.0 - # via google-cloud-aiplatform -google-cloud-core==2.4.1 - # via - # google-cloud-bigquery - # google-cloud-storage -google-cloud-resource-manager==1.12.5 - # via google-cloud-aiplatform -google-cloud-storage==2.18.2 - # via - # google-cloud-aiplatform - # langchain-google-vertexai -google-crc32c==1.6.0 - # via - # google-cloud-storage - # google-resumable-media -google-resumable-media==2.7.2 - # via - # google-cloud-bigquery - # google-cloud-storage -googleapis-common-protos[grpc]==1.65.0 - # via - # google-api-core - # grpc-google-iam-v1 - # grpcio-status -grpc-google-iam-v1==0.13.1 - # via google-cloud-resource-manager -grpcio==1.66.1 - # via - # -c ./ingest/../deps/constraints.txt - # google-api-core - # googleapis-common-protos - # grpc-google-iam-v1 - # grpcio-status -grpcio-status==1.62.3 - # via google-api-core -h11==0.14.0 - # via - # -c ./ingest/../base.txt - # httpcore -httpcore==1.0.5 - # via - # -c ./ingest/../base.txt - # httpx -httpx==0.27.2 - # via - # -c ./ingest/../base.txt - # langchain-google-vertexai - # langsmith -httpx-sse==0.4.0 - # via langchain-google-vertexai -idna==3.10 - # via - # -c ./ingest/../base.txt - # anyio - # httpx - # requests - # yarl -jsonpatch==1.33 - # via langchain-core -jsonpointer==3.0.0 - # via jsonpatch -langchain==0.3.0 - # via - # -r ./ingest/embed-vertexai.in - # langchain-community -langchain-community==0.3.0 - # via -r 
./ingest/embed-vertexai.in -langchain-core==0.3.2 - # via - # langchain - # langchain-community - # langchain-google-vertexai - # langchain-text-splitters -langchain-google-vertexai==2.0.1 - # via -r ./ingest/embed-vertexai.in -langchain-text-splitters==0.3.0 - # via langchain -langsmith==0.1.125 - # via - # langchain - # langchain-community - # langchain-core -marshmallow==3.22.0 - # via - # -c ./ingest/../base.txt - # dataclasses-json -multidict==6.1.0 - # via - # aiohttp - # yarl -mypy-extensions==1.0.0 - # via - # -c ./ingest/../base.txt - # typing-inspect -numpy==1.26.4 - # via - # -c ./ingest/../base.txt - # langchain - # langchain-community - # shapely -orjson==3.10.7 - # via langsmith -packaging==24.1 - # via - # -c ./ingest/../base.txt - # google-cloud-aiplatform - # google-cloud-bigquery - # langchain-core - # marshmallow -proto-plus==1.24.0 - # via - # google-api-core - # google-cloud-aiplatform - # google-cloud-resource-manager -protobuf==4.25.5 - # via - # -c ./ingest/../deps/constraints.txt - # google-api-core - # google-cloud-aiplatform - # google-cloud-resource-manager - # googleapis-common-protos - # grpc-google-iam-v1 - # grpcio-status - # proto-plus -pyasn1==0.6.1 - # via - # pyasn1-modules - # rsa -pyasn1-modules==0.4.1 - # via google-auth -pydantic==2.9.2 - # via - # google-cloud-aiplatform - # langchain - # langchain-core - # langchain-google-vertexai - # langsmith - # pydantic-settings -pydantic-core==2.23.4 - # via pydantic -pydantic-settings==2.5.2 - # via langchain-community -python-dateutil==2.9.0.post0 - # via - # -c ./ingest/../base.txt - # google-cloud-bigquery -python-dotenv==1.0.1 - # via pydantic-settings -pyyaml==6.0.2 - # via - # langchain - # langchain-community - # langchain-core -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # google-api-core - # google-cloud-bigquery - # google-cloud-storage - # langchain - # langchain-community - # langsmith -rsa==4.9 - # via google-auth -shapely==2.0.6 - # via google-cloud-aiplatform -six==1.16.0 - # via - # -c ./ingest/../base.txt - # python-dateutil -sniffio==1.3.1 - # via - # -c ./ingest/../base.txt - # anyio - # httpx -sqlalchemy==2.0.35 - # via - # langchain - # langchain-community -tenacity==8.5.0 - # via - # langchain - # langchain-community - # langchain-core -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # anyio - # langchain-core - # multidict - # pydantic - # pydantic-core - # sqlalchemy - # typing-inspect -typing-inspect==0.9.0 - # via - # -c ./ingest/../base.txt - # dataclasses-json -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests -yarl==1.11.1 - # via aiohttp diff --git a/requirements/ingest/embed-voyageai.in b/requirements/ingest/embed-voyageai.in deleted file mode 100644 index efe01c7b0..000000000 --- a/requirements/ingest/embed-voyageai.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -langchain -langchain-voyageai diff --git a/requirements/ingest/embed-voyageai.txt b/requirements/ingest/embed-voyageai.txt deleted file mode 100644 index a1d3572e8..000000000 --- a/requirements/ingest/embed-voyageai.txt +++ /dev/null @@ -1,147 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/embed-voyageai.in -# -aiohappyeyeballs==2.4.0 - # via aiohttp -aiohttp==3.10.5 - # via - # langchain - # voyageai -aiolimiter==1.1.0 - # via voyageai -aiosignal==1.3.1 - # via aiohttp -annotated-types==0.7.0 - # via pydantic -anyio==4.5.0 - # 
via - # -c ./ingest/../base.txt - # httpx -async-timeout==4.0.3 - # via - # aiohttp - # langchain -attrs==24.2.0 - # via aiohttp -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # httpcore - # httpx - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -exceptiongroup==1.2.2 - # via - # -c ./ingest/../base.txt - # anyio -frozenlist==1.4.1 - # via - # aiohttp - # aiosignal -h11==0.14.0 - # via - # -c ./ingest/../base.txt - # httpcore -httpcore==1.0.5 - # via - # -c ./ingest/../base.txt - # httpx -httpx==0.27.2 - # via - # -c ./ingest/../base.txt - # langsmith -idna==3.10 - # via - # -c ./ingest/../base.txt - # anyio - # httpx - # requests - # yarl -jsonpatch==1.33 - # via langchain-core -jsonpointer==3.0.0 - # via jsonpatch -langchain==0.3.0 - # via -r ./ingest/embed-voyageai.in -langchain-core==0.3.2 - # via - # langchain - # langchain-text-splitters - # langchain-voyageai -langchain-text-splitters==0.3.0 - # via langchain -langchain-voyageai==0.1.2 - # via -r ./ingest/embed-voyageai.in -langsmith==0.1.125 - # via - # langchain - # langchain-core -multidict==6.1.0 - # via - # aiohttp - # yarl -numpy==1.26.4 - # via - # -c ./ingest/../base.txt - # langchain - # voyageai -orjson==3.10.7 - # via langsmith -packaging==24.1 - # via - # -c ./ingest/../base.txt - # langchain-core -pydantic==2.9.2 - # via - # langchain - # langchain-core - # langchain-voyageai - # langsmith -pydantic-core==2.23.4 - # via pydantic -pyyaml==6.0.2 - # via - # langchain - # langchain-core -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # langchain - # langsmith - # voyageai -sniffio==1.3.1 - # via - # -c ./ingest/../base.txt - # anyio - # httpx -sqlalchemy==2.0.35 - # via langchain -tenacity==8.5.0 - # via - # langchain - # langchain-core - # voyageai -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # anyio - # langchain-core - # multidict - # pydantic - # pydantic-core - # sqlalchemy -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests -voyageai==0.2.3 - # via langchain-voyageai -yarl==1.11.1 - # via aiohttp diff --git a/requirements/ingest/gcs.in b/requirements/ingest/gcs.in deleted file mode 100644 index 842468b00..000000000 --- a/requirements/ingest/gcs.in +++ /dev/null @@ -1,5 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -gcsfs -fsspec -bs4 diff --git a/requirements/ingest/gcs.txt b/requirements/ingest/gcs.txt deleted file mode 100644 index c2954c0b1..000000000 --- a/requirements/ingest/gcs.txt +++ /dev/null @@ -1,120 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/gcs.in -# -aiohappyeyeballs==2.4.0 - # via aiohttp -aiohttp==3.10.5 - # via gcsfs -aiosignal==1.3.1 - # via aiohttp -async-timeout==4.0.3 - # via aiohttp -attrs==24.2.0 - # via aiohttp -beautifulsoup4==4.12.3 - # via - # -c ./ingest/../base.txt - # bs4 -bs4==0.0.2 - # via -r ./ingest/gcs.in -cachetools==5.5.0 - # via google-auth -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -decorator==5.1.1 - # via gcsfs -frozenlist==1.4.1 - # via - # aiohttp - # aiosignal -fsspec==2024.9.0 - # via - # -r ./ingest/gcs.in - # gcsfs -gcsfs==2024.9.0.post1 - # via -r ./ingest/gcs.in -google-api-core==2.20.0 - # via - # google-cloud-core - # google-cloud-storage -google-auth==2.35.0 - # via - # gcsfs - # google-api-core - # google-auth-oauthlib - # google-cloud-core - # 
google-cloud-storage -google-auth-oauthlib==1.2.1 - # via gcsfs -google-cloud-core==2.4.1 - # via google-cloud-storage -google-cloud-storage==2.18.2 - # via gcsfs -google-crc32c==1.6.0 - # via - # google-cloud-storage - # google-resumable-media -google-resumable-media==2.7.2 - # via google-cloud-storage -googleapis-common-protos==1.65.0 - # via google-api-core -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests - # yarl -multidict==6.1.0 - # via - # aiohttp - # yarl -oauthlib==3.2.2 - # via requests-oauthlib -proto-plus==1.24.0 - # via google-api-core -protobuf==4.25.5 - # via - # -c ./ingest/../deps/constraints.txt - # google-api-core - # googleapis-common-protos - # proto-plus -pyasn1==0.6.1 - # via - # pyasn1-modules - # rsa -pyasn1-modules==0.4.1 - # via google-auth -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # gcsfs - # google-api-core - # google-cloud-storage - # requests-oauthlib -requests-oauthlib==2.0.0 - # via google-auth-oauthlib -rsa==4.9 - # via google-auth -soupsieve==2.6 - # via - # -c ./ingest/../base.txt - # beautifulsoup4 -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # multidict -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests -yarl==1.11.1 - # via aiohttp diff --git a/requirements/ingest/github.in b/requirements/ingest/github.in deleted file mode 100644 index 599585d7a..000000000 --- a/requirements/ingest/github.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -# NOTE - pygithub==1.58.0 fails due to https://github.com/PyGithub/PyGithub/issues/2436 -pygithub>1.58.0 diff --git a/requirements/ingest/github.txt b/requirements/ingest/github.txt deleted file mode 100644 index 0d8fa1ac5..000000000 --- a/requirements/ingest/github.txt +++ /dev/null @@ -1,57 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/github.in -# -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -cffi==1.17.1 - # via - # -c ./ingest/../base.txt - # cryptography - # pynacl -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -cryptography==43.0.1 - # via - # -c ./ingest/../base.txt - # pyjwt -deprecated==1.2.14 - # via pygithub -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -pycparser==2.22 - # via - # -c ./ingest/../base.txt - # cffi -pygithub==2.4.0 - # via -r ./ingest/github.in -pyjwt[crypto]==2.9.0 - # via pygithub -pynacl==1.5.0 - # via pygithub -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # pygithub -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # pygithub -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # pygithub - # requests -wrapt==1.16.0 - # via - # -c ./ingest/../base.txt - # deprecated diff --git a/requirements/ingest/gitlab.in b/requirements/ingest/gitlab.in deleted file mode 100644 index 86be2c44b..000000000 --- a/requirements/ingest/gitlab.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -python-gitlab diff --git a/requirements/ingest/gitlab.txt b/requirements/ingest/gitlab.txt deleted file mode 100644 index 77d5743ba..000000000 --- a/requirements/ingest/gitlab.txt +++ /dev/null @@ -1,34 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/gitlab.in -# -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -charset-normalizer==3.3.2 - # via - # -c 
./ingest/../base.txt - # requests -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -python-gitlab==4.11.1 - # via -r ./ingest/gitlab.in -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # python-gitlab - # requests-toolbelt -requests-toolbelt==1.0.0 - # via - # -c ./ingest/../base.txt - # python-gitlab -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests diff --git a/requirements/ingest/google-drive.in b/requirements/ingest/google-drive.in deleted file mode 100644 index e95e27f71..000000000 --- a/requirements/ingest/google-drive.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -google-api-python-client diff --git a/requirements/ingest/google-drive.txt b/requirements/ingest/google-drive.txt deleted file mode 100644 index 54726088b..000000000 --- a/requirements/ingest/google-drive.txt +++ /dev/null @@ -1,66 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/google-drive.in -# -cachetools==5.5.0 - # via google-auth -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -google-api-core==2.20.0 - # via google-api-python-client -google-api-python-client==2.146.0 - # via -r ./ingest/google-drive.in -google-auth==2.35.0 - # via - # google-api-core - # google-api-python-client - # google-auth-httplib2 -google-auth-httplib2==0.2.0 - # via google-api-python-client -googleapis-common-protos==1.65.0 - # via google-api-core -httplib2==0.22.0 - # via - # google-api-python-client - # google-auth-httplib2 -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -proto-plus==1.24.0 - # via google-api-core -protobuf==4.25.5 - # via - # -c ./ingest/../deps/constraints.txt - # google-api-core - # googleapis-common-protos - # proto-plus -pyasn1==0.6.1 - # via - # pyasn1-modules - # rsa -pyasn1-modules==0.4.1 - # via google-auth -pyparsing==3.1.4 - # via httplib2 -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # google-api-core -rsa==4.9 - # via google-auth -uritemplate==4.1.1 - # via google-api-python-client -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests diff --git a/requirements/ingest/hubspot.in b/requirements/ingest/hubspot.in deleted file mode 100644 index 033413401..000000000 --- a/requirements/ingest/hubspot.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -hubspot-api-client -urllib3 diff --git a/requirements/ingest/hubspot.txt b/requirements/ingest/hubspot.txt deleted file mode 100644 index da9b63b37..000000000 --- a/requirements/ingest/hubspot.txt +++ /dev/null @@ -1,27 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/hubspot.in -# -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # hubspot-api-client -hubspot-api-client==9.0.0 - # via -r ./ingest/hubspot.in -python-dateutil==2.9.0.post0 - # via - # -c ./ingest/../base.txt - # hubspot-api-client -six==1.16.0 - # via - # -c ./ingest/../base.txt - # hubspot-api-client - # python-dateutil -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # -r ./ingest/hubspot.in - # hubspot-api-client diff --git a/requirements/ingest/ingest.txt b/requirements/ingest/ingest.txt new file mode 100644 index 000000000..957f788f1 --- /dev/null +++ b/requirements/ingest/ingest.txt @@ 
-0,0 +1,5 @@ +unstructured-ingest[airtable, astradb, azure, azure-cognitive-search, bedrock, biomed, box, chroma, clarifai, confluence, couchbase, databricks-volumes, delta-table, discord, dropbox, elasticsearch, embed-huggingface, embed-octoai, embed-vertexai, embed-voyageai, gcs, github, gitlab, google-drive, hubspot, jira, kafka, kdbai, milvus, mongodb, notion, onedrive, openai, opensearch, outlook, pinecone, postgres, qdrant, reddit, remote, s3, salesforce, sftp, sharepoint, singlestore, slack, vectara, weaviate, wikipedia] +s3fs>=2024.9.0 +urllib3>=1.26.20 +backoff>=2.2.1 +httpx>=0.27.2 diff --git a/requirements/ingest/jira.in b/requirements/ingest/jira.in deleted file mode 100644 index 37f92cb8c..000000000 --- a/requirements/ingest/jira.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -atlassian-python-api diff --git a/requirements/ingest/jira.txt b/requirements/ingest/jira.txt deleted file mode 100644 index 7b2f98861..000000000 --- a/requirements/ingest/jira.txt +++ /dev/null @@ -1,56 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/jira.in -# -atlassian-python-api==3.41.16 - # via -r ./ingest/jira.in -beautifulsoup4==4.12.3 - # via - # -c ./ingest/../base.txt - # atlassian-python-api -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -deprecated==1.2.14 - # via atlassian-python-api -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -jmespath==1.0.1 - # via atlassian-python-api -oauthlib==3.2.2 - # via - # atlassian-python-api - # requests-oauthlib -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # atlassian-python-api - # requests-oauthlib -requests-oauthlib==2.0.0 - # via atlassian-python-api -six==1.16.0 - # via - # -c ./ingest/../base.txt - # atlassian-python-api -soupsieve==2.6 - # via - # -c ./ingest/../base.txt - # beautifulsoup4 -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests -wrapt==1.16.0 - # via - # -c ./ingest/../base.txt - # deprecated diff --git a/requirements/ingest/kafka.in b/requirements/ingest/kafka.in deleted file mode 100644 index 25b9ad2f6..000000000 --- a/requirements/ingest/kafka.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -confluent-kafka \ No newline at end of file diff --git a/requirements/ingest/kafka.txt b/requirements/ingest/kafka.txt deleted file mode 100644 index 4dbb8306d..000000000 --- a/requirements/ingest/kafka.txt +++ /dev/null @@ -1,8 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/kafka.in -# -confluent-kafka==2.5.3 - # via -r ./ingest/kafka.in diff --git a/requirements/ingest/mongodb.in b/requirements/ingest/mongodb.in deleted file mode 100644 index 48f292290..000000000 --- a/requirements/ingest/mongodb.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -pymongo diff --git a/requirements/ingest/mongodb.txt b/requirements/ingest/mongodb.txt deleted file mode 100644 index 778a13fc4..000000000 --- a/requirements/ingest/mongodb.txt +++ /dev/null @@ -1,10 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/mongodb.in -# -dnspython==2.6.1 - # via pymongo -pymongo==4.9.1 - # via -r ./ingest/mongodb.in diff --git a/requirements/ingest/notion.in 
b/requirements/ingest/notion.in deleted file mode 100644 index 47823a112..000000000 --- a/requirements/ingest/notion.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -notion-client -htmlBuilder diff --git a/requirements/ingest/notion.txt b/requirements/ingest/notion.txt deleted file mode 100644 index ea8a45578..000000000 --- a/requirements/ingest/notion.txt +++ /dev/null @@ -1,49 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/notion.in -# -anyio==4.5.0 - # via - # -c ./ingest/../base.txt - # httpx -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # httpcore - # httpx -exceptiongroup==1.2.2 - # via - # -c ./ingest/../base.txt - # anyio -h11==0.14.0 - # via - # -c ./ingest/../base.txt - # httpcore -htmlbuilder==1.0.0 - # via -r ./ingest/notion.in -httpcore==1.0.5 - # via - # -c ./ingest/../base.txt - # httpx -httpx==0.27.2 - # via - # -c ./ingest/../base.txt - # notion-client -idna==3.10 - # via - # -c ./ingest/../base.txt - # anyio - # httpx -notion-client==2.2.1 - # via -r ./ingest/notion.in -sniffio==1.3.1 - # via - # -c ./ingest/../base.txt - # anyio - # httpx -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # anyio diff --git a/requirements/ingest/onedrive.in b/requirements/ingest/onedrive.in deleted file mode 100644 index c53222881..000000000 --- a/requirements/ingest/onedrive.in +++ /dev/null @@ -1,5 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -msal -Office365-REST-Python-Client -bs4 diff --git a/requirements/ingest/onedrive.txt b/requirements/ingest/onedrive.txt deleted file mode 100644 index 88330e86e..000000000 --- a/requirements/ingest/onedrive.txt +++ /dev/null @@ -1,65 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/onedrive.in -# -beautifulsoup4==4.12.3 - # via - # -c ./ingest/../base.txt - # bs4 -bs4==0.0.2 - # via -r ./ingest/onedrive.in -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -cffi==1.17.1 - # via - # -c ./ingest/../base.txt - # cryptography -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -cryptography==43.0.1 - # via - # -c ./ingest/../base.txt - # msal - # pyjwt -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -msal==1.31.0 - # via - # -r ./ingest/onedrive.in - # office365-rest-python-client -office365-rest-python-client==2.5.13 - # via -r ./ingest/onedrive.in -pycparser==2.22 - # via - # -c ./ingest/../base.txt - # cffi -pyjwt[crypto]==2.9.0 - # via msal -pytz==2024.2 - # via office365-rest-python-client -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # msal - # office365-rest-python-client -soupsieve==2.6 - # via - # -c ./ingest/../base.txt - # beautifulsoup4 -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # office365-rest-python-client -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests diff --git a/requirements/ingest/opensearch.in b/requirements/ingest/opensearch.in deleted file mode 100644 index ac336e8d1..000000000 --- a/requirements/ingest/opensearch.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -opensearch-py diff --git a/requirements/ingest/opensearch.txt b/requirements/ingest/opensearch.txt deleted file mode 100644 index 03a011830..000000000 --- a/requirements/ingest/opensearch.txt +++ /dev/null @@ -1,41 +0,0 @@ -# -# This file is autogenerated by pip-compile with 
Python 3.9 -# by the following command: -# -# pip-compile ./ingest/opensearch.in -# -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # opensearch-py - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -events==0.5 - # via opensearch-py -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -opensearch-py==2.7.1 - # via -r ./ingest/opensearch.in -python-dateutil==2.9.0.post0 - # via - # -c ./ingest/../base.txt - # opensearch-py -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # opensearch-py -six==1.16.0 - # via - # -c ./ingest/../base.txt - # python-dateutil -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # opensearch-py - # requests diff --git a/requirements/ingest/outlook.in b/requirements/ingest/outlook.in deleted file mode 100644 index 3b65d3029..000000000 --- a/requirements/ingest/outlook.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -msal -Office365-REST-Python-Client diff --git a/requirements/ingest/outlook.txt b/requirements/ingest/outlook.txt deleted file mode 100644 index f73262791..000000000 --- a/requirements/ingest/outlook.txt +++ /dev/null @@ -1,55 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/outlook.in -# -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -cffi==1.17.1 - # via - # -c ./ingest/../base.txt - # cryptography -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -cryptography==43.0.1 - # via - # -c ./ingest/../base.txt - # msal - # pyjwt -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -msal==1.31.0 - # via - # -r ./ingest/outlook.in - # office365-rest-python-client -office365-rest-python-client==2.5.13 - # via -r ./ingest/outlook.in -pycparser==2.22 - # via - # -c ./ingest/../base.txt - # cffi -pyjwt[crypto]==2.9.0 - # via msal -pytz==2024.2 - # via office365-rest-python-client -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # msal - # office365-rest-python-client -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # office365-rest-python-client -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests diff --git a/requirements/ingest/pinecone.in b/requirements/ingest/pinecone.in deleted file mode 100644 index 985accf43..000000000 --- a/requirements/ingest/pinecone.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -pinecone-client>=3.7.1 diff --git a/requirements/ingest/pinecone.txt b/requirements/ingest/pinecone.txt deleted file mode 100644 index 68a6197ff..000000000 --- a/requirements/ingest/pinecone.txt +++ /dev/null @@ -1,31 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/pinecone.in -# -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # pinecone-client -pinecone-client==5.0.1 - # via -r ./ingest/pinecone.in -pinecone-plugin-inference==1.1.0 - # via pinecone-client -pinecone-plugin-interface==0.0.7 - # via - # pinecone-client - # pinecone-plugin-inference -tqdm==4.66.5 - # via - # -c ./ingest/../base.txt - # pinecone-client -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # pinecone-client -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # pinecone-client diff --git a/requirements/ingest/postgres.in b/requirements/ingest/postgres.in deleted file 
mode 100644 index f57ac1a36..000000000 --- a/requirements/ingest/postgres.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -psycopg2-binary diff --git a/requirements/ingest/postgres.txt b/requirements/ingest/postgres.txt deleted file mode 100644 index 813ca1616..000000000 --- a/requirements/ingest/postgres.txt +++ /dev/null @@ -1,8 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/postgres.in -# -psycopg2-binary==2.9.9 - # via -r ./ingest/postgres.in diff --git a/requirements/ingest/qdrant.in b/requirements/ingest/qdrant.in deleted file mode 100644 index 051f54715..000000000 --- a/requirements/ingest/qdrant.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -qdrant-client diff --git a/requirements/ingest/qdrant.txt b/requirements/ingest/qdrant.txt deleted file mode 100644 index 0ea8c17ae..000000000 --- a/requirements/ingest/qdrant.txt +++ /dev/null @@ -1,86 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/qdrant.in -# -annotated-types==0.7.0 - # via pydantic -anyio==4.5.0 - # via - # -c ./ingest/../base.txt - # httpx -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # httpcore - # httpx -exceptiongroup==1.2.2 - # via - # -c ./ingest/../base.txt - # anyio -grpcio==1.66.1 - # via - # -c ./ingest/../deps/constraints.txt - # grpcio-tools - # qdrant-client -grpcio-tools==1.62.3 - # via qdrant-client -h11==0.14.0 - # via - # -c ./ingest/../base.txt - # httpcore -h2==4.1.0 - # via httpx -hpack==4.0.0 - # via h2 -httpcore==1.0.5 - # via - # -c ./ingest/../base.txt - # httpx -httpx[http2]==0.27.2 - # via - # -c ./ingest/../base.txt - # qdrant-client -hyperframe==6.0.1 - # via h2 -idna==3.10 - # via - # -c ./ingest/../base.txt - # anyio - # httpx -numpy==1.26.4 - # via - # -c ./ingest/../base.txt - # qdrant-client -portalocker==2.10.1 - # via qdrant-client -protobuf==4.25.5 - # via - # -c ./ingest/../deps/constraints.txt - # grpcio-tools -pydantic==2.9.2 - # via qdrant-client -pydantic-core==2.23.4 - # via pydantic -qdrant-client==1.11.2 - # via -r ./ingest/qdrant.in -sniffio==1.3.1 - # via - # -c ./ingest/../base.txt - # anyio - # httpx -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # anyio - # pydantic - # pydantic-core -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # qdrant-client - -# The following packages are considered to be unsafe in a requirements file: -# setuptools diff --git a/requirements/ingest/reddit.in b/requirements/ingest/reddit.in deleted file mode 100644 index 5fa199c8c..000000000 --- a/requirements/ingest/reddit.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -praw diff --git a/requirements/ingest/reddit.txt b/requirements/ingest/reddit.txt deleted file mode 100644 index 2d5bd0302..000000000 --- a/requirements/ingest/reddit.txt +++ /dev/null @@ -1,36 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/reddit.in -# -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -praw==7.7.1 - # via -r ./ingest/reddit.in -prawcore==2.4.0 - # via praw -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # prawcore - # update-checker -update-checker==0.18.0 
- # via praw -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests -websocket-client==1.8.0 - # via praw diff --git a/requirements/ingest/s3.in b/requirements/ingest/s3.in deleted file mode 100644 index 43c7b2ecf..000000000 --- a/requirements/ingest/s3.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -s3fs -fsspec diff --git a/requirements/ingest/s3.txt b/requirements/ingest/s3.txt deleted file mode 100644 index 98bb2313b..000000000 --- a/requirements/ingest/s3.txt +++ /dev/null @@ -1,70 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/s3.in -# -aiobotocore==2.13.3 - # via s3fs -aiohappyeyeballs==2.4.0 - # via aiohttp -aiohttp==3.10.5 - # via - # aiobotocore - # s3fs -aioitertools==0.12.0 - # via aiobotocore -aiosignal==1.3.1 - # via aiohttp -async-timeout==4.0.3 - # via aiohttp -attrs==24.2.0 - # via aiohttp -botocore==1.34.131 - # via - # -c ./ingest/../deps/constraints.txt - # aiobotocore -frozenlist==1.4.1 - # via - # aiohttp - # aiosignal -fsspec==2024.9.0 - # via - # -r ./ingest/s3.in - # s3fs -idna==3.10 - # via - # -c ./ingest/../base.txt - # yarl -jmespath==1.0.1 - # via botocore -multidict==6.1.0 - # via - # aiohttp - # yarl -python-dateutil==2.9.0.post0 - # via - # -c ./ingest/../base.txt - # botocore -s3fs==2024.9.0 - # via -r ./ingest/s3.in -six==1.16.0 - # via - # -c ./ingest/../base.txt - # python-dateutil -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # aioitertools - # multidict -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # botocore -wrapt==1.16.0 - # via - # -c ./ingest/../base.txt - # aiobotocore -yarl==1.11.1 - # via aiohttp diff --git a/requirements/ingest/salesforce.in b/requirements/ingest/salesforce.in deleted file mode 100644 index 69547e5dd..000000000 --- a/requirements/ingest/salesforce.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -simple-salesforce diff --git a/requirements/ingest/salesforce.txt b/requirements/ingest/salesforce.txt deleted file mode 100644 index a87fe3bf4..000000000 --- a/requirements/ingest/salesforce.txt +++ /dev/null @@ -1,76 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/salesforce.in -# -attrs==24.2.0 - # via zeep -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -cffi==1.17.1 - # via - # -c ./ingest/../base.txt - # cryptography -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -cryptography==43.0.1 - # via - # -c ./ingest/../base.txt - # pyjwt -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -isodate==0.6.1 - # via zeep -lxml==5.3.0 - # via - # -c ./ingest/../base.txt - # zeep -more-itertools==10.5.0 - # via simple-salesforce -platformdirs==4.3.6 - # via zeep -pycparser==2.22 - # via - # -c ./ingest/../base.txt - # cffi -pyjwt[crypto]==2.9.0 - # via simple-salesforce -pytz==2024.2 - # via zeep -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # requests-file - # requests-toolbelt - # simple-salesforce - # zeep -requests-file==2.1.0 - # via zeep -requests-toolbelt==1.0.0 - # via - # -c ./ingest/../base.txt - # zeep -simple-salesforce==1.12.6 - # via -r ./ingest/salesforce.in -six==1.16.0 - # via - # -c ./ingest/../base.txt - # isodate -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # simple-salesforce -urllib3==1.26.20 - # 
via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests -zeep==4.2.1 - # via simple-salesforce diff --git a/requirements/ingest/sftp.in b/requirements/ingest/sftp.in deleted file mode 100644 index e91c3eb34..000000000 --- a/requirements/ingest/sftp.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -fsspec -paramiko diff --git a/requirements/ingest/sftp.txt b/requirements/ingest/sftp.txt deleted file mode 100644 index 149af18f6..000000000 --- a/requirements/ingest/sftp.txt +++ /dev/null @@ -1,27 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/sftp.in -# -bcrypt==4.2.0 - # via paramiko -cffi==1.17.1 - # via - # -c ./ingest/../base.txt - # cryptography - # pynacl -cryptography==43.0.1 - # via - # -c ./ingest/../base.txt - # paramiko -fsspec==2024.9.0 - # via -r ./ingest/sftp.in -paramiko==3.5.0 - # via -r ./ingest/sftp.in -pycparser==2.22 - # via - # -c ./ingest/../base.txt - # cffi -pynacl==1.5.0 - # via paramiko diff --git a/requirements/ingest/sharepoint.in b/requirements/ingest/sharepoint.in deleted file mode 100644 index 3b65d3029..000000000 --- a/requirements/ingest/sharepoint.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -msal -Office365-REST-Python-Client diff --git a/requirements/ingest/sharepoint.txt b/requirements/ingest/sharepoint.txt deleted file mode 100644 index 324fc52dd..000000000 --- a/requirements/ingest/sharepoint.txt +++ /dev/null @@ -1,55 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/sharepoint.in -# -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -cffi==1.17.1 - # via - # -c ./ingest/../base.txt - # cryptography -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -cryptography==43.0.1 - # via - # -c ./ingest/../base.txt - # msal - # pyjwt -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -msal==1.31.0 - # via - # -r ./ingest/sharepoint.in - # office365-rest-python-client -office365-rest-python-client==2.5.13 - # via -r ./ingest/sharepoint.in -pycparser==2.22 - # via - # -c ./ingest/../base.txt - # cffi -pyjwt[crypto]==2.9.0 - # via msal -pytz==2024.2 - # via office365-rest-python-client -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # msal - # office365-rest-python-client -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # office365-rest-python-client -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests diff --git a/requirements/ingest/singlestore.in b/requirements/ingest/singlestore.in deleted file mode 100644 index 5a7e51c28..000000000 --- a/requirements/ingest/singlestore.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -singlestoredb diff --git a/requirements/ingest/singlestore.txt b/requirements/ingest/singlestore.txt deleted file mode 100644 index 226285577..000000000 --- a/requirements/ingest/singlestore.txt +++ /dev/null @@ -1,62 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/singlestore.in -# -build==1.2.2 - # via singlestoredb -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -importlib-metadata==8.5.0 - # via - # -c 
./ingest/../deps/constraints.txt - # build -packaging==24.1 - # via - # -c ./ingest/../base.txt - # build -parsimonious==0.10.0 - # via singlestoredb -pyjwt==2.9.0 - # via singlestoredb -pyproject-hooks==1.1.0 - # via build -regex==2024.9.11 - # via - # -c ./ingest/../base.txt - # parsimonious -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # singlestoredb -singlestoredb==1.6.3 - # via -r ./ingest/singlestore.in -sqlparams==6.1.0 - # via singlestoredb -tomli==2.0.1 - # via - # build - # singlestoredb -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests -wheel==0.44.0 - # via singlestoredb -zipp==3.20.2 - # via importlib-metadata - -# The following packages are considered to be unsafe in a requirements file: -# setuptools diff --git a/requirements/ingest/slack.in b/requirements/ingest/slack.in deleted file mode 100644 index 5eaa80bc7..000000000 --- a/requirements/ingest/slack.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -slack_sdk diff --git a/requirements/ingest/slack.txt b/requirements/ingest/slack.txt deleted file mode 100644 index f518e3e81..000000000 --- a/requirements/ingest/slack.txt +++ /dev/null @@ -1,8 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/slack.in -# -slack-sdk==3.33.1 - # via -r ./ingest/slack.in diff --git a/requirements/ingest/weaviate.in b/requirements/ingest/weaviate.in deleted file mode 100644 index e487fcead..000000000 --- a/requirements/ingest/weaviate.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -weaviate-client diff --git a/requirements/ingest/weaviate.txt b/requirements/ingest/weaviate.txt deleted file mode 100644 index 8c457917f..000000000 --- a/requirements/ingest/weaviate.txt +++ /dev/null @@ -1,45 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/weaviate.in -# -authlib==1.3.2 - # via weaviate-client -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -cffi==1.17.1 - # via - # -c ./ingest/../base.txt - # cryptography -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -cryptography==43.0.1 - # via - # -c ./ingest/../base.txt - # authlib -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -pycparser==2.22 - # via - # -c ./ingest/../base.txt - # cffi -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # weaviate-client -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests -validators==0.34.0 - # via weaviate-client -weaviate-client==3.26.7 - # via -r ./ingest/weaviate.in diff --git a/requirements/ingest/wikipedia.in b/requirements/ingest/wikipedia.in deleted file mode 100644 index fb68f0930..000000000 --- a/requirements/ingest/wikipedia.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -wikipedia diff --git a/requirements/ingest/wikipedia.txt b/requirements/ingest/wikipedia.txt deleted file mode 100644 index 790b132de..000000000 --- a/requirements/ingest/wikipedia.txt +++ /dev/null @@ -1,37 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/wikipedia.in -# -beautifulsoup4==4.12.3 - # via - # -c ./ingest/../base.txt - # wikipedia -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests 
-idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # wikipedia -soupsieve==2.6 - # via - # -c ./ingest/../base.txt - # beautifulsoup4 -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests -wikipedia==1.4.0 - # via -r ./ingest/wikipedia.in diff --git a/requirements/test.txt b/requirements/test.txt index 9585aaa4d..6c9660091 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -6,7 +6,7 @@ # annotated-types==0.7.0 # via pydantic -anyio==4.5.0 +anyio==4.6.0 # via # -c ./base.txt # httpx @@ -50,7 +50,7 @@ flake8-print==5.0.0 # via -r ./test.in freezegun==1.5.1 # via -r ./test.in -grpcio==1.66.1 +grpcio==1.66.2 # via # -c ././deps/constraints.txt # -r ./test.in @@ -58,7 +58,7 @@ h11==0.14.0 # via # -c ./base.txt # httpcore -httpcore==1.0.5 +httpcore==1.0.6 # via # -c ./base.txt # httpx @@ -115,7 +115,7 @@ packaging==24.1 # -c ./base.txt # black # pytest -pandas==2.2.2 +pandas==2.2.3 # via label-studio-sdk pathspec==0.12.1 # via black @@ -185,7 +185,7 @@ sniffio==1.3.1 # httpx toml==0.10.2 # via liccheck -tomli==2.0.1 +tomli==2.0.2 # via # autoflake # black @@ -216,7 +216,7 @@ typing-extensions==4.12.2 # mypy # pydantic # pydantic-core -tzdata==2024.1 +tzdata==2024.2 # via pandas ujson==5.10.0 # via label-studio-sdk @@ -234,7 +234,7 @@ wrapt==1.16.0 # vcrpy xmljson==0.2.1 # via label-studio-sdk -yarl==1.11.1 +yarl==1.13.1 # via vcrpy # The following packages are considered to be unsafe in a requirements file: diff --git a/scripts/airtable-test-helpers/create_scale_test_components.py b/scripts/airtable-test-helpers/create_scale_test_components.py index af279457c..e2e11cb86 100644 --- a/scripts/airtable-test-helpers/create_scale_test_components.py +++ b/scripts/airtable-test-helpers/create_scale_test_components.py @@ -3,7 +3,7 @@ import os # import pyairtable as pyair from pyairtable import Api -from unstructured.ingest.logger import logger +from unstructured.logger import logger SCALE_TEST_NUMBER_OF_RECORDS = 20_000 diff --git a/scripts/consistent-deps.sh b/scripts/consistent-deps.sh index f210c6acb..81c772894 100755 --- a/scripts/consistent-deps.sh +++ b/scripts/consistent-deps.sh @@ -16,7 +16,7 @@ function join_by { } # NOTE(alan): Add any dependency files here we don't want to include in the resolution. -excludefiles=("requirements//build.txt") +excludefiles=("requirements/ingest/ingest.txt") # Build an array of requirements files. shopt -s nullglob @@ -39,6 +39,8 @@ reqstring=$(join_by ' -r ' "${reqfiles[@]}") reqstring="-r ${reqstring}" # This pip command will attempt to resolve the dependencies without installing anything. pipcommand="pip install --dry-run --ignore-installed ${reqstring}" +echo "dry run install of the following req files:" +echo "${pipcommand}" if $pipcommand >>/dev/null; then echo "Everything looks fine!" 
else diff --git a/setup.py b/setup.py index 89813f7c1..3b698e12e 100644 --- a/setup.py +++ b/setup.py @@ -121,58 +121,10 @@ setup( "rst": rst_reqs, "tsv": tsv_reqs, "xlsx": xlsx_reqs, - # Extra requirements for data connectors - "airtable": load_requirements("requirements/ingest/airtable.in"), - "astradb": load_requirements("requirements/ingest/astradb.in"), - "azure": load_requirements("requirements/ingest/azure.in"), - "azure-cognitive-search": load_requirements( - "requirements/ingest/azure-cognitive-search.in", - ), - "biomed": load_requirements("requirements/ingest/biomed.in"), - "box": load_requirements("requirements/ingest/box.in"), - "chroma": load_requirements("requirements/ingest/chroma.in"), - "clarifai": load_requirements("requirements/ingest/clarifai.in"), - "confluence": load_requirements("requirements/ingest/confluence.in"), - "delta-table": load_requirements("requirements/ingest/delta-table.in"), - "discord": load_requirements("requirements/ingest/discord.in"), - "dropbox": load_requirements("requirements/ingest/dropbox.in"), - "elasticsearch": load_requirements("requirements/ingest/elasticsearch.in"), - "gcs": load_requirements("requirements/ingest/gcs.in"), - "github": load_requirements("requirements/ingest/github.in"), - "gitlab": load_requirements("requirements/ingest/gitlab.in"), - "google-drive": load_requirements("requirements/ingest/google-drive.in"), - "hubspot": load_requirements("requirements/ingest/hubspot.in"), - "jira": load_requirements("requirements/ingest/jira.in"), - "kafka": load_requirements("requirements/ingest/kafka.in"), - "mongodb": load_requirements("requirements/ingest/mongodb.in"), - "notion": load_requirements("requirements/ingest/notion.in"), - "onedrive": load_requirements("requirements/ingest/onedrive.in"), - "opensearch": load_requirements("requirements/ingest/opensearch.in"), - "outlook": load_requirements("requirements/ingest/outlook.in"), - "pinecone": load_requirements("requirements/ingest/pinecone.in"), - "postgres": load_requirements("requirements/ingest/postgres.in"), - "qdrant": load_requirements("requirements/ingest/qdrant.in"), - "reddit": load_requirements("requirements/ingest/reddit.in"), - "s3": load_requirements("requirements/ingest/s3.in"), - "sharepoint": load_requirements("requirements/ingest/sharepoint.in"), - "salesforce": load_requirements("requirements/ingest/salesforce.in"), - "sftp": load_requirements("requirements/ingest/sftp.in"), - "slack": load_requirements("requirements/ingest/slack.in"), - "wikipedia": load_requirements("requirements/ingest/wikipedia.in"), - "weaviate": load_requirements("requirements/ingest/weaviate.in"), # Legacy extra requirements "huggingface": load_requirements("requirements/huggingface.in"), "local-inference": all_doc_reqs, "paddleocr": load_requirements("requirements/extra-paddleocr.in"), - "embed-huggingface": load_requirements("requirements/ingest/embed-huggingface.in"), - "embed-mixedbreadai": load_requirements("requirements/ingest/embed-mixedbreadai.in"), - "embed-octoai": load_requirements("requirements/ingest/embed-octoai.in"), - "embed-vertexai": load_requirements("requirements/ingest/embed-vertexai.in"), - "embed-voyageai": load_requirements("requirements/ingest/embed-voyageai.in"), - "openai": load_requirements("requirements/ingest/embed-openai.in"), - "bedrock": load_requirements("requirements/ingest/embed-aws-bedrock.in"), - "databricks-volumes": load_requirements("requirements/ingest/databricks-volumes.in"), - "singlestore": 
load_requirements("requirements/ingest/singlestore.in"), }, package_dir={"unstructured": "unstructured"}, package_data={"unstructured": ["nlp/*.txt", "py.typed"]}, diff --git a/test_unstructured/embed/test_mixedbreadai.py b/test_unstructured/embed/test_mixedbreadai.py index 015342677..0121d3d48 100644 --- a/test_unstructured/embed/test_mixedbreadai.py +++ b/test_unstructured/embed/test_mixedbreadai.py @@ -22,8 +22,8 @@ def test_embed_documents_does_not_break_element_to_dict(mocker): mock_client.embeddings.side_effect = mock_embeddings - # Mock create_client to return our mock_client - mocker.patch.object(MixedbreadAIEmbeddingEncoder, "create_client", return_value=mock_client) + # Mock get_client to return our mock_client + mocker.patch.object(MixedbreadAIEmbeddingConfig, "get_client", return_value=mock_client) encoder = MixedbreadAIEmbeddingEncoder( config=MixedbreadAIEmbeddingConfig( diff --git a/test_unstructured/embed/test_octoai.py b/test_unstructured/embed/test_octoai.py index df9b302e4..6b237ff84 100644 --- a/test_unstructured/embed/test_octoai.py +++ b/test_unstructured/embed/test_octoai.py @@ -7,8 +7,8 @@ def test_embed_documents_does_not_break_element_to_dict(mocker): mock_client = mocker.MagicMock() mock_client.embed_documents.return_value = [1, 2] - # Mock create_client to return our mock_client - mocker.patch.object(OctoAIEmbeddingEncoder, "create_client", return_value=mock_client) + # Mock get_client to return our mock_client + mocker.patch.object(OctoAiEmbeddingConfig, "get_client", return_value=mock_client) encoder = OctoAIEmbeddingEncoder(config=OctoAiEmbeddingConfig(api_key="api_key")) elements = encoder.embed_documents( diff --git a/test_unstructured/embed/test_openai.py b/test_unstructured/embed/test_openai.py index 7d37257b8..39148a454 100644 --- a/test_unstructured/embed/test_openai.py +++ b/test_unstructured/embed/test_openai.py @@ -7,8 +7,8 @@ def test_embed_documents_does_not_break_element_to_dict(mocker): mock_client = mocker.MagicMock() mock_client.embed_documents.return_value = [1, 2] - # Mock create_client to return our mock_client - mocker.patch.object(OpenAIEmbeddingEncoder, "create_client", return_value=mock_client) + # Mock get_client to return our mock_client + mocker.patch.object(OpenAIEmbeddingConfig, "get_client", return_value=mock_client) encoder = OpenAIEmbeddingEncoder(config=OpenAIEmbeddingConfig(api_key="api_key")) elements = encoder.embed_documents( diff --git a/test_unstructured/embed/test_vertexai.py b/test_unstructured/embed/test_vertexai.py index f754b19a1..3899a1994 100644 --- a/test_unstructured/embed/test_vertexai.py +++ b/test_unstructured/embed/test_vertexai.py @@ -8,7 +8,7 @@ def test_embed_documents_does_not_break_element_to_dict(mocker): mock_client.embed_documents.return_value = [1, 2] # Mock create_client to return our mock_client - mocker.patch.object(VertexAIEmbeddingEncoder, "create_client", return_value=mock_client) + mocker.patch.object(VertexAIEmbeddingConfig, "get_client", return_value=mock_client) encoder = VertexAIEmbeddingEncoder(config=VertexAIEmbeddingConfig(api_key="api_key")) elements = encoder.embed_documents( diff --git a/test_unstructured/embed/test_voyageai.py b/test_unstructured/embed/test_voyageai.py index cd4bd0551..b759e6153 100644 --- a/test_unstructured/embed/test_voyageai.py +++ b/test_unstructured/embed/test_voyageai.py @@ -7,8 +7,8 @@ def test_embed_documents_does_not_break_element_to_dict(mocker): mock_client = mocker.MagicMock() mock_client.embed_documents.return_value = [1, 2] - # Mock create_client to 
return our mock_client - mocker.patch.object(VoyageAIEmbeddingEncoder, "create_client", return_value=mock_client) + # Mock get_client to return our mock_client + mocker.patch.object(VoyageAIEmbeddingConfig, "get_client", return_value=mock_client) encoder = VoyageAIEmbeddingEncoder( config=VoyageAIEmbeddingConfig(api_key="api_key", model_name="voyage-law-2") diff --git a/test_unstructured/ingest/utils/test_compression.py b/test_unstructured/ingest/utils/test_compression.py deleted file mode 100644 index 7699a385e..000000000 --- a/test_unstructured/ingest/utils/test_compression.py +++ /dev/null @@ -1,15 +0,0 @@ -import os -import tarfile - -from unstructured.ingest.utils.compression import uncompress_tar_file - - -def test_uncompress_tar_file(tmpdir): - tar_filename = os.path.join(tmpdir, "test.tar") - filename = "example-docs/fake-text.txt" - - with tarfile.open(tar_filename, "w:gz") as tar: - tar.add(filename, arcname=os.path.basename(filename)) - - path = uncompress_tar_file(tar_filename, path=tmpdir.dirname) - assert path == tmpdir.dirname diff --git a/test_unstructured/test_utils.py b/test_unstructured/test_utils.py index 8d8f5a7eb..487b98b2c 100644 --- a/test_unstructured/test_utils.py +++ b/test_unstructured/test_utils.py @@ -2,7 +2,6 @@ from __future__ import annotations import json import os -import re import pytest @@ -313,32 +312,6 @@ def test_catch_overlapping_and_nested_bboxes_non_overlapping_case(): assert overlapping_cases == [] -def test_validate_data_args(): - assert utils.validate_date_args("2020-10-10") is True - - with pytest.raises(ValueError): - utils.validate_date_args("blah") - - with pytest.raises(ValueError): - utils.validate_date_args(None) - - -@pytest.mark.parametrize( - "date", ["1990-12-01", "2050-01-01T00:00:00", "2050-01-01+00:00:00", "2022-02-12T14:30:00-0500"] -) -def test_validate_date_args_accepts_standard_formats(date): - assert utils.validate_date_args(date) - - -@pytest.mark.parametrize("date", [None, "not a date", "1990-12-33"]) -def test_validate_date_args_raises_for_invalid_formats(date): - pattern1 = re.compile(r"The argument.*?(?:is None).*") - pattern2 = re.compile(r"The argument.*?(?:does not satisfy the format: YYYY-MM-DD).*") - combined_pattern = re.compile(f"({pattern1.pattern}|{pattern2.pattern})") - with pytest.raises(ValueError, match=combined_pattern): - assert utils.validate_date_args(date) - - def test_only_returns_singleton_iterable(): singleton_iterable = [42] result = utils.only(singleton_iterable) diff --git a/test_unstructured_ingest/dest/astradb.sh b/test_unstructured_ingest/dest/astradb.sh deleted file mode 100755 index 77fc0e25e..000000000 --- a/test_unstructured_ingest/dest/astradb.sh +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env bash - -set -e - -SRC_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$SRC_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=astradb-dest -OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$SCRIPT_DIR/workdir/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} - -if [ -z "$ASTRA_DB_APPLICATION_TOKEN" ]; then - echo "Skipping Astra DB ingest test because ASTRA_DB_APPLICATION_TOKEN env var is not set." - exit 0 -fi - -if [ -z "$ASTRA_DB_API_ENDPOINT" ]; then - echo "Skipping Astra DB ingest test because ASTRA_DB_API_ENDPOINT env var is not set." 
- exit 0 -fi - -RANDOM_SUFFIX=$((RANDOM % 100000 + 1)) -COLLECTION_NAME="astradb_test_output_$RANDOM_SUFFIX" -EMBEDDING_DIMENSION=384 - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh - -function cleanup() { - cleanup_dir "$OUTPUT_DIR" - cleanup_dir "$WORK_DIR" - - python "$SCRIPT_DIR"/python/test-ingest-astradb-output.py \ - --token "$ASTRA_DB_APPLICATION_TOKEN" \ - --api-endpoint "$ASTRA_DB_API_ENDPOINT" \ - --collection-name "$COLLECTION_NAME" down -} - -trap cleanup EXIT - -PYTHONPATH=. ./unstructured/ingest/main.py \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --input-path example-docs/book-war-and-peace-1p.txt \ - --work-dir "$WORK_DIR" \ - --chunking-strategy by_title \ - --chunk-max-characters 1500 \ - --chunk-multipage-sections \ - --embedding-provider "langchain-huggingface" \ - astradb \ - --token "$ASTRA_DB_APPLICATION_TOKEN" \ - --api-endpoint "$ASTRA_DB_API_ENDPOINT" \ - --collection-name "$COLLECTION_NAME" \ - --embedding-dimension "$EMBEDDING_DIMENSION" \ - --requested-indexing-policy '{"deny": ["metadata"]}' - -python "$SCRIPT_DIR"/python/test-ingest-astradb-output.py \ - --token "$ASTRA_DB_APPLICATION_TOKEN" \ - --api-endpoint "$ASTRA_DB_API_ENDPOINT" \ - --collection-name "$COLLECTION_NAME" check diff --git a/test_unstructured_ingest/dest/azure-cognitive-search.sh b/test_unstructured_ingest/dest/azure-cognitive-search.sh deleted file mode 100755 index 8b534939f..000000000 --- a/test_unstructured_ingest/dest/azure-cognitive-search.sh +++ /dev/null @@ -1,121 +0,0 @@ -#!/usr/bin/env bash - -set -e - -SRC_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$SRC_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR} -OUTPUT_FOLDER_NAME=azure-cog-search-dest -OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME -UPLOAD_DIR=$WORK_DIR/upload_stage -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} - -AZURE_SEARCH_ENDPOINT="https://ingest-test-azure-cognitive-search.search.windows.net" - -random_id=$(uuidgen) -# index name must be all lowercase -random_id=$(echo "$random_id" | tr '[:upper:]' '[:lower:]') -DESTINATION_INDEX="utic-test-ingest-fixtures-output-$random_id" -# The vector configs on the schema currently only exist on versions: -# 2023-07-01-Preview, 2021-04-30-Preview, 2020-06-30-Preview -API_VERSION=2023-07-01-Preview - -if [ -z "$AZURE_SEARCH_API_KEY" ] || [ -z "$AZURE_SEARCH_ENDPOINT" ]; then - echo "Skipping Azure Cognitive Search ingest test because AZURE_SEARCH_API_KEY or AZURE_SEARCH_ENDPOINT env var is not set." 
- exit 8 -fi - -endpoint="$AZURE_SEARCH_ENDPOINT/indexes/$DESTINATION_INDEX?api-version=$API_VERSION" -echo "Connecting to endpoint: $endpoint" -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh -function cleanup { - # Index cleanup - response_code=$(curl -s -o /dev/null -w "%{http_code}" \ - "$endpoint" \ - --header "api-key: $AZURE_SEARCH_API_KEY" \ - --header 'content-type: application/json') - if [ "$response_code" == "200" ]; then - echo "deleting index $DESTINATION_INDEX" - curl -X DELETE \ - "$endpoint" \ - --header "api-key: $AZURE_SEARCH_API_KEY" \ - --header 'content-type: application/json' - else - echo "Index $DESTINATION_INDEX does not exist, nothing to delete" - fi - - # Local file cleanup - cleanup_dir "$WORK_DIR" - cleanup_dir "$OUTPUT_DIR" -} - -trap cleanup EXIT - -# Create index -echo "Creating index $DESTINATION_INDEX" -response=$(curl -X PUT -s -w "\n%{http_code}" \ - "$endpoint" \ - --header "api-key: $AZURE_SEARCH_API_KEY" \ - --header 'content-type: application/json' \ - --data "@$SCRIPT_DIR/files/azure_cognitive_index_schema.json") -response_code=$(echo "$response" | tail -n 1) # get the last line -content=$(echo "$response" | head -n 1) # get the first line -if [ "$response_code" -lt 400 ]; then - echo "Index creation success: $response_code" -else - echo "Index creation failure [$response_code]: $content" - exit 1 -fi - -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} -PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --reprocess \ - --input-path example-docs/pdf/fake-memo.pdf \ - --work-dir "$WORK_DIR" \ - --chunking-strategy by_title \ - --chunk-combine-text-under-n-chars 150 \ - --chunk-new-after-n-chars 1500 \ - --chunk-max-characters 2500 \ - --chunk-multipage-sections \ - --chunk-no-include-orig-elements \ - --embedding-provider "langchain-huggingface" \ - azure-cognitive-search \ - --key "$AZURE_SEARCH_API_KEY" \ - --endpoint "$AZURE_SEARCH_ENDPOINT" \ - --index "$DESTINATION_INDEX" - -# It can take some time for the index to catch up with the content that was written, this check between 10s sleeps -# to give it that time process the writes. Will timeout after checking for a minute. -docs_count_remote=0 -attempt=1 -while [ "$docs_count_remote" -eq 0 ] && [ "$attempt" -lt 6 ]; do - echo "attempt $attempt: sleeping 10 seconds to let index finish catching up after writes" - sleep 10 - - # Check the contents of the index - docs_count_remote=$(curl "$AZURE_SEARCH_ENDPOINT/indexes/$DESTINATION_INDEX/docs/\$count?api-version=$API_VERSION" \ - --header "api-key: $AZURE_SEARCH_API_KEY" \ - --header 'content-type: application/json' | jq) - - echo "docs count pulled from Azure Cognitive Search: $docs_count_remote" - - attempt=$((attempt + 1)) -done - -docs_count_local=0 -for i in $(jq length "$UPLOAD_DIR"/*.json); do - docs_count_local=$((docs_count_local + i)) -done - -if [ "$docs_count_remote" -ne "$docs_count_local" ]; then - echo "Number of docs in Azure Cognitive Search $docs_count_remote doesn't match the expected docs: $docs_count_local" - exit 1 -fi diff --git a/test_unstructured_ingest/dest/azure.sh b/test_unstructured_ingest/dest/azure.sh deleted file mode 100755 index 208b4a5a4..000000000 --- a/test_unstructured_ingest/dest/azure.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env bash - -set -e - -DEST_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$DEST_PATH") -cd "$SCRIPT_DIR"/.. 
|| exit 1 -OUTPUT_FOLDER_NAME=azure-dest -OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR} -OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} - -if [ -z "$AZURE_DEST_CONNECTION_STR" ]; then - echo "Skipping Azure destination ingest test because the AZURE_DEST_CONNECTION_STR env var is not set." - exit 8 -fi - -CONTAINER=utic-ingest-test-fixtures-output -DIRECTORY=$(uuidgen) -REMOTE_URL_RAW="$CONTAINER/$DIRECTORY/" -REMOTE_URL="abfs://$REMOTE_URL_RAW" - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh -function cleanup() { - cleanup_dir "$OUTPUT_DIR" - cleanup_dir "$WORK_DIR" - - python "$SCRIPT_DIR"/python/test-azure-output.py down \ - --connection-string "$AZURE_DEST_CONNECTION_STR" \ - --container "$CONTAINER" \ - --blob-path "$DIRECTORY" - -} -trap cleanup EXIT - -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} -PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --reprocess \ - --input-path example-docs/pdf/fake-memo.pdf \ - --work-dir "$WORK_DIR" \ - azure \ - --overwrite \ - --remote-url "$REMOTE_URL" \ - --connection-string "$AZURE_DEST_CONNECTION_STR" - -# Simply check the number of files uploaded -python "$SCRIPT_DIR"/python/test-azure-output.py check \ - --expected-files 1 \ - --connection-string "$AZURE_DEST_CONNECTION_STR" \ - --container "$CONTAINER" \ - --blob-path "$DIRECTORY" diff --git a/test_unstructured_ingest/dest/box.sh b/test_unstructured_ingest/dest/box.sh deleted file mode 100755 index 37ad702dd..000000000 --- a/test_unstructured_ingest/dest/box.sh +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env bash -#TODO currently box api/sdk does not work to create folders and check for content similar to other fsspec ingest tests - -# -#set -e -# -#DEST_PATH=$(dirname "$(realpath "$0")") -#SCRIPT_DIR=$(dirname "$DEST_PATH") -#cd "$SCRIPT_DIR"/.. || exit 1 -#OUTPUT_FOLDER_NAME=box-dest -#OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME -#WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME -#max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} -#DESTINATION_BOX="box://utic-dev-tech-fixtures/utic-ingest-test-fixtures-output/$(uuidgen)/" -# -#CI=${CI:-"false"} -# -#if [ -z "$BOX_APP_CONFIG" ] && [ -z "$BOX_APP_CONFIG_PATH" ]; then -# echo "Skipping Box ingest test because neither BOX_APP_CONFIG nor BOX_APP_CONFIG_PATH env vars are set." 
-# exit 0 -#fi -# -#if [ -z "$BOX_APP_CONFIG_PATH" ]; then -# # Create temporary service key file -# BOX_APP_CONFIG_PATH=$(mktemp) -# echo "$BOX_APP_CONFIG" >"$BOX_APP_CONFIG_PATH" -#fi -# -## shellcheck disable=SC1091 -#source "$SCRIPT_DIR"/cleanup.sh -#function cleanup() { -# cleanup_dir "$OUTPUT_DIR" -# cleanup_dir "$WORK_DIR" -# if [ "$CI" == "true" ]; then -# cleanup_dir "$DOWNLOAD_DIR" -# fi -#} -#trap cleanup EXIT -# -#RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} -#PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ -# local \ -# --num-processes "$max_processes" \ -# --output-dir "$OUTPUT_DIR" \ -# --strategy fast \ -# --verbose \ -# --reprocess \ -# --input-path example-docs/pdf/fake-memo.pdf \ -# --work-dir "$WORK_DIR" \ -# box \ -# --box-app-config "$BOX_APP_CONFIG_PATH" \ -# --remote-url "$DESTINATION_BOX" \ -# -## Simply check the number of files uploaded -#expected_num_files=1 diff --git a/test_unstructured_ingest/dest/chroma.sh b/test_unstructured_ingest/dest/chroma.sh deleted file mode 100755 index 926cb4380..000000000 --- a/test_unstructured_ingest/dest/chroma.sh +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env bash - -set -e - -SRC_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$SRC_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=chroma-dest -OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$SCRIPT_DIR/workdir/$OUTPUT_FOLDER_NAME -DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -DESTINATION_PATH=$SCRIPT_DIR/chroma-dest -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} -CI=${CI:-"false"} - -RANDOM_SUFFIX=$((RANDOM % 100000 + 1)) - -COLLECTION_NAME="chroma-test-output-$RANDOM_SUFFIX" - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh - -function cleanup() { - # Kill chroma background process - pgrep -f chroma-dest | xargs kill - cleanup_dir "$DESTINATION_PATH" - cleanup_dir "$OUTPUT_DIR" - cleanup_dir "$WORK_DIR" - if [ "$CI" == "true" ]; then - cleanup_dir "$DOWNLOAD_DIR" - fi -} - -trap cleanup EXIT - -# Run chroma from different script so it can be forced into background -scripts/chroma-test-helpers/create-and-check-chroma.sh "$DESTINATION_PATH" -wait -sleep 5 - -PYTHONPATH=. ./unstructured/ingest/main.py \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --input-path example-docs/book-war-and-peace-1p.txt \ - --work-dir "$WORK_DIR" \ - --chunking-strategy by_title \ - --chunk-max-characters 1500 \ - --chunk-multipage-sections \ - --embedding-provider "langchain-huggingface" \ - chroma \ - --host "localhost" \ - --port 8000 \ - --collection-name "$COLLECTION_NAME" \ - --tenant "default_tenant" \ - --database "default_database" \ - --batch-size 80 - -python "$SCRIPT_DIR"/python/test-ingest-chroma-output.py --collection-name "$COLLECTION_NAME" diff --git a/test_unstructured_ingest/dest/clarifai.sh b/test_unstructured_ingest/dest/clarifai.sh deleted file mode 100755 index 2ed046aae..000000000 --- a/test_unstructured_ingest/dest/clarifai.sh +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env bash - -set -e - -DEST_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$DEST_PATH") -cd "$SCRIPT_DIR"/.. 
|| exit 1 -OUTPUT_FOLDER_NAME=clarifai-dest -OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$SCRIPT_DIR/workdir/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} - -if [ -z "$CLARIFAI_API_KEY" ]; then - echo "Skipping Clarifai ingest test because CLARIFAI_API_KEY env var is not set." - exit 0 - -fi - -RANDOM_SUFFIX=$((RANDOM % 100000 + 1)) -# Set the variables with default values -USER_ID="unstructured" -APP_ID="test-app-unstructured-$RANDOM_SUFFIX" - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh -function cleanup { - # Get response code to check if app really exists - response_code=$(curl \ - -s -o /dev/null \ - -w "%{http_code}" \ - --request GET "https://api.clarifai.com/v2/users/$USER_ID/apps/$APP_ID" \ - --header "Authorization: Key $CLARIFAI_API_KEY") - - # Cleanup (delete) index if it exists - if [ "$response_code" == "200" ]; then - echo "" - echo "deleting clarifai app $APP_ID" - curl --request DELETE "https://api.clarifai.com/v2/users/$USER_ID/apps/$APP_ID" \ - -H "Authorization: Key $CLARIFAI_API_KEY" - - else - echo "There was an error during deletion of clarifai app $APP_ID, with response code: $response_code. App might not exists in your account." - fi - # Local file cleanup - cleanup_dir "$WORK_DIR" - cleanup_dir "$OUTPUT_DIR" -} - -trap cleanup EXIT - -echo "Creating Clarifai app $APP_ID" -response_code=$( - curl \ - -s -o /dev/null \ - -w "%{http_code}" \ - --location --request POST "https://api.clarifai.com/v2/users/$USER_ID/apps/" \ - --header "Content-Type: application/json" \ - --header "Authorization: Key $CLARIFAI_API_KEY" \ - --data-raw "{\"apps\": [{\"id\": \"$APP_ID\", \"default_workflow_id\": \"Universal\"}]}" -) -if [ "$response_code" -lt 400 ]; then - echo "App created successfully: $APP_ID" -else - echo "Failed to create app $APP_ID: $response_code" - exit 1 -fi - -PYTHONPATH=. ./unstructured/ingest/main.py \ - local \ - --input-path example-docs/book-war-and-peace-1p.txt \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --chunking-strategy by_title \ - --num-processes "$max_processes" \ - --work-dir "$WORK_DIR" \ - --verbose \ - clarifai \ - --app-id "$APP_ID" \ - --user-id "$USER_ID" \ - --api-key "$CLARIFAI_API_KEY" \ - --batch-size 100 - -no_of_inputs=0 -sleep_time=5 - -max_retries=10 -retry_count=0 - -while [ "$no_of_inputs" -eq 0 ]; do - echo "checking for no of inputs in clarifai app" - sleep $sleep_time - - if [ "$retry_count" -eq "$max_retries" ]; then - echo "Reached maximum retries limit. Exiting..." - break - fi - - resp=$(curl \ - -s GET "https://api.clarifai.com/v2/users/$USER_ID/apps/$APP_ID/inputs/status" \ - -H "Authorization: Key $CLARIFAI_API_KEY") - - no_of_inputs=$(echo "$resp" | jq -r '.counts.processed' | sed 's/\x1b\[[0-9;]*m//g') - echo "Processed count: $no_of_inputs" - retry_count=$((retry_count + 1)) - -done - -EXPECTED=8 - -if [ "$no_of_inputs" -ne "$EXPECTED" ]; then - echo "Number of inputs in the clarifai app $APP_ID is not equal to expected. Test failed." - exit 1 - -fi diff --git a/test_unstructured_ingest/dest/databricks-volumes.sh b/test_unstructured_ingest/dest/databricks-volumes.sh deleted file mode 100755 index 6cf6e38a2..000000000 --- a/test_unstructured_ingest/dest/databricks-volumes.sh +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env bash - -set -e - -SRC_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$SRC_PATH") -cd "$SCRIPT_DIR"/.. 
|| exit 1 -OUTPUT_FOLDER_NAME=databricks-volumes -OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$SCRIPT_DIR/workdir/$OUTPUT_FOLDER_NAME -DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -DESTINATION_PATH=$SCRIPT_DIR/databricks-volumes -CI=${CI:-"false"} - -RANDOM_SUFFIX=$((RANDOM % 100000 + 1)) - -DATABRICKS_VOLUME="test-platform" -DATABRICKS_VOLUME_PATH="databricks-volumes-test-output-$RANDOM_SUFFIX" - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh - -function cleanup() { - python "$SCRIPT_DIR"/python/test-databricks-volumes.py cleanup \ - --host "$DATABRICKS_HOST" \ - --username "$DATABRICKS_USERNAME" \ - --password "$DATABRICKS_PASSWORD" \ - --volume "$DATABRICKS_VOLUME" \ - --catalog "$DATABRICKS_CATALOG" \ - --volume-path "$DATABRICKS_VOLUME_PATH" - - cleanup_dir "$DESTINATION_PATH" - cleanup_dir "$OUTPUT_DIR" - cleanup_dir "$WORK_DIR" - if [ "$CI" == "true" ]; then - cleanup_dir "$DOWNLOAD_DIR" - fi -} - -trap cleanup EXIT - -PYTHONPATH=. ./unstructured/ingest/main.py \ - local \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --input-path example-docs/pdf/fake-memo.pdf \ - --work-dir "$WORK_DIR" \ - databricks-volumes \ - --host "$DATABRICKS_HOST" \ - --username "$DATABRICKS_USERNAME" \ - --password "$DATABRICKS_PASSWORD" \ - --volume "$DATABRICKS_VOLUME" \ - --catalog "$DATABRICKS_CATALOG" \ - --volume-path "$DATABRICKS_VOLUME_PATH" - -python "$SCRIPT_DIR"/python/test-databricks-volumes.py test \ - --host "$DATABRICKS_HOST" \ - --username "$DATABRICKS_USERNAME" \ - --password "$DATABRICKS_PASSWORD" \ - --volume "$DATABRICKS_VOLUME" \ - --catalog "$DATABRICKS_CATALOG" \ - --volume-path "$DATABRICKS_VOLUME_PATH" diff --git a/test_unstructured_ingest/dest/delta-table.sh b/test_unstructured_ingest/dest/delta-table.sh deleted file mode 100755 index cf54e1054..000000000 --- a/test_unstructured_ingest/dest/delta-table.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env bash - -set -e - -SRC_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$SRC_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=delta-table-dest -OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$SCRIPT_DIR/workdir/$OUTPUT_FOLDER_NAME -DESTINATION_TABLE=$SCRIPT_DIR/delta-table-dest -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} -CI=${CI:-"false"} - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh - -function cleanup() { - cleanup_dir "$DESTINATION_TABLE" - cleanup_dir "$OUTPUT_DIR" - cleanup_dir "$WORK_DIR" -} - -trap cleanup EXIT - -# Make sure directory doesn't exist at the beginning of script as this will cause it to break -if [ -d "$DESTINATION_TABLE" ]; then - echo "cleaning up directory: $DESTINATION_TABLE" - rm -rf "$DESTINATION_TABLE" -else - echo "$DESTINATION_TABLE does not exist or is not a directory, skipping deletion" -fi - -PYTHONPATH=. 
./unstructured/ingest/main.py \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --reprocess \ - --input-path example-docs/pdf/fake-memo.pdf \ - --work-dir "$WORK_DIR" \ - delta-table \ - --table-uri "$DESTINATION_TABLE" - -python "$SCRIPT_DIR"/python/test-ingest-delta-table-output.py --table-uri "$DESTINATION_TABLE" diff --git a/test_unstructured_ingest/dest/dropbox.sh b/test_unstructured_ingest/dest/dropbox.sh deleted file mode 100755 index 52ade6722..000000000 --- a/test_unstructured_ingest/dest/dropbox.sh +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env bash - -set -e - -DEST_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$DEST_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=dropbox-dest -OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR} -OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} -DESTINATION_DROPBOX="/test-output/$(uuidgen)" -CI=${CI:-"false"} - -if [ -z "$DROPBOX_APP_KEY" ] || [ -z "$DROPBOX_APP_SECRET" ] || [ -z "$DROPBOX_REFRESH_TOKEN" ]; then - echo "Skipping Dropbox ingest test because one or more of these env vars is not set:" - echo "DROPBOX_APP_KEY, DROPBOX_APP_SECRET, DROPBOX_REFRESH_TOKEN" - exit 8 -fi - -# Get a new access token from Dropbox -DROPBOX_RESPONSE=$(curl -s https://api.dropbox.com/oauth2/token -d refresh_token="$DROPBOX_REFRESH_TOKEN" -d grant_type=refresh_token -d client_id="$DROPBOX_APP_KEY" -d client_secret="$DROPBOX_APP_SECRET") -DROPBOX_ACCESS_TOKEN=$(jq -r '.access_token' <<<"$DROPBOX_RESPONSE") - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh -function cleanup() { - cleanup_dir "$OUTPUT_DIR" - cleanup_dir "$WORK_DIR" - - echo "deleting test folder $DESTINATION_DROPBOX" - curl -X POST https://api.dropboxapi.com/2/files/delete_v2 \ - --header "Content-Type: application/json" \ - --header "Authorization: Bearer $DROPBOX_ACCESS_TOKEN" \ - --data "{\"path\":\"$DESTINATION_DROPBOX\"}" | jq -} -trap cleanup EXIT - -# Create new folder for test -echo "creating temp directory in dropbox for testing: $DESTINATION_DROPBOX" -response=$(curl -X POST -s -w "\n%{http_code}" https://api.dropboxapi.com/2/files/create_folder_v2 \ - --header "Content-Type: application/json" \ - --header "Authorization: Bearer $DROPBOX_ACCESS_TOKEN" \ - --data "{\"autorename\":false,\"path\":\"$DESTINATION_DROPBOX\"}") -http_code=$(tail -n1 <<<"$response") # get the last line -content=$(sed '$ d' <<<"$response") # get all but the last line which contains the status code - -if [ "$http_code" -ge 300 ]; then - echo "Failed to create temp dir in dropbox: [$http_code] $content" - exit 1 -else - echo "$http_code:" - jq <<<"$content" -fi - -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} -PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --reprocess \ - --input-path example-docs/pdf/fake-memo.pdf \ - --work-dir "$WORK_DIR" \ - dropbox \ - --token "$DROPBOX_ACCESS_TOKEN" \ - --remote-url "dropbox://$DESTINATION_DROPBOX" - -# Simply check the number of files uploaded -expected_num_files=1 -num_files_in_dropbox=$(curl -X POST https://api.dropboxapi.com/2/files/list_folder \ - --header "Content-Type: application/json" \ - --header "Authorization: Bearer $DROPBOX_ACCESS_TOKEN" \ - --data "{\"path\":\"$DESTINATION_DROPBOX/\"}" | jq '.entries | length') 
-if [ "$num_files_in_dropbox" -ne "$expected_num_files" ]; then - echo "Expected $expected_num_files files to be uploaded to dropbox, but found $num_files_in_dropbox files." - exit 1 -fi diff --git a/test_unstructured_ingest/dest/elasticsearch.sh b/test_unstructured_ingest/dest/elasticsearch.sh deleted file mode 100755 index c4e6c8fe2..000000000 --- a/test_unstructured_ingest/dest/elasticsearch.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env bash - -set -e - -DEST_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$DEST_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=elasticsearch-dest -OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR} -OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME -CI=${CI:-"false"} -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh -# shellcheck disable=SC1091 -source scripts/elasticsearch-test-helpers/common/es-dest-ingest-test-creds.env -function cleanup { - # Index cleanup - echo "Stopping Elasticsearch Docker container" - docker-compose -f scripts/elasticsearch-test-helpers/common/docker-compose.yaml down --remove-orphans -v - - # Local file cleanup - cleanup_dir "$WORK_DIR" - cleanup_dir "$OUTPUT_DIR" - if [ "$CI" == "true" ]; then - cleanup_dir "$DOWNLOAD_DIR" - fi -} - -trap cleanup EXIT - -echo "Creating elasticsearch instance" -# shellcheck source=/dev/null -scripts/elasticsearch-test-helpers/destination_connector/create-elasticsearch-instance.sh -wait - -PYTHONPATH=. ./unstructured/ingest/main.py \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --reprocess \ - --input-path example-docs/book-war-and-peace-1225p.txt \ - --work-dir "$WORK_DIR" \ - --chunking-strategy by_title \ - --chunk-combine-text-under-n-chars 200 \ - --chunk-new-after-n-chars 2500 \ - --chunk-max-characters 38000 \ - --chunk-multipage-sections \ - --embedding-provider "langchain-huggingface" \ - elasticsearch \ - --hosts http://localhost:9200 \ - --index-name ingest-test-destination \ - --username "$ELASTIC_USER" \ - --password "$ELASTIC_PASSWORD" \ - --batch-size-bytes 15000000 \ - --num-threads "$max_processes" - -desired_count=$(cat "$WORK_DIR"/upload_stage/* | jq 'length') -desired_embeddings=$(cat "$WORK_DIR"/upload_stage/* | jq '.[0]._source.embeddings' | tr -d '\n') - -PYTHONPATH=. scripts/elasticsearch-test-helpers/destination_connector/test-ingest-elasticsearch-output.py \ - --num-elements "$desired_count" \ - --embeddings "$desired_embeddings" diff --git a/test_unstructured_ingest/dest/gcs.sh b/test_unstructured_ingest/dest/gcs.sh deleted file mode 100755 index 21571a937..000000000 --- a/test_unstructured_ingest/dest/gcs.sh +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env bash - -set -e - -DEST_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$DEST_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=gcs-dest -OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR} -OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} -BUCKET="utic-test-ingest-fixtures-output" -DIRECTORY=$(uuidgen) -DESTINATION_GCS="gs://$BUCKET/$DIRECTORY" -CI=${CI:-"false"} - -if [ -z "$GCP_INGEST_SERVICE_KEY" ]; then - echo "Skipping Google Drive ingest test because the GCP_INGEST_SERVICE_KEY env var is not set." 
- exit 8 -fi - -# Create temporary service key file -GCP_INGEST_SERVICE_KEY_FILE=$(mktemp) -echo "$GCP_INGEST_SERVICE_KEY" >"$GCP_INGEST_SERVICE_KEY_FILE" - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh -function cleanup() { - cleanup_dir "$OUTPUT_DIR" - cleanup_dir "$WORK_DIR" - - python "$SCRIPT_DIR"/python/test-gcs-output.py down \ - --service-account-file "$GCP_INGEST_SERVICE_KEY_FILE" \ - --bucket "$BUCKET" \ - --blob-path "$DIRECTORY" - -} - -trap cleanup EXIT - -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} -PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --reprocess \ - --input-path example-docs/pdf/fake-memo.pdf \ - --work-dir "$WORK_DIR" \ - gcs \ - --service-account-key "$GCP_INGEST_SERVICE_KEY_FILE" \ - --remote-url "$DESTINATION_GCS" - -# Simply check the number of files uploaded -python "$SCRIPT_DIR"/python/test-gcs-output.py check \ - --expected-files 1 \ - --service-account-file "$GCP_INGEST_SERVICE_KEY_FILE" \ - --bucket "$BUCKET" \ - --blob-path "$DIRECTORY" diff --git a/test_unstructured_ingest/dest/kafka-local.sh b/test_unstructured_ingest/dest/kafka-local.sh deleted file mode 100755 index 9086687ed..000000000 --- a/test_unstructured_ingest/dest/kafka-local.sh +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env bash - -set -e - -DEST_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$DEST_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=local-kafka-dest -OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$SCRIPT_DIR/workdir/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} - -RANDOM_SUFFIX=$((RANDOM % 100000 + 1)) - -LC_ALL=C - -# Set the variables with default values if they're not set in the environment -KAFKA_TOPIC=${KAFKA_TOPIC:-"ingest-test-$RANDOM_SUFFIX"} - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh -function cleanup { - # Local file cleanup - cleanup_dir "$WORK_DIR" - cleanup_dir "$OUTPUT_DIR" - - echo "Stopping local Kafka instance" - docker-compose -f scripts/kafka-test-helpers/docker-compose.yml down --remove-orphans -v -} - -trap cleanup EXIT - -echo "Creating local Kafka instance" -# shellcheck source=/dev/null -scripts/kafka-test-helpers/create-kafka-instance.sh -wait - -PYTHONPATH=. 
./unstructured/ingest/main.py \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --reprocess \ - --input-path example-docs/pdf/layout-parser-paper.pdf \ - --work-dir "$WORK_DIR" \ - --chunking-strategy basic \ - --chunk-combine-text-under-n-chars 200 \ - --chunk-new-after-n-chars 2500 \ - --chunk-max-characters 38000 \ - --chunk-multipage-sections \ - --embedding-provider "langchain-huggingface" \ - kafka \ - --topic "$KAFKA_TOPIC" \ - --bootstrap-server "$KAFKA_BOOTSTRAP_SERVER" \ - --port 29092 \ - --confluent false - -echo "Checking for matching messages in Kafka" - -#Check the number of messages in destination topic -python "$SCRIPT_DIR"/python/test-kafka-output.py check \ - --bootstrap-server "$KAFKA_BOOTSTRAP_SERVER" \ - --topic "$KAFKA_TOPIC" \ - --confluent false \ - --port 29092 diff --git a/test_unstructured_ingest/dest/mongodb.sh b/test_unstructured_ingest/dest/mongodb.sh deleted file mode 100755 index 938af0d5f..000000000 --- a/test_unstructured_ingest/dest/mongodb.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env bash -# shellcheck disable=SC2012 - -set -e - -DEST_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$DEST_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=mongodb-dest -OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR} -OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} -DESTINATION_MONGO_COLLECTION="utic-test-ingest-fixtures-output-$(uuidgen)" -CI=${CI:-"false"} - -if [ -z "$MONGODB_URI" ] && [ -z "$MONGODB_DATABASE_NAME" ]; then - echo "Skipping MongoDB destination ingest test because the MONGODB_URI and MONGODB_DATABASE_NAME env var are not set." - exit 8 -fi - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh -function cleanup() { - cleanup_dir "$OUTPUT_DIR" - cleanup_dir "$WORK_DIR" - - python "$SCRIPT_DIR"/python/test-ingest-mongodb.py \ - --uri "$MONGODB_URI" \ - --database "$MONGODB_DATABASE_NAME" \ - --collection "$DESTINATION_MONGO_COLLECTION" down - -} - -trap cleanup EXIT - -# NOTE(robinson) - per pymongo docs, pymongo ships with its own version of the bson library, -# which is incompatible with the bson installed from pypi. bson is installed as part of the -# astradb dependencies. 
-# ref: https://pymongo.readthedocs.io/en/stable/installation.html -python -m pip uninstall -y bson pymongo -make install-ingest-mongodb - -python "$SCRIPT_DIR"/python/test-ingest-mongodb.py \ - --uri "$MONGODB_URI" \ - --database "$MONGODB_DATABASE_NAME" \ - --collection "$DESTINATION_MONGO_COLLECTION" up - -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} -PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --reprocess \ - --input-path example-docs/pdf/fake-memo.pdf \ - --work-dir "$WORK_DIR" \ - --embedding-provider "langchain-huggingface" \ - mongodb \ - --uri "$MONGODB_URI" \ - --database "$MONGODB_DATABASE_NAME" \ - --collection "$DESTINATION_MONGO_COLLECTION" - -python "$SCRIPT_DIR"/python/test-ingest-mongodb.py \ - --uri "$MONGODB_URI" \ - --database "$MONGODB_DATABASE_NAME" \ - --collection "$DESTINATION_MONGO_COLLECTION" \ - check --expected-records 5 - -stage_file=$(ls -1 "$WORK_DIR"/upload_stage | head -n 1) -python "$SCRIPT_DIR"/python/test-ingest-mongodb.py \ - --uri "$MONGODB_URI" \ - --database "$MONGODB_DATABASE_NAME" \ - --collection "$DESTINATION_MONGO_COLLECTION" \ - check-vector \ - --output-json "$WORK_DIR"/upload_stage/"$stage_file" diff --git a/test_unstructured_ingest/dest/opensearch.sh b/test_unstructured_ingest/dest/opensearch.sh deleted file mode 100755 index 003e4f286..000000000 --- a/test_unstructured_ingest/dest/opensearch.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env bash - -set -e - -DEST_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$DEST_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=opensearch-dest -OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR} -OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME -CI=${CI:-"false"} -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh -function cleanup { - # Index cleanup - echo "Stopping OpenSearch Docker container" - docker-compose -f scripts/opensearch-test-helpers/common/docker-compose.yaml down --remove-orphans -v - - # Local file cleanup - cleanup_dir "$WORK_DIR" - cleanup_dir "$OUTPUT_DIR" - if [ "$CI" == "true" ]; then - cleanup_dir "$DOWNLOAD_DIR" - fi -} - -trap cleanup EXIT - -echo "Creating opensearch instance" -# shellcheck source=/dev/null -scripts/opensearch-test-helpers/destination_connector/create-opensearch-instance.sh -wait - -PYTHONPATH=. ./unstructured/ingest/main.py \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --reprocess \ - --input-path example-docs/pdf/fake-memo.pdf \ - --work-dir "$WORK_DIR" \ - --embedding-provider "langchain-huggingface" \ - opensearch \ - --hosts http://localhost:9247 \ - --index-name ingest-test-destination \ - --username "admin" \ - --password "admin" \ - --use-ssl \ - --batch-size-bytes 150 \ - --num-threads "$max_processes" - -scripts/opensearch-test-helpers/destination_connector/test-ingest-opensearch-output.py diff --git a/test_unstructured_ingest/dest/pgvector.sh b/test_unstructured_ingest/dest/pgvector.sh deleted file mode 100755 index 25836cf1d..000000000 --- a/test_unstructured_ingest/dest/pgvector.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env bash - -set -e - -SRC_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$SRC_PATH") -cd "$SCRIPT_DIR"/.. 
|| exit 1 -OUTPUT_FOLDER_NAME=sql-dest -OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR} -OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} -CI=${CI:-"false"} -DATABASE_TYPE="pgvector" - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh -function cleanup { - echo "Stopping SQL DB Docker container" - docker-compose -f scripts/sql-test-helpers/docker-compose-"$DATABASE_TYPE".yaml down --remove-orphans -v - # Local file cleanup - cleanup_dir "$WORK_DIR" - cleanup_dir "$OUTPUT_DIR" - if [ "$CI" == "true" ]; then - cleanup_dir "$DOWNLOAD_DIR" - fi -} - -trap cleanup EXIT - -# Create sql instance and create `elements` class -echo "Creating SQL DB instance" -# shellcheck source=/dev/null -scripts/sql-test-helpers/create-sql-instance.sh "$DATABASE_TYPE" -wait - -PYTHONPATH=. ./unstructured/ingest/main.py \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --input-path example-docs/pdf/fake-memo.pdf \ - --work-dir "$WORK_DIR" \ - --embedding-provider "langchain-huggingface" \ - sql \ - --db-type "postgresql" \ - --username unstructured \ - --password test \ - --host localhost \ - --port 5433 \ - --database elements - -"$SCRIPT_DIR"/python/test-ingest-sql-output.py "$DATABASE_TYPE" "5433" diff --git a/test_unstructured_ingest/dest/pinecone.sh b/test_unstructured_ingest/dest/pinecone.sh deleted file mode 100755 index 45adaca83..000000000 --- a/test_unstructured_ingest/dest/pinecone.sh +++ /dev/null @@ -1,134 +0,0 @@ -#!/usr/bin/env bash - -set -e - -DEST_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$DEST_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=s3-pinecone-dest -OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$SCRIPT_DIR/workdir/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} -writer_processes=$(((max_processes - 1) > 1 ? (max_processes - 1) : 2)) - -if [ -z "$PINECONE_API_KEY" ]; then - echo "Skipping Pinecone ingest test because PINECONE_API_KEY env var is not set." - exit 0 -fi - -RANDOM_SUFFIX=$((RANDOM % 100000 + 1)) - -# Set the variables with default values if they're not set in the environment -PINECONE_INDEX=${PINECONE_INDEX:-"ingest-test-$RANDOM_SUFFIX"} -PINECONE_HOST_POSTFIX=${PINECONE_HOST_POSTFIX:-"4627-b74a"} -PINECONE_ENVIRONMENT=${PINECONE_ENVIRONMENT:-"us-east1-gcp"} -PINECONE_PROJECT_ID=${PINECONE_PROJECT_ID:-"art8iaj"} - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh -function cleanup { - - # Get response code to check if index exists - response_code=$(curl \ - -s -o /dev/null \ - -w "%{http_code}" \ - --request GET \ - --url "https://api.pinecone.io/indexes/$PINECONE_INDEX" \ - --header 'accept: application/json' \ - --header "Api-Key: $PINECONE_API_KEY") - - # Cleanup (delete) index if it exists - if [ "$response_code" == "200" ]; then - echo "" - echo "deleting index $PINECONE_INDEX" - curl --request DELETE \ - "https://api.pinecone.io/indexes/$PINECONE_INDEX" \ - --header "Api-Key: $PINECONE_API_KEY" \ - --header 'content-type: application/json' - - else - echo "There was an error during index deletion for index $PINECONE_INDEX, with response code: $response_code. It might be that index $PINECONE_INDEX does not exist, so there is nothing to delete." 
- fi - - # Local file cleanup - cleanup_dir "$WORK_DIR" - cleanup_dir "$OUTPUT_DIR" -} - -trap cleanup EXIT - -echo "Creating index $PINECONE_INDEX" -response_code=$(curl \ - -s -o /dev/null \ - -w "%{http_code}" \ - --request POST \ - --url "https://api.pinecone.io/indexes" \ - --header "accept: application/json" \ - --header "content-type: application/json" \ - --header "Api-Key: $PINECONE_API_KEY" \ - --data ' -{ - "name": "'"$PINECONE_INDEX"'", - "dimension": 384, - "metric": "cosine", - "spec": { - "serverless": { - "cloud": "aws", - "region": "us-east-1" - } - } -} -') - -if [ "$response_code" -lt 400 ]; then - echo "Index creation success: $response_code" -else - echo "Index creation failure: $response_code" - exit 1 -fi - -PYTHONPATH=. ./unstructured/ingest/main.py \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --reprocess \ - --input-path example-docs/book-war-and-peace-1225p.txt \ - --work-dir "$WORK_DIR" \ - --chunking-strategy by_title \ - --chunk-combine-text-under-n-chars 150 --chunk-new-after-n-chars 1500 --chunk-max-characters 2500 --chunk-multipage-sections \ - --embedding-provider "langchain-huggingface" \ - pinecone \ - --api-key "$PINECONE_API_KEY" \ - --index-name "$PINECONE_INDEX" \ - --environment "$PINECONE_ENVIRONMENT" \ - --batch-size 80 \ - --num-processes "$writer_processes" - -# It can take some time for the index to catch up with the content that was written, this check between 10s sleeps -# to give it that time process the writes. Will timeout after checking for a minute. -num_of_vectors_remote=0 -attempt=1 -sleep_amount=30 -while [ "$num_of_vectors_remote" -eq 0 ] && [ "$attempt" -lt 4 ]; do - echo "attempt $attempt: sleeping $sleep_amount seconds to let index finish catching up after writes" - sleep $sleep_amount - - num_of_vectors_remote=$(curl --request POST \ - -s \ - --url "https://$PINECONE_INDEX-$PINECONE_PROJECT_ID.svc.aped-$PINECONE_HOST_POSTFIX.pinecone.io/describe_index_stats" \ - --header "accept: application/json" \ - --header "content-type: application/json" \ - --header "Api-Key: $PINECONE_API_KEY" | jq -r '.totalVectorCount') - - echo "vector count in Pinecone: $num_of_vectors_remote" - attempt=$((attempt + 1)) -done - -EXPECTED=1835 - -if [ "$num_of_vectors_remote" -ne $EXPECTED ]; then - echo "Number of vectors in Pinecone are $num_of_vectors_remote when the expected number is $EXPECTED. Test failed." - exit 1 -fi diff --git a/test_unstructured_ingest/dest/qdrant.sh b/test_unstructured_ingest/dest/qdrant.sh deleted file mode 100755 index ec9cf7cee..000000000 --- a/test_unstructured_ingest/dest/qdrant.sh +++ /dev/null @@ -1,87 +0,0 @@ -#!/bin/bash - -set -ex - -DEST_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$DEST_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=qdrant-dest -OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$SCRIPT_DIR/workdir/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} -writer_processes=$(((max_processes - 1) > 1 ? 
(max_processes - 1) : 2)) -CONTAINTER_NAME="qdrant_test" -QDRANT_PORT=6333 -QDRANT_HOST=localhost:$QDRANT_PORT -COLLECTION_NAME="qdrant-test-$(date +%s)" -EXPECTED_POINTS_COUNT=1387 -RETRIES=5 - -function stop_docker() { - docker stop $CONTAINTER_NAME -} - -docker run -d --rm \ - -p 6333:$QDRANT_PORT \ - --name $CONTAINTER_NAME qdrant/qdrant:latest - -trap stop_docker SIGINT -trap stop_docker ERR - -until curl --output /dev/null --silent --get --fail http://$QDRANT_HOST/collections; do - RETRIES=$((RETRIES - 1)) - if [ "$RETRIES" -le 0 ]; then - echo "Qdrant server failed to start" - stop_docker - exit 1 - fi - printf 'Waiting for Qdrant server to start...' - sleep 5 -done - -curl -X PUT \ - http://$QDRANT_HOST/collections/"$COLLECTION_NAME" \ - -H 'Content-Type: application/json' \ - -d '{ - "vectors": { - "size": 384, - "distance": "Cosine" - } -}' - -EMBEDDING_PROVIDER=${EMBEDDING_PROVIDER:-"langchain-huggingface"} - -PYTHONPATH=. ./unstructured/ingest/main.py \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --reprocess \ - --input-path example-docs/book-war-and-peace-1225p.txt \ - --work-dir "$WORK_DIR" \ - --chunking-strategy by_title \ - --chunk-combine-text-under-n-chars 200 --chunk-new-after-n-chars 2500 --chunk-max-characters 38000 --chunk-multipage-sections \ - --embedding-provider "langchain-huggingface" \ - qdrant \ - --collection-name "$COLLECTION_NAME" \ - --location "http://"$QDRANT_HOST \ - --batch-size 80 \ - --num-processes "$writer_processes" - -response=$(curl -s -X POST \ - $QDRANT_HOST/collections/"$COLLECTION_NAME"/points/count \ - -H 'Content-Type: application/json' \ - -d '{ - "exact": true -}') - -count=$(echo "$response" | jq -r '.result.count') - -if [ "$count" -ne $EXPECTED_POINTS_COUNT ]; then - echo "Points count assertion failed. Expected: $EXPECTED. Got: $count. Test failed." - stop_docker - exit 1 -fi - -stop_docker diff --git a/test_unstructured_ingest/dest/s3.sh b/test_unstructured_ingest/dest/s3.sh deleted file mode 100755 index b8d0b901e..000000000 --- a/test_unstructured_ingest/dest/s3.sh +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env bash - -set -e - -DEST_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$DEST_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=s3-dest -OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR} -WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} -DESTINATION_S3="s3://utic-ingest-test-fixtures/destination/$(uuidgen)/" -CI=${CI:-"false"} - -if [ -z "$S3_INGEST_TEST_ACCESS_KEY" ] || [ -z "$S3_INGEST_TEST_SECRET_KEY" ]; then - echo "Skipping S3 ingest test because S3_INGEST_TEST_ACCESS_KEY or S3_INGEST_TEST_SECRET_KEY env var is not set." 
- exit 8 -fi - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh -function cleanup() { - cleanup_dir "$WORK_DIR" - - if AWS_ACCESS_KEY_ID="$S3_INGEST_TEST_ACCESS_KEY" AWS_SECRET_ACCESS_KEY="$S3_INGEST_TEST_SECRET_KEY" aws s3 ls "$DESTINATION_S3" --region us-east-2; then - echo "deleting destination s3 location: $DESTINATION_S3" - AWS_ACCESS_KEY_ID="$S3_INGEST_TEST_ACCESS_KEY" AWS_SECRET_ACCESS_KEY="$S3_INGEST_TEST_SECRET_KEY" aws s3 rm "$DESTINATION_S3" --recursive --region us-east-2 - fi - -} -trap cleanup EXIT - -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} -PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ - local \ - --num-processes "$max_processes" \ - --strategy fast \ - --verbose \ - --reprocess \ - --input-path example-docs/pdf/fake-memo.pdf \ - --work-dir "$WORK_DIR" \ - s3 \ - --key "$S3_INGEST_TEST_ACCESS_KEY" \ - --secret "$S3_INGEST_TEST_SECRET_KEY" \ - --remote-url "$DESTINATION_S3" - -# Simply check the number of files uploaded -expected_num_files=1 -num_files_in_s3=$(AWS_ACCESS_KEY_ID="$S3_INGEST_TEST_ACCESS_KEY" AWS_SECRET_ACCESS_KEY="$S3_INGEST_TEST_SECRET_KEY" aws s3 ls "${DESTINATION_S3}" --region us-east-2 | grep -c "\.json$") -if [ "$num_files_in_s3" -ne "$expected_num_files" ]; then - echo "Expected $expected_num_files files to be uploaded to s3, but found $num_files_in_s3 files." - exit 1 -else - echo "Expected number of files found: $num_files_in_s3/$expected_num_files" -fi diff --git a/test_unstructured_ingest/dest/sharepoint-embed-cog-index.sh b/test_unstructured_ingest/dest/sharepoint-embed-cog-index.sh deleted file mode 100755 index 5c222a459..000000000 --- a/test_unstructured_ingest/dest/sharepoint-embed-cog-index.sh +++ /dev/null @@ -1,135 +0,0 @@ -#!/usr/bin/env bash - -set -e - -SRC_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$SRC_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=sharepoint-azure-dest -OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR} -OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME -DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -DESTINATION_INDEX="utic-test-ingest-fixtures-output-$(uuidgen)" -# The vector configs on the schema currently only exist on versions: -# 2023-07-01-Preview, 2021-04-30-Preview, 2020-06-30-Preview -API_VERSION=2023-07-01-Preview -CI=${CI:-"false"} - -if [ -z "$SHAREPOINT_CLIENT_ID" ] || [ -z "$SHAREPOINT_CRED" ]; then - echo "Skipping Sharepoint ingest test because the SHAREPOINT_CLIENT_ID or SHAREPOINT_CRED env var is not set." - exit 8 -fi - -if [ -z "$SHAREPOINT_PERMISSIONS_APP_ID" ] || [ -z "$SHAREPOINT_PERMISSIONS_APP_CRED" ] || [ -z "$SHAREPOINT_PERMISSIONS_TENANT" ]; then - echo "Skipping Sharepoint ingest test because the SHAREPOINT_PERMISSIONS_APP_ID, SHAREPOINT_PERMISSIONS_APP_CRED, or SHAREPOINT_PERMISSIONS_TENANT env var is not set." - exit 8 -fi - -if [ -z "$OPENAI_API_KEY" ]; then - echo "Skipping Sharepoint embedding ingest test because the OPENAI_API_KEY env var is not set." - exit 8 -fi - -if [ -z "$AZURE_SEARCH_ENDPOINT" ] && [ -z "$AZURE_SEARCH_API_KEY" ]; then - echo "Skipping Sharepoint Azure Cognitive Search ingest test because neither AZURE_SEARCH_ENDPOINT nor AZURE_SEARCH_API_KEY env vars are set." 
- exit 8 -fi - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh - -function cleanup { - response_code=$(curl -s -o /dev/null -w "%{http_code}" \ - "https://utic-test-ingest-fixtures.search.windows.net/indexes/$DESTINATION_INDEX?api-version=$API_VERSION" \ - --header "api-key: $AZURE_SEARCH_API_KEY" \ - --header 'content-type: application/json') - if [ "$response_code" == "200" ]; then - echo "deleting index $DESTINATION_INDEX" - curl -X DELETE \ - "https://utic-test-ingest-fixtures.search.windows.net/indexes/$DESTINATION_INDEX?api-version=$API_VERSION" \ - --header "api-key: $AZURE_SEARCH_API_KEY" \ - --header 'content-type: application/json' - else - echo "Index $DESTINATION_INDEX does not exist, nothing to delete" - fi - - cleanup_dir "$OUTPUT_DIR" - cleanup_dir "$WORK_DIR" - if [ "$CI" == "true" ]; then - cleanup_dir "$DOWNLOAD_DIR" - fi -} - -trap cleanup EXIT - -# Create index -echo "Creating index $DESTINATION_INDEX" -response_code=$(curl -s -o /dev/null -w "%{http_code}" -X PUT \ - "https://utic-test-ingest-fixtures.search.windows.net/indexes/$DESTINATION_INDEX?api-version=$API_VERSION" \ - --header "api-key: $AZURE_SEARCH_API_KEY" \ - --header 'content-type: application/json' \ - --data "@$SCRIPT_DIR/files/azure_cognitive_index_schema.json") - -if [ "$response_code" -lt 400 ]; then - echo "Index creation success: $response_code" -else - echo "Index creation failure: $response_code" - exit 1 -fi - -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} -PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ - sharepoint \ - --download-dir "$DOWNLOAD_DIR" \ - --metadata-exclude file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ - --num-processes 2 \ - --strategy hi_res \ - --preserve-downloads \ - --reprocess \ - --output-dir "$OUTPUT_DIR" \ - --verbose \ - --client-cred "$SHAREPOINT_CRED" \ - --client-id "$SHAREPOINT_CLIENT_ID" \ - --site "$SHAREPOINT_SITE" \ - --permissions-application-id "$SHAREPOINT_PERMISSIONS_APP_ID" \ - --permissions-client-cred "$SHAREPOINT_PERMISSIONS_APP_CRED" \ - --permissions-tenant "$SHAREPOINT_PERMISSIONS_TENANT" \ - --path "Shared Documents" \ - --recursive \ - --embedding-provider "langchain-huggingface" \ - --chunking-strategy by_title \ - --chunk-multipage-sections \ - --work-dir "$WORK_DIR" \ - azure-cognitive-search \ - --key "$AZURE_SEARCH_API_KEY" \ - --endpoint "$AZURE_SEARCH_ENDPOINT" \ - --index "$DESTINATION_INDEX" - -# It can take some time for the index to catch up with the content that was written, this check between 10s sleeps -# to give it that time process the writes. Will timeout after checking for a minute. 
-docs_count_remote=0 -attempt=1 -while [ "$docs_count_remote" -eq 0 ] && [ "$attempt" -lt 6 ]; do - echo "attempt $attempt: sleeping 10 seconds to let index finish catching up after writes" - sleep 10 - - # Check the contents of the index - docs_count_remote=$(curl "https://utic-test-ingest-fixtures.search.windows.net/indexes/$DESTINATION_INDEX/docs/\$count?api-version=$API_VERSION" \ - --header "api-key: $AZURE_SEARCH_API_KEY" \ - --header 'content-type: application/json' | jq) - - echo "docs count pulled from Azure: $docs_count_remote" - - attempt=$((attempt + 1)) -done - -docs_count_local=0 -for i in $(jq length "$OUTPUT_DIR"/**/*.json); do - docs_count_local=$((docs_count_local + i)) -done - -if [ "$docs_count_remote" -ne "$docs_count_local" ]; then - echo "Number of docs $docs_count_remote doesn't match the expected docs: $docs_count_local" - exit 1 -fi diff --git a/test_unstructured_ingest/dest/singlestore.sh b/test_unstructured_ingest/dest/singlestore.sh deleted file mode 100755 index a04f81370..000000000 --- a/test_unstructured_ingest/dest/singlestore.sh +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env bash - -set -e - -DEST_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$DEST_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=singlestore-dest -OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR} -OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME -CI=${CI:-"false"} -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh -function cleanup { - # Index cleanup - echo "Stopping Singlestore Docker container" - docker compose -f scripts/singlestore-test-helpers/docker-compose.yml down --remove-orphans -v - - # Local file cleanup - cleanup_dir "$WORK_DIR" - cleanup_dir "$OUTPUT_DIR" - -} - -trap cleanup EXIT - -# Create singlestore instance and create `elements` class -echo "Creating singlestore instance" -# shellcheck source=/dev/null -docker compose -f scripts/singlestore-test-helpers/docker-compose.yml up -d --wait-timeout 60 - -DATABASE=ingest_test -USER=root -HOST=localhost -PASSWORD=password -PORT=3306 -TABLE=elements - -PYTHONPATH=. ./unstructured/ingest/main.py \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --reprocess \ - --input-path example-docs/pdf/fake-memo.pdf \ - --work-dir "$WORK_DIR" \ - --embedding-provider "langchain-huggingface" \ - singlestore \ - --host $HOST \ - --user $USER \ - --password $PASSWORD \ - --database $DATABASE \ - --port $PORT \ - --table-name $TABLE \ - --drop-empty-cols - -expected_num_elements=$(cat "$WORK_DIR"/embed/* | jq 'length') -./scripts/singlestore-test-helpers/test_outputs.py \ - --table-name $TABLE \ - --database $DATABASE \ - --num-elements "$expected_num_elements" diff --git a/test_unstructured_ingest/dest/sqlite.sh b/test_unstructured_ingest/dest/sqlite.sh deleted file mode 100755 index 9cd54b35e..000000000 --- a/test_unstructured_ingest/dest/sqlite.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env bash - -set -e - -SRC_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$SRC_PATH") -cd "$SCRIPT_DIR"/.. 
|| exit 1
-OUTPUT_FOLDER_NAME=sql-dest
-OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR}
-OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME
-WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME
-max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")}
-CI=${CI:-"false"}
-DATABASE_TYPE="sqlite"
-DB_PATH=$SCRIPT_DIR/elements.db
-
-# shellcheck disable=SC1091
-source "$SCRIPT_DIR"/cleanup.sh
-function cleanup {
-  # Local file cleanup
-  cleanup_dir "$WORK_DIR"
-  cleanup_dir "$OUTPUT_DIR"
-  rm -rf "$DB_PATH"
-  if [ "$CI" == "true" ]; then
-    cleanup_dir "$DOWNLOAD_DIR"
-
-  fi
-}
-
-trap cleanup EXIT
-
-# Create sql instance and create `elements` class
-echo "Creating SQL DB instance"
-# shellcheck source=/dev/null
-scripts/sql-test-helpers/create-sql-instance.sh "$DATABASE_TYPE" "$DB_PATH"
-wait
-
-PYTHONPATH=. ./unstructured/ingest/main.py \
-  local \
-  --num-processes "$max_processes" \
-  --output-dir "$OUTPUT_DIR" \
-  --strategy fast \
-  --verbose \
-  --reprocess \
-  --input-path example-docs/pdf/fake-memo.pdf \
-  --work-dir "$WORK_DIR" \
-  sql \
-  --db-type "$DATABASE_TYPE" \
-  --username unstructured \
-  --database "$DB_PATH"
-
-"$SCRIPT_DIR"/python/test-ingest-sql-output.py "$DATABASE_TYPE" "$DB_PATH"
diff --git a/test_unstructured_ingest/dest/vectara.sh b/test_unstructured_ingest/dest/vectara.sh
deleted file mode 100755
index 0ba223d44..000000000
--- a/test_unstructured_ingest/dest/vectara.sh
+++ /dev/null
@@ -1,94 +0,0 @@
-#!/usr/bin/env bash
-
-set -e
-
-DEST_PATH=$(dirname "$(realpath "$0")")
-SCRIPT_DIR=$(dirname "$DEST_PATH")
-cd "$SCRIPT_DIR"/.. || exit 1
-OUTPUT_FOLDER_NAME=local-vectara-dest
-OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME
-WORK_DIR=$SCRIPT_DIR/workdir/$OUTPUT_FOLDER_NAME
-max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")}
-
-RANDOM_SUFFIX=$((RANDOM % 100000 + 1))
-CORPUS_NAME="test-corpus-vectara-"$RANDOM_SUFFIX
-
-# Expected size of the uploaded document
-EXPECTED_CORPUS_SIZE=8843308
-
-if [ -z "$VECTARA_OAUTH_CLIENT_ID" ] && [ -z "$VECTARA_OAUTH_SECRET" ] && [ -z "$VECTARA_CUSTOMER_ID" ]; then
-  echo "Skipping VECTARA ingest test because VECTARA_OAUTH_CLIENT_ID, VECTARA_OAUTH_SECRET, or VECTARA_CUSTOMER_ID env var is not set."
-  exit 8
-fi
-
-# shellcheck disable=SC1091
-source "$SCRIPT_DIR"/cleanup.sh
-function cleanup {
-  echo "Deleting corpus $corpus_id ($CORPUS_NAME)"
-  curl -sS -L -X POST 'https://api.vectara.io/v1/delete-corpus' \
-    -H 'Content-Type: application/json' \
-    -H 'Accept: application/json' \
-    -H "Authorization: Bearer $access_token" \
-    -H "customer-id: $VECTARA_CUSTOMER_ID" \
-    --data-raw "{
-      \"corpusId\": $corpus_id
-    }"
-
-  # Local file cleanup
-  cleanup_dir "$WORK_DIR"
-  cleanup_dir "$OUTPUT_DIR"
-}
-
-trap cleanup EXIT
-
-PYTHONPATH=. ./unstructured/ingest/main.py \
-  local \
-  --num-processes "$max_processes" \
-  --output-dir "$OUTPUT_DIR" \
-  --strategy fast \
-  --verbose \
-  --reprocess \
-  --input-path example-docs/book-war-and-peace-1225p.txt \
-  --work-dir "$WORK_DIR" \
-  vectara \
-  --customer-id "$VECTARA_CUSTOMER_ID" \
-  --oauth-client-id "$VECTARA_OAUTH_CLIENT_ID" \
-  --oauth-secret "$VECTARA_OAUTH_SECRET" \
-  --corpus-name "$CORPUS_NAME"
-
-# Get JWT token
-jwt_token_resp=$(curl -sS -XPOST -H "Content-type: application/x-www-form-urlencoded" -d \
-  "grant_type=client_credentials&client_id=$VECTARA_OAUTH_CLIENT_ID&client_secret=$VECTARA_OAUTH_SECRET" \
-  "https://vectara-prod-$VECTARA_CUSTOMER_ID.auth.us-west-2.amazoncognito.com/oauth2/token")
-access_token=$(echo "$jwt_token_resp" | jq -r '.access_token')
-
-# Get corpus ID from name
-corpora_resp=$(curl -sS -L -X POST 'https://api.vectara.io/v1/list-corpora' \
-  -H 'Content-Type: application/json' \
-  -H 'Accept: application/json' \
-  -H "customer-id: $VECTARA_CUSTOMER_ID" \
-  -H "Authorization: Bearer $access_token" \
-  --data-raw "{
-    \"numResults\": 100,
-    \"filter\": \"$CORPUS_NAME\"
-  }")
-corpus_id=$(echo "$corpora_resp" | jq -r '.corpus[0].id')
-
-# Check that the size of the corpus is as expected
-get_corpus_size=$(curl -L -X POST 'https://api.vectara.io/v1/compute-corpus-size' \
-  -H 'Content-Type: application/json' \
-  -H 'Accept: application/json' \
-  -H "customer-id: $VECTARA_CUSTOMER_ID" \
-  -H "Authorization: Bearer $access_token" \
-  --data-raw "{
-    \"corpusId\": $corpus_id
-}")
-corpus_size=$(echo "$get_corpus_size" | jq -r '.size.size')
-
-if [ "$corpus_size" == "$EXPECTED_CORPUS_SIZE" ]; then
-  echo "Corpus size is as expected: $corpus_size"
-else
-  echo "Corpus size is not as expected: $corpus_size"
-  echo "vs $EXPECTED_CORPUS_SIZE"
-  exit 1
-fi
diff --git a/test_unstructured_ingest/dest/weaviate.sh b/test_unstructured_ingest/dest/weaviate.sh
deleted file mode 100755
index 7dfa3281a..000000000
--- a/test_unstructured_ingest/dest/weaviate.sh
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/usr/bin/env bash
-
-set -e
-
-DEST_PATH=$(dirname "$(realpath "$0")")
-SCRIPT_DIR=$(dirname "$DEST_PATH")
-cd "$SCRIPT_DIR"/.. || exit 1
-OUTPUT_FOLDER_NAME=weaviate-dest
-OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR}
-OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME
-WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME
-CI=${CI:-"false"}
-max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")}
-
-# shellcheck disable=SC1091
-source "$SCRIPT_DIR"/cleanup.sh
-function cleanup {
-  # Index cleanup
-  echo "Stopping Weaviate Docker container"
-  docker-compose -f scripts/weaviate-test-helpers/docker-compose.yml down --remove-orphans -v
-
-  # Local file cleanup
-  cleanup_dir "$WORK_DIR"
-  cleanup_dir "$OUTPUT_DIR"
-
-}
-
-trap cleanup EXIT
-
-# Create weaviate instance and create `elements` class
-echo "Creating weaviate instance"
-# shellcheck source=/dev/null
-scripts/weaviate-test-helpers/create-weaviate-instance.sh
-wait
-
-PYTHONPATH=. ./unstructured/ingest/main.py \
-  local \
-  --num-processes "$max_processes" \
-  --output-dir "$OUTPUT_DIR" \
-  --strategy fast \
-  --verbose \
-  --reprocess \
-  --input-path example-docs/pdf/fake-memo.pdf \
-  --work-dir "$WORK_DIR" \
-  --embedding-provider "langchain-huggingface" \
-  weaviate \
-  --host-url http://localhost:8080 \
-  --class-name elements \
-  --anonymous
-
-"$SCRIPT_DIR"/python/test-ingest-weaviate-output.py
diff --git a/test_unstructured_ingest/expected-structured-output/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf.json b/test_unstructured_ingest/expected-structured-output/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf.json
index 92d6daaa1..06e6a9009 100644
--- a/test_unstructured_ingest/expected-structured-output/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf.json
+++ b/test_unstructured_ingest/expected-structured-output/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf.json
@@ -11,7 +11,7 @@
       "page_number": 1,
       "data_source": {
         "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf",
-        "version": "237960874052008560436652606947751982249",
+        "version": "0x8DB214A673DD8D8",
         "record_locator": {
           "protocol": "abfs",
           "remote_file_path": "abfs://container1/"
@@ -33,7 +33,7 @@
       "page_number": 1,
       "data_source": {
         "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf",
-        "version": "237960874052008560436652606947751982249",
+        "version": "0x8DB214A673DD8D8",
         "record_locator": {
           "protocol": "abfs",
           "remote_file_path": "abfs://container1/"
@@ -55,7 +55,7 @@
       "page_number": 1,
       "data_source": {
         "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf",
-        "version": "237960874052008560436652606947751982249",
+        "version": "0x8DB214A673DD8D8",
         "record_locator": {
           "protocol": "abfs",
           "remote_file_path": "abfs://container1/"
@@ -77,7 +77,7 @@
       "page_number": 1,
       "data_source": {
         "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf",
-        "version": "237960874052008560436652606947751982249",
+        "version": "0x8DB214A673DD8D8",
         "record_locator": {
           "protocol": "abfs",
           "remote_file_path": "abfs://container1/"
@@ -99,7 +99,7 @@
       "page_number": 1,
       "data_source": {
         "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf",
-        "version": "237960874052008560436652606947751982249",
+        "version": "0x8DB214A673DD8D8",
         "record_locator": {
           "protocol": "abfs",
           "remote_file_path": "abfs://container1/"
@@ -121,7 +121,7 @@
       "page_number": 1,
       "data_source": {
         "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf",
-        "version": "237960874052008560436652606947751982249",
+        "version": "0x8DB214A673DD8D8",
         "record_locator": {
           "protocol": "abfs",
           "remote_file_path": "abfs://container1/"
@@ -143,7 +143,7 @@
       "page_number": 1,
       "data_source": {
         "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf",
-        "version": "237960874052008560436652606947751982249",
+        "version": "0x8DB214A673DD8D8",
         "record_locator": {
           "protocol": "abfs",
           "remote_file_path": "abfs://container1/"
@@ -165,7 +165,7 @@
       "page_number": 1,
       "data_source": {
         "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf",
-        "version": "237960874052008560436652606947751982249",
+        "version": "0x8DB214A673DD8D8",
         "record_locator": {
           "protocol": "abfs",
           "remote_file_path": "abfs://container1/"
@@ -187,7 +187,7 @@
       "page_number": 1,
       "data_source": {
         "url":
"abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -209,7 +209,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -231,7 +231,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -253,7 +253,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -275,7 +275,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -297,7 +297,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -319,7 +319,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -341,7 +341,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -363,7 +363,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -385,7 +385,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -407,7 +407,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -429,7 +429,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": 
"0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -451,7 +451,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -473,7 +473,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -495,7 +495,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -517,7 +517,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -539,7 +539,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -561,7 +561,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" diff --git a/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.pdf.json b/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.pdf.json index 3cad0fd85..cca8a4dd1 100644 --- a/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.pdf.json @@ -11,7 +11,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -33,7 +33,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -55,7 +55,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -77,7 +77,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -99,7 +99,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": 
"337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -121,7 +121,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -143,7 +143,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -165,7 +165,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -187,7 +187,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -209,7 +209,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -231,7 +231,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -253,7 +253,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -275,7 +275,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -297,7 +297,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -319,7 +319,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -341,7 +341,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -363,7 +363,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -385,7 +385,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": 
"337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -407,7 +407,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -429,7 +429,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -451,7 +451,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -473,7 +473,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -495,7 +495,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -517,7 +517,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -539,7 +539,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -561,7 +561,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -583,7 +583,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -605,7 +605,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -627,7 +627,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -649,7 +649,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -671,7 +671,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": 
"337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -693,7 +693,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -715,7 +715,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -737,7 +737,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -759,7 +759,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -781,7 +781,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -803,7 +803,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -825,7 +825,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -847,7 +847,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -869,7 +869,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -891,7 +891,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -913,7 +913,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -935,7 +935,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -957,7 +957,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": 
"337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -979,7 +979,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1001,7 +1001,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1023,7 +1023,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1045,7 +1045,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1067,7 +1067,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1089,7 +1089,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1111,7 +1111,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1133,7 +1133,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1155,7 +1155,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1177,7 +1177,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1199,7 +1199,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1221,7 +1221,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1243,7 +1243,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - 
"version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1265,7 +1265,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1287,7 +1287,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1309,7 +1309,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1331,7 +1331,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1353,7 +1353,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1375,7 +1375,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1397,7 +1397,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1419,7 +1419,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1441,7 +1441,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1463,7 +1463,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1485,7 +1485,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1507,7 +1507,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1529,7 +1529,7 @@ "page_number": 2, "data_source": { "url": 
"abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1551,7 +1551,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1573,7 +1573,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1595,7 +1595,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1617,7 +1617,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1639,7 +1639,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1661,7 +1661,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1683,7 +1683,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1705,7 +1705,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1727,7 +1727,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1749,7 +1749,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" diff --git a/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.png.json b/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.png.json index 4c72f31bb..870978812 100644 --- a/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.png.json +++ b/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.png.json @@ -11,7 +11,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { 
"protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -33,7 +33,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -55,7 +55,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -77,7 +77,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -99,7 +99,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -121,7 +121,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -143,7 +143,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -165,7 +165,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -187,7 +187,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -209,7 +209,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -231,7 +231,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -253,7 +253,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -275,7 +275,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -297,7 +297,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", 
"remote_file_path": "abfs://container1/" @@ -319,7 +319,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -341,7 +341,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -363,7 +363,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -385,7 +385,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -407,7 +407,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -429,7 +429,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -451,7 +451,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -473,7 +473,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -495,7 +495,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -517,7 +517,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -539,7 +539,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -561,7 +561,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -583,7 +583,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", 
"remote_file_path": "abfs://container1/" @@ -605,7 +605,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -627,7 +627,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -649,7 +649,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -671,7 +671,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -693,7 +693,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -715,7 +715,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -737,7 +737,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -759,7 +759,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -781,7 +781,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -803,7 +803,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -825,7 +825,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -847,7 +847,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -869,7 +869,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", 
"remote_file_path": "abfs://container1/" @@ -891,7 +891,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" diff --git a/test_unstructured_ingest/expected-structured-output/azure/rfc854.txt.json b/test_unstructured_ingest/expected-structured-output/azure/rfc854.txt.json index 0cd30210b..91374854e 100644 --- a/test_unstructured_ingest/expected-structured-output/azure/rfc854.txt.json +++ b/test_unstructured_ingest/expected-structured-output/azure/rfc854.txt.json @@ -10,7 +10,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -31,7 +31,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -52,7 +52,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -73,7 +73,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -94,7 +94,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -115,7 +115,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -136,7 +136,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -157,7 +157,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -178,7 +178,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -199,7 +199,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -220,7 +220,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + 
"version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -241,7 +241,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -262,7 +262,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -283,7 +283,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -304,7 +304,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -325,7 +325,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -346,7 +346,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -367,7 +367,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -388,7 +388,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -409,7 +409,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -430,7 +430,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -451,7 +451,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -472,7 +472,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -493,7 +493,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + 
"version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -514,7 +514,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -535,7 +535,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -556,7 +556,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -577,7 +577,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -598,7 +598,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -619,7 +619,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -640,7 +640,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -661,7 +661,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -682,7 +682,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -703,7 +703,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -724,7 +724,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -745,7 +745,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -766,7 +766,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + 
"version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -787,7 +787,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -808,7 +808,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -829,7 +829,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -850,7 +850,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -871,7 +871,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -892,7 +892,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -913,7 +913,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -934,7 +934,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -955,7 +955,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -976,7 +976,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -997,7 +997,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1018,7 +1018,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1039,7 +1039,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + 
"version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1060,7 +1060,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1081,7 +1081,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1102,7 +1102,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1123,7 +1123,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1144,7 +1144,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1165,7 +1165,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1186,7 +1186,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1207,7 +1207,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1228,7 +1228,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1249,7 +1249,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1270,7 +1270,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1291,7 +1291,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1312,7 +1312,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": 
"252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1333,7 +1333,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1354,7 +1354,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1375,7 +1375,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1396,7 +1396,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1417,7 +1417,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1438,7 +1438,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1459,7 +1459,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1480,7 +1480,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1501,7 +1501,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1522,7 +1522,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1543,7 +1543,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1564,7 +1564,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1585,7 +1585,7 @@ "filetype": "text/plain", "data_source": { "url": 
"abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1606,7 +1606,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1627,7 +1627,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1648,7 +1648,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1669,7 +1669,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1690,7 +1690,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1711,7 +1711,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1732,7 +1732,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1753,7 +1753,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1774,7 +1774,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1795,7 +1795,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1816,7 +1816,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1837,7 +1837,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1858,7 +1858,7 @@ "filetype": "text/plain", 
"data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1879,7 +1879,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1900,7 +1900,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1921,7 +1921,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1942,7 +1942,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1963,7 +1963,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1984,7 +1984,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2005,7 +2005,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2026,7 +2026,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2047,7 +2047,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2068,7 +2068,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2089,7 +2089,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2110,7 +2110,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2131,7 +2131,7 @@ 
"filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2152,7 +2152,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2173,7 +2173,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2194,7 +2194,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2215,7 +2215,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2236,7 +2236,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2257,7 +2257,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2278,7 +2278,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2299,7 +2299,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2320,7 +2320,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2341,7 +2341,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2362,7 +2362,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2383,7 +2383,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" 
@@ -2404,7 +2404,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2425,7 +2425,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2446,7 +2446,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2467,7 +2467,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2488,7 +2488,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2509,7 +2509,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2530,7 +2530,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2551,7 +2551,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2572,7 +2572,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2593,7 +2593,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2614,7 +2614,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2635,7 +2635,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2656,7 +2656,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": 
"abfs://container1/" @@ -2677,7 +2677,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2698,7 +2698,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2719,7 +2719,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2740,7 +2740,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2761,7 +2761,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2782,7 +2782,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2803,7 +2803,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2824,7 +2824,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2845,7 +2845,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2866,7 +2866,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2887,7 +2887,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2908,7 +2908,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2929,7 +2929,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", 
"remote_file_path": "abfs://container1/" diff --git a/test_unstructured_ingest/expected-structured-output/azure/spring-weather.html.json b/test_unstructured_ingest/expected-structured-output/azure/spring-weather.html.json index 387857ab5..e62bb1938 100644 --- a/test_unstructured_ingest/expected-structured-output/azure/spring-weather.html.json +++ b/test_unstructured_ingest/expected-structured-output/azure/spring-weather.html.json @@ -10,7 +10,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -31,7 +31,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -52,7 +52,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -81,7 +81,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -108,7 +108,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -135,7 +135,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -162,7 +162,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -189,7 +189,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -216,7 +216,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -243,7 +243,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -270,7 +270,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -297,7 +297,7 @@ "filetype": 
"text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -324,7 +324,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -351,7 +351,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -378,7 +378,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -405,7 +405,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -432,7 +432,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -459,7 +459,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -486,7 +486,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -513,7 +513,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -540,7 +540,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -567,7 +567,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -594,7 +594,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -621,7 +621,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", 
"record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -648,7 +648,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -675,7 +675,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -702,7 +702,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -729,7 +729,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -756,7 +756,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -783,7 +783,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -810,7 +810,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -837,7 +837,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -864,7 +864,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -891,7 +891,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -918,7 +918,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -945,7 +945,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -972,7 +972,7 @@ "filetype": "text/html", "data_source": { "url": 
"abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -999,7 +999,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1026,7 +1026,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1053,7 +1053,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1080,7 +1080,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1107,7 +1107,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1134,7 +1134,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1161,7 +1161,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1188,7 +1188,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1215,7 +1215,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1242,7 +1242,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1269,7 +1269,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1296,7 +1296,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { 
"protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1323,7 +1323,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1350,7 +1350,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1377,7 +1377,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1404,7 +1404,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1431,7 +1431,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1458,7 +1458,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1485,7 +1485,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1512,7 +1512,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1539,7 +1539,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1566,7 +1566,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1593,7 +1593,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1620,7 +1620,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1647,7 +1647,7 @@ "filetype": "text/html", "data_source": { "url": 
"abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1674,7 +1674,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1701,7 +1701,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1728,7 +1728,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1755,7 +1755,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1782,7 +1782,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1809,7 +1809,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1836,7 +1836,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1863,7 +1863,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1890,7 +1890,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1917,7 +1917,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1944,7 +1944,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1971,7 +1971,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { 
"protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1998,7 +1998,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2025,7 +2025,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2052,7 +2052,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2079,7 +2079,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2106,7 +2106,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2133,7 +2133,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2160,7 +2160,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2187,7 +2187,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2214,7 +2214,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2235,7 +2235,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2256,7 +2256,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2283,7 +2283,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2316,7 +2316,7 @@ "filetype": "text/html", "data_source": { "url": 
"abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2343,7 +2343,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2364,7 +2364,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2397,7 +2397,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2430,7 +2430,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2463,7 +2463,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" diff --git a/test_unstructured_ingest/expected-structured-output/box/handbook-1p.docx.json b/test_unstructured_ingest/expected-structured-output/box/handbook-1p.docx.json index 39646d9a7..3c7ca733b 100644 --- a/test_unstructured_ingest/expected-structured-output/box/handbook-1p.docx.json +++ b/test_unstructured_ingest/expected-structured-output/box/handbook-1p.docx.json @@ -11,7 +11,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -39,7 +39,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -67,7 +67,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -89,7 +89,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -111,7 +111,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", 
"data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -133,7 +133,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -155,7 +155,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -177,7 +177,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -199,7 +199,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -221,7 +221,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -243,7 +243,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -265,7 +265,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -287,7 +287,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -309,7 +309,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -332,7 +332,7 @@ "filetype": 
"application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", diff --git a/test_unstructured_ingest/expected-structured-output/box/nested-1/ideas-page.html.json b/test_unstructured_ingest/expected-structured-output/box/nested-1/ideas-page.html.json index 1b9c8bad3..d0025fcee 100644 --- a/test_unstructured_ingest/expected-structured-output/box/nested-1/ideas-page.html.json +++ b/test_unstructured_ingest/expected-structured-output/box/nested-1/ideas-page.html.json @@ -11,7 +11,7 @@ "filetype": "text/html", "data_source": { "url": "box:///utic-test-ingest-fixtures/nested-1/ideas-page.html", - "version": "77943175838335685751163845636763163681", + "version": "1255892530552", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", diff --git a/test_unstructured_ingest/expected-structured-output/box/nested-1/nested-2/ideas-page.html.json b/test_unstructured_ingest/expected-structured-output/box/nested-1/nested-2/ideas-page.html.json index ef9902dfb..e9bc64409 100644 --- a/test_unstructured_ingest/expected-structured-output/box/nested-1/nested-2/ideas-page.html.json +++ b/test_unstructured_ingest/expected-structured-output/box/nested-1/nested-2/ideas-page.html.json @@ -11,7 +11,7 @@ "filetype": "text/html", "data_source": { "url": "box:///utic-test-ingest-fixtures/nested-1/nested-2/ideas-page.html", - "version": "293680985726204769765169474511274942733", + "version": "1255884723846", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", diff --git a/test_unstructured_ingest/expected-structured-output/box/science-exploration-1p.pptx.json b/test_unstructured_ingest/expected-structured-output/box/science-exploration-1p.pptx.json index 23a1ddae7..2e6dbf696 100644 --- a/test_unstructured_ingest/expected-structured-output/box/science-exploration-1p.pptx.json +++ b/test_unstructured_ingest/expected-structured-output/box/science-exploration-1p.pptx.json @@ -11,7 +11,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { "url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx", - "version": "309546934335254463247992132065898582121", + "version": "1255894255490", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -34,7 +34,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { "url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx", - "version": "309546934335254463247992132065898582121", + "version": "1255894255490", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -57,7 +57,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { "url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx", - "version": "309546934335254463247992132065898582121", + "version": "1255894255490", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -80,7 +80,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { "url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx", - "version": 
"309546934335254463247992132065898582121", + "version": "1255894255490", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -103,7 +103,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { "url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx", - "version": "309546934335254463247992132065898582121", + "version": "1255894255490", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -126,7 +126,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { "url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx", - "version": "309546934335254463247992132065898582121", + "version": "1255894255490", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -149,7 +149,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { "url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx", - "version": "309546934335254463247992132065898582121", + "version": "1255894255490", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -172,7 +172,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { "url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx", - "version": "309546934335254463247992132065898582121", + "version": "1255894255490", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -195,7 +195,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { "url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx", - "version": "309546934335254463247992132065898582121", + "version": "1255894255490", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -218,7 +218,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { "url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx", - "version": "309546934335254463247992132065898582121", + "version": "1255894255490", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -241,7 +241,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { "url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx", - "version": "309546934335254463247992132065898582121", + "version": "1255894255490", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -264,7 +264,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { "url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx", - "version": "309546934335254463247992132065898582121", + "version": "1255894255490", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -287,7 +287,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { "url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx", - "version": "309546934335254463247992132065898582121", + "version": "1255894255490", "record_locator": { "protocol": "box", "remote_file_path": 
"box://utic-test-ingest-fixtures", diff --git a/test_unstructured_ingest/expected-structured-output/dropbox/handbook-1p.docx.json b/test_unstructured_ingest/expected-structured-output/dropbox/handbook-1p.docx.json index 94e1c93f4..9e61bf43b 100644 --- a/test_unstructured_ingest/expected-structured-output/dropbox/handbook-1p.docx.json +++ b/test_unstructured_ingest/expected-structured-output/dropbox/handbook-1p.docx.json @@ -10,13 +10,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -36,13 +38,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -62,13 +66,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -82,13 +88,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -102,13 +110,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -122,13 +132,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": 
"dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -142,13 +154,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -162,13 +176,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -182,13 +198,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -202,13 +220,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -222,13 +242,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -242,13 +264,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": 
"2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -262,13 +286,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -282,13 +308,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -303,13 +331,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } } diff --git a/test_unstructured_ingest/expected-structured-output/dropbox/nested-1/ideas-page.html.json b/test_unstructured_ingest/expected-structured-output/dropbox/nested-1/ideas-page.html.json index fb02cb1ff..1c500c276 100644 --- a/test_unstructured_ingest/expected-structured-output/dropbox/nested-1/ideas-page.html.json +++ b/test_unstructured_ingest/expected-structured-output/dropbox/nested-1/ideas-page.html.json @@ -10,13 +10,15 @@ ], "filetype": "text/html", "data_source": { - "url": "dropbox:///test-input/nested-1/ideas-page.html", - "version": "67356979305728150851855820427694668063", + "url": "dropbox://test-input/nested-1/ideas-page.html", + "version": "7a31fe250cc57a9733f8d50e61b9b265c53f5dd12faedf4829e559e2c3a8845e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACw" - } + }, + "date_created": "1687394194.0", + "date_modified": "1697632566.0" } } } diff --git a/test_unstructured_ingest/expected-structured-output/dropbox/nested-2/ideas-page.html.json b/test_unstructured_ingest/expected-structured-output/dropbox/nested-2/ideas-page.html.json index 564bd2577..0fa649855 100644 --- a/test_unstructured_ingest/expected-structured-output/dropbox/nested-2/ideas-page.html.json +++ b/test_unstructured_ingest/expected-structured-output/dropbox/nested-2/ideas-page.html.json @@ -10,13 +10,15 @@ ], "filetype": "text/html", "data_source": { - "url": "dropbox:///test-input/nested-2/ideas-page.html", - "version": 
"145453788782335405288844961545898675998", + "url": "dropbox://test-input/nested-2/ideas-page.html", + "version": "7a31fe250cc57a9733f8d50e61b9b265c53f5dd12faedf4829e559e2c3a8845e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAADQ" - } + }, + "date_created": "1687394213.0", + "date_modified": "1697632566.0" } } } diff --git a/test_unstructured_ingest/expected-structured-output/dropbox/science-exploration-1p.pptx.json b/test_unstructured_ingest/expected-structured-output/dropbox/science-exploration-1p.pptx.json index c5a44f158..8e59883c8 100644 --- a/test_unstructured_ingest/expected-structured-output/dropbox/science-exploration-1p.pptx.json +++ b/test_unstructured_ingest/expected-structured-output/dropbox/science-exploration-1p.pptx.json @@ -10,13 +10,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { - "url": "dropbox:///test-input/science-exploration-1p.pptx", - "version": "26035320120182381452247268381589958225", + "url": "dropbox://test-input/science-exploration-1p.pptx", + "version": "82ebb5e422916b72fa2bd283cae3b9f41b96a9d0af59f92a8edd6e9556ca5510", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACA" - } + }, + "date_created": "1687394162.0", + "date_modified": "1697632567.0" } } }, @@ -31,13 +33,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { - "url": "dropbox:///test-input/science-exploration-1p.pptx", - "version": "26035320120182381452247268381589958225", + "url": "dropbox://test-input/science-exploration-1p.pptx", + "version": "82ebb5e422916b72fa2bd283cae3b9f41b96a9d0af59f92a8edd6e9556ca5510", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACA" - } + }, + "date_created": "1687394162.0", + "date_modified": "1697632567.0" } } }, @@ -52,13 +56,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { - "url": "dropbox:///test-input/science-exploration-1p.pptx", - "version": "26035320120182381452247268381589958225", + "url": "dropbox://test-input/science-exploration-1p.pptx", + "version": "82ebb5e422916b72fa2bd283cae3b9f41b96a9d0af59f92a8edd6e9556ca5510", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACA" - } + }, + "date_created": "1687394162.0", + "date_modified": "1697632567.0" } } }, @@ -73,13 +79,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { - "url": "dropbox:///test-input/science-exploration-1p.pptx", - "version": "26035320120182381452247268381589958225", + "url": "dropbox://test-input/science-exploration-1p.pptx", + "version": "82ebb5e422916b72fa2bd283cae3b9f41b96a9d0af59f92a8edd6e9556ca5510", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACA" - } + }, + "date_created": "1687394162.0", + "date_modified": "1697632567.0" } } }, @@ -94,13 +102,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { - "url": "dropbox:///test-input/science-exploration-1p.pptx", - "version": "26035320120182381452247268381589958225", + "url": "dropbox://test-input/science-exploration-1p.pptx", + "version": 
"82ebb5e422916b72fa2bd283cae3b9f41b96a9d0af59f92a8edd6e9556ca5510", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACA" - } + }, + "date_created": "1687394162.0", + "date_modified": "1697632567.0" } } }, @@ -115,13 +125,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { - "url": "dropbox:///test-input/science-exploration-1p.pptx", - "version": "26035320120182381452247268381589958225", + "url": "dropbox://test-input/science-exploration-1p.pptx", + "version": "82ebb5e422916b72fa2bd283cae3b9f41b96a9d0af59f92a8edd6e9556ca5510", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACA" - } + }, + "date_created": "1687394162.0", + "date_modified": "1697632567.0" } } }, @@ -136,13 +148,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { - "url": "dropbox:///test-input/science-exploration-1p.pptx", - "version": "26035320120182381452247268381589958225", + "url": "dropbox://test-input/science-exploration-1p.pptx", + "version": "82ebb5e422916b72fa2bd283cae3b9f41b96a9d0af59f92a8edd6e9556ca5510", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACA" - } + }, + "date_created": "1687394162.0", + "date_modified": "1697632567.0" } } }, @@ -157,13 +171,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { - "url": "dropbox:///test-input/science-exploration-1p.pptx", - "version": "26035320120182381452247268381589958225", + "url": "dropbox://test-input/science-exploration-1p.pptx", + "version": "82ebb5e422916b72fa2bd283cae3b9f41b96a9d0af59f92a8edd6e9556ca5510", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACA" - } + }, + "date_created": "1687394162.0", + "date_modified": "1697632567.0" } } }, @@ -178,13 +194,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { - "url": "dropbox:///test-input/science-exploration-1p.pptx", - "version": "26035320120182381452247268381589958225", + "url": "dropbox://test-input/science-exploration-1p.pptx", + "version": "82ebb5e422916b72fa2bd283cae3b9f41b96a9d0af59f92a8edd6e9556ca5510", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACA" - } + }, + "date_created": "1687394162.0", + "date_modified": "1697632567.0" } } }, @@ -199,13 +217,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { - "url": "dropbox:///test-input/science-exploration-1p.pptx", - "version": "26035320120182381452247268381589958225", + "url": "dropbox://test-input/science-exploration-1p.pptx", + "version": "82ebb5e422916b72fa2bd283cae3b9f41b96a9d0af59f92a8edd6e9556ca5510", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACA" - } + }, + "date_created": "1687394162.0", + "date_modified": "1697632567.0" } } }, @@ -220,13 +240,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { - "url": "dropbox:///test-input/science-exploration-1p.pptx", - "version": "26035320120182381452247268381589958225", + "url": 
"dropbox://test-input/science-exploration-1p.pptx", + "version": "82ebb5e422916b72fa2bd283cae3b9f41b96a9d0af59f92a8edd6e9556ca5510", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACA" - } + }, + "date_created": "1687394162.0", + "date_modified": "1697632567.0" } } }, @@ -241,13 +263,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { - "url": "dropbox:///test-input/science-exploration-1p.pptx", - "version": "26035320120182381452247268381589958225", + "url": "dropbox://test-input/science-exploration-1p.pptx", + "version": "82ebb5e422916b72fa2bd283cae3b9f41b96a9d0af59f92a8edd6e9556ca5510", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACA" - } + }, + "date_created": "1687394162.0", + "date_modified": "1697632567.0" } } }, @@ -262,13 +286,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { - "url": "dropbox:///test-input/science-exploration-1p.pptx", - "version": "26035320120182381452247268381589958225", + "url": "dropbox://test-input/science-exploration-1p.pptx", + "version": "82ebb5e422916b72fa2bd283cae3b9f41b96a9d0af59f92a8edd6e9556ca5510", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACA" - } + }, + "date_created": "1687394162.0", + "date_modified": "1697632567.0" } } } diff --git a/test_unstructured_ingest/expected-structured-output/gcs/ideas-page.html.json b/test_unstructured_ingest/expected-structured-output/gcs/ideas-page.html.json index d3a3f0854..bcd7ef201 100644 --- a/test_unstructured_ingest/expected-structured-output/gcs/ideas-page.html.json +++ b/test_unstructured_ingest/expected-structured-output/gcs/ideas-page.html.json @@ -11,7 +11,7 @@ "filetype": "text/html", "data_source": { "url": "gs://utic-test-ingest-fixtures/ideas-page.html", - "version": "199523943725186047835150971481714294476", + "version": "CJXRtOuE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", diff --git a/test_unstructured_ingest/expected-structured-output/gcs/nested-1/fake-text.txt.json b/test_unstructured_ingest/expected-structured-output/gcs/nested-1/fake-text.txt.json index d49564e20..8c8d34a2f 100644 --- a/test_unstructured_ingest/expected-structured-output/gcs/nested-1/fake-text.txt.json +++ b/test_unstructured_ingest/expected-structured-output/gcs/nested-1/fake-text.txt.json @@ -10,7 +10,7 @@ "filetype": "text/plain", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-1/fake-text.txt", - "version": "180263070579038859328651626981788275889", + "version": "CKyIrMaE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", @@ -32,7 +32,7 @@ "filetype": "text/plain", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-1/fake-text.txt", - "version": "180263070579038859328651626981788275889", + "version": "CKyIrMaE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", @@ -54,7 +54,7 @@ "filetype": "text/plain", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-1/fake-text.txt", - "version": "180263070579038859328651626981788275889", + "version": "CKyIrMaE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", @@ -76,7 +76,7 @@ 
"filetype": "text/plain", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-1/fake-text.txt", - "version": "180263070579038859328651626981788275889", + "version": "CKyIrMaE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", @@ -98,7 +98,7 @@ "filetype": "text/plain", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-1/fake-text.txt", - "version": "180263070579038859328651626981788275889", + "version": "CKyIrMaE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", @@ -120,7 +120,7 @@ "filetype": "text/plain", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-1/fake-text.txt", - "version": "180263070579038859328651626981788275889", + "version": "CKyIrMaE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", diff --git a/test_unstructured_ingest/expected-structured-output/gcs/nested-1/nested/ideas-page.html.json b/test_unstructured_ingest/expected-structured-output/gcs/nested-1/nested/ideas-page.html.json index 662caae8c..e31d5a5e0 100644 --- a/test_unstructured_ingest/expected-structured-output/gcs/nested-1/nested/ideas-page.html.json +++ b/test_unstructured_ingest/expected-structured-output/gcs/nested-1/nested/ideas-page.html.json @@ -11,7 +11,7 @@ "filetype": "text/html", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-1/nested/ideas-page.html", - "version": "310890354306462681752199911957569001015", + "version": "CMWrx8aE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", diff --git a/test_unstructured_ingest/expected-structured-output/gcs/nested-2/fake-text.txt.json b/test_unstructured_ingest/expected-structured-output/gcs/nested-2/fake-text.txt.json index 7f5a3c007..22bcb125b 100644 --- a/test_unstructured_ingest/expected-structured-output/gcs/nested-2/fake-text.txt.json +++ b/test_unstructured_ingest/expected-structured-output/gcs/nested-2/fake-text.txt.json @@ -10,7 +10,7 @@ "filetype": "text/plain", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-2/fake-text.txt", - "version": "198731266903969902154134165613731741332", + "version": "CPXPxMuE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", @@ -32,7 +32,7 @@ "filetype": "text/plain", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-2/fake-text.txt", - "version": "198731266903969902154134165613731741332", + "version": "CPXPxMuE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", @@ -54,7 +54,7 @@ "filetype": "text/plain", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-2/fake-text.txt", - "version": "198731266903969902154134165613731741332", + "version": "CPXPxMuE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", @@ -76,7 +76,7 @@ "filetype": "text/plain", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-2/fake-text.txt", - "version": "198731266903969902154134165613731741332", + "version": "CPXPxMuE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", @@ -98,7 +98,7 @@ "filetype": "text/plain", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-2/fake-text.txt", - "version": "198731266903969902154134165613731741332", + "version": "CPXPxMuE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": 
"gs://utic-test-ingest-fixtures/", @@ -120,7 +120,7 @@ "filetype": "text/plain", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-2/fake-text.txt", - "version": "198731266903969902154134165613731741332", + "version": "CPXPxMuE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", diff --git a/test_unstructured_ingest/expected-structured-output/gcs/nested-2/nested/ideas-page.html.json b/test_unstructured_ingest/expected-structured-output/gcs/nested-2/nested/ideas-page.html.json index 4b34ff850..b318f7a12 100644 --- a/test_unstructured_ingest/expected-structured-output/gcs/nested-2/nested/ideas-page.html.json +++ b/test_unstructured_ingest/expected-structured-output/gcs/nested-2/nested/ideas-page.html.json @@ -11,7 +11,7 @@ "filetype": "text/html", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-2/nested/ideas-page.html", - "version": "113813498010717860141768546590661839404", + "version": "COXZ3MuE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", diff --git a/test_unstructured_ingest/expected-structured-output/gcs/nested-2/stanley-cups.xlsx.json b/test_unstructured_ingest/expected-structured-output/gcs/nested-2/stanley-cups.xlsx.json index c7a6b9d3b..4931718ff 100644 --- a/test_unstructured_ingest/expected-structured-output/gcs/nested-2/stanley-cups.xlsx.json +++ b/test_unstructured_ingest/expected-structured-output/gcs/nested-2/stanley-cups.xlsx.json @@ -12,7 +12,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-2/stanley-cups.xlsx", - "version": "25646232132200560657189097157576319365", + "version": "COul9MuE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", @@ -37,7 +37,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-2/stanley-cups.xlsx", - "version": "25646232132200560657189097157576319365", + "version": "COul9MuE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", @@ -61,7 +61,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-2/stanley-cups.xlsx", - "version": "25646232132200560657189097157576319365", + "version": "COul9MuE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", @@ -86,7 +86,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-2/stanley-cups.xlsx", - "version": "25646232132200560657189097157576319365", + "version": "COul9MuE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", diff --git a/test_unstructured_ingest/src/against-api.sh b/test_unstructured_ingest/src/against-api.sh index a4ff8f3ad..7f2d6a944 100755 --- a/test_unstructured_ingest/src/against-api.sh +++ b/test_unstructured_ingest/src/against-api.sh @@ -27,7 +27,7 @@ trap cleanup EXIT TEST_FILE_NAME=layout-parser-paper-with-table.pdf # including pdf-infer-table-structure to validate partition arguments are passed to the api -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --api-key 
"$UNS_API_KEY" \ diff --git a/test_unstructured_ingest/src/airtable-diff.sh b/test_unstructured_ingest/src/airtable-diff.sh index 3aa9bb638..3cd81eff7 100755 --- a/test_unstructured_ingest/src/airtable-diff.sh +++ b/test_unstructured_ingest/src/airtable-diff.sh @@ -35,7 +35,7 @@ if [ -z "$AIRTABLE_PERSONAL_ACCESS_TOKEN" ]; then exit 8 fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ airtable \ --download-dir "$DOWNLOAD_DIR" \ @@ -47,7 +47,6 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ --reprocess \ --output-dir "$OUTPUT_DIR" \ --work-dir "$WORK_DIR" \ - --max-retry-time 10 \ --verbose "$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME diff --git a/test_unstructured_ingest/src/airtable-large.sh b/test_unstructured_ingest/src/airtable-large.sh index d15fed2b9..c0bf06fe4 100755 --- a/test_unstructured_ingest/src/airtable-large.sh +++ b/test_unstructured_ingest/src/airtable-large.sh @@ -38,7 +38,7 @@ fi # shellcheck disable=SC1091 source ./scripts/airtable-test-helpers/component_ids.sh -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ airtable \ --download-dir "$DOWNLOAD_DIR" \ diff --git a/test_unstructured_ingest/src/astradb.sh b/test_unstructured_ingest/src/astradb.sh index 9aa89c48f..1ea211a6b 100755 --- a/test_unstructured_ingest/src/astradb.sh +++ b/test_unstructured_ingest/src/astradb.sh @@ -22,7 +22,8 @@ fi COLLECTION_NAME="ingest_test_src" -PYTHONPATH=. ./unstructured/ingest/main.py \ +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} +PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ astradb \ --token "$ASTRA_DB_APPLICATION_TOKEN" \ --api-endpoint "$ASTRA_DB_API_ENDPOINT" \ diff --git a/test_unstructured_ingest/src/azure.sh b/test_unstructured_ingest/src/azure.sh index 602f2de43..6744805d6 100755 --- a/test_unstructured_ingest/src/azure.sh +++ b/test_unstructured_ingest/src/azure.sh @@ -21,11 +21,11 @@ function cleanup() { } trap cleanup EXIT -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ azure \ --download-dir "$DOWNLOAD_DIR" \ - --metadata-exclude coordinates,filename,file_directory,metadata.last_modified,metadata.data_source.date_processed,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude coordinates,filename,file_directory,metadata.last_modified,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --num-processes "$max_processes" \ --strategy hi_res \ --preserve-downloads \ diff --git a/test_unstructured_ingest/src/biomed-api.sh b/test_unstructured_ingest/src/biomed-api.sh index 75db5294e..82b29f887 100755 --- a/test_unstructured_ingest/src/biomed-api.sh +++ b/test_unstructured_ingest/src/biomed-api.sh @@ -23,7 +23,7 @@ trap cleanup EXIT "$SCRIPT_DIR"/check-num-files-expected-output.sh 2 $OUTPUT_FOLDER_NAME 10k -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ biomed \ --download-dir "$DOWNLOAD_DIR" \ diff --git a/test_unstructured_ingest/src/biomed-path.sh b/test_unstructured_ingest/src/biomed-path.sh index 95effb0b6..12401ed8a 100755 --- a/test_unstructured_ingest/src/biomed-path.sh +++ b/test_unstructured_ingest/src/biomed-path.sh @@ -23,7 
+23,7 @@ trap cleanup EXIT "$SCRIPT_DIR"/check-num-files-expected-output.sh 1 $OUTPUT_FOLDER_NAME 10k -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ biomed \ --download-dir "$DOWNLOAD_DIR" \ diff --git a/test_unstructured_ingest/src/box.sh b/test_unstructured_ingest/src/box.sh index e9f2462b5..3ab2f44b4 100755 --- a/test_unstructured_ingest/src/box.sh +++ b/test_unstructured_ingest/src/box.sh @@ -38,13 +38,13 @@ if [ -z "$BOX_APP_CONFIG_PATH" ]; then echo "$BOX_APP_CONFIG" >"$BOX_APP_CONFIG_PATH" fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ box \ --download-dir "$DOWNLOAD_DIR" \ --box-app-config "$BOX_APP_CONFIG_PATH" \ --remote-url box://utic-test-ingest-fixtures \ - --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --output-dir "$OUTPUT_DIR" \ --num-processes "$max_processes" \ --preserve-downloads \ diff --git a/test_unstructured_ingest/src/confluence-diff.sh b/test_unstructured_ingest/src/confluence-diff.sh index 5cc54f93b..dc0f71cd1 100755 --- a/test_unstructured_ingest/src/confluence-diff.sh +++ b/test_unstructured_ingest/src/confluence-diff.sh @@ -31,7 +31,7 @@ if [ -z "$CONFLUENCE_USER_EMAIL" ] || [ -z "$CONFLUENCE_API_TOKEN" ]; then exit 8 fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ confluence \ --download-dir "$DOWNLOAD_DIR" \ diff --git a/test_unstructured_ingest/src/confluence-large.sh b/test_unstructured_ingest/src/confluence-large.sh index 7b20d0ee0..790d675b9 100755 --- a/test_unstructured_ingest/src/confluence-large.sh +++ b/test_unstructured_ingest/src/confluence-large.sh @@ -37,7 +37,7 @@ fi # are being provided at the same time, which is a wrong way to use the connector. # We expect the test to ignore --confluence-num-of-spaces and use --confluence-list-of-spaces. 
-RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ confluence \ --download-dir "$DOWNLOAD_DIR" \ diff --git a/test_unstructured_ingest/src/delta-table.sh b/test_unstructured_ingest/src/delta-table.sh index 7faf23c40..d8ac97145 100755 --- a/test_unstructured_ingest/src/delta-table.sh +++ b/test_unstructured_ingest/src/delta-table.sh @@ -31,7 +31,7 @@ function cleanup() { trap cleanup EXIT -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ delta-table \ --num-processes "$max_processes" \ diff --git a/test_unstructured_ingest/src/discord.sh b/test_unstructured_ingest/src/discord.sh index 64bf18364..ca986e3b0 100755 --- a/test_unstructured_ingest/src/discord.sh +++ b/test_unstructured_ingest/src/discord.sh @@ -29,7 +29,7 @@ if [ -z "$DISCORD_TOKEN" ]; then exit 8 fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ discord \ --num-processes "$max_processes" \ diff --git a/test_unstructured_ingest/src/dropbox.sh b/test_unstructured_ingest/src/dropbox.sh index 414ce0846..ff2c82998 100755 --- a/test_unstructured_ingest/src/dropbox.sh +++ b/test_unstructured_ingest/src/dropbox.sh @@ -34,12 +34,12 @@ fi DROPBOX_RESPONSE=$(curl https://api.dropbox.com/oauth2/token -d refresh_token="$DROPBOX_REFRESH_TOKEN" -d grant_type=refresh_token -d client_id="$DROPBOX_APP_KEY" -d client_secret="$DROPBOX_APP_SECRET") DROPBOX_ACCESS_TOKEN=$(jq -r '.access_token' <<<"$DROPBOX_RESPONSE") -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ dropbox \ --num-processes "$max_processes" \ --download-dir "$DOWNLOAD_DIR" \ - --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --preserve-downloads \ --reprocess \ --output-dir "$OUTPUT_DIR" \ diff --git a/test_unstructured_ingest/src/elasticsearch.sh b/test_unstructured_ingest/src/elasticsearch.sh index 1534f0018..9141cde57 100755 --- a/test_unstructured_ingest/src/elasticsearch.sh +++ b/test_unstructured_ingest/src/elasticsearch.sh @@ -37,11 +37,11 @@ trap cleanup EXIT scripts/elasticsearch-test-helpers/source_connector/create-fill-and-check-es.sh wait -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ elasticsearch \ --download-dir "$DOWNLOAD_DIR" \ - --metadata-exclude filename,file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude filename,file_directory,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --num-processes "$max_processes" \ --preserve-downloads \ --reprocess \ diff --git a/test_unstructured_ingest/src/gcs.sh b/test_unstructured_ingest/src/gcs.sh index 77d2d86c6..5261c1169 100755 --- 
a/test_unstructured_ingest/src/gcs.sh +++ b/test_unstructured_ingest/src/gcs.sh @@ -34,12 +34,12 @@ fi GCP_INGEST_SERVICE_KEY_FILE=$(mktemp) echo "$GCP_INGEST_SERVICE_KEY" >"$GCP_INGEST_SERVICE_KEY_FILE" -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ gcs \ --num-processes "$max_processes" \ --download-dir "$DOWNLOAD_DIR" \ - --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --preserve-downloads \ --reprocess \ --output-dir "$OUTPUT_DIR" \ diff --git a/test_unstructured_ingest/src/github.sh b/test_unstructured_ingest/src/github.sh index a34355333..bea75f359 100755 --- a/test_unstructured_ingest/src/github.sh +++ b/test_unstructured_ingest/src/github.sh @@ -37,7 +37,7 @@ elif [[ "$CI" == "true" ]]; then echo fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} #shellcheck disable=SC2086 PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ github \ diff --git a/test_unstructured_ingest/src/gitlab.sh b/test_unstructured_ingest/src/gitlab.sh index 64ac21353..1bd01b488 100755 --- a/test_unstructured_ingest/src/gitlab.sh +++ b/test_unstructured_ingest/src/gitlab.sh @@ -24,7 +24,7 @@ function cleanup() { } trap cleanup EXIT -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ gitlab \ --num-processes "$max_processes" \ diff --git a/test_unstructured_ingest/src/google-drive.sh b/test_unstructured_ingest/src/google-drive.sh index 36a6ab79b..7e580e8a1 100755 --- a/test_unstructured_ingest/src/google-drive.sh +++ b/test_unstructured_ingest/src/google-drive.sh @@ -35,11 +35,11 @@ fi GCP_INGEST_SERVICE_KEY_FILE=$(mktemp) echo "$GCP_INGEST_SERVICE_KEY" >"$GCP_INGEST_SERVICE_KEY_FILE" -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ google-drive \ --download-dir "$DOWNLOAD_DIR" \ - --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth,metadata.data_source.version \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth,metadata.data_source.version \ --num-processes "$max_processes" \ --strategy hi_res \ --preserve-downloads \ @@ -47,7 +47,7 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ --output-dir "$OUTPUT_DIR" \ --verbose \ --drive-id 1OQZ66OHBE30rNsNa7dweGLfRmXvkT_jr \ - --service-account-key "$GCP_INGEST_SERVICE_KEY_FILE" \ + --service-account-key-path "$GCP_INGEST_SERVICE_KEY_FILE" \ --recursive \ --extensions "pdf,docx" \ --work-dir "$WORK_DIR" diff --git a/test_unstructured_ingest/src/hubspot.sh b/test_unstructured_ingest/src/hubspot.sh index 86a75630c..d5b617569 100755 --- a/test_unstructured_ingest/src/hubspot.sh +++ b/test_unstructured_ingest/src/hubspot.sh @@ -39,7 +39,8 @@ fi # Can be used multiple 
times to specify multiple objects. # --custom-properties Custom property to process information from. Comma separated list. -PYTHONPATH=. ./unstructured/ingest/main.py \ +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} +PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ hubspot \ --metadata-exclude file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.date_created,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --num-processes "$max_processes" \ diff --git a/test_unstructured_ingest/src/jira.sh b/test_unstructured_ingest/src/jira.sh index 533fc3224..ce6b4e049 100755 --- a/test_unstructured_ingest/src/jira.sh +++ b/test_unstructured_ingest/src/jira.sh @@ -50,7 +50,7 @@ fi # Note: When any of the optional arguments are provided, connector will ingest only those components, and nothing else. # When none of the optional arguments are provided, all issues in all projects will be ingested. -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ jira \ --download-dir "$DOWNLOAD_DIR" \ diff --git a/test_unstructured_ingest/src/kafka-local.sh b/test_unstructured_ingest/src/kafka-local.sh index c2ed84d0b..36b21754f 100755 --- a/test_unstructured_ingest/src/kafka-local.sh +++ b/test_unstructured_ingest/src/kafka-local.sh @@ -57,7 +57,7 @@ python "$SCRIPT_DIR"/python/test-produce-kafka-message.py up \ --confluent false \ --port 29092 -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ kafka \ --bootstrap-server localhost \ diff --git a/test_unstructured_ingest/src/local-embed-bedrock.sh b/test_unstructured_ingest/src/local-embed-bedrock.sh index 1d23431cf..285d15a56 100755 --- a/test_unstructured_ingest/src/local-embed-bedrock.sh +++ b/test_unstructured_ingest/src/local-embed-bedrock.sh @@ -24,17 +24,17 @@ if [ -z "$AWS_ACCESS_KEY_ID" ] || [ -z "$AWS_SECRET_ACCESS_KEY" ]; then exit 8 fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --num-processes "$max_processes" \ - --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --output-dir "$OUTPUT_DIR" \ --verbose \ --reprocess \ --input-path example-docs/book-war-and-peace-1p.txt \ --work-dir "$WORK_DIR" \ - --embedding-provider "langchain-aws-bedrock" \ + --embedding-provider "aws-bedrock" \ --embedding-aws-access-key-id "$AWS_ACCESS_KEY_ID" \ --embedding-aws-secret-access-key "$AWS_SECRET_ACCESS_KEY" diff --git a/test_unstructured_ingest/src/local-embed-mixedbreadai.sh b/test_unstructured_ingest/src/local-embed-mixedbreadai.sh index 75d949c89..99168d7dd 100755 --- a/test_unstructured_ingest/src/local-embed-mixedbreadai.sh +++ b/test_unstructured_ingest/src/local-embed-mixedbreadai.sh @@ -22,10 +22,8 @@ function cleanup() { } trap cleanup EXIT -# Define the run script -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} - 
# Run the ingestion script with the specified parameters +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --num-processes "$max_processes" \ diff --git a/test_unstructured_ingest/src/local-embed-octoai.sh b/test_unstructured_ingest/src/local-embed-octoai.sh index e75ee6dc5..54ff3e2a0 100755 --- a/test_unstructured_ingest/src/local-embed-octoai.sh +++ b/test_unstructured_ingest/src/local-embed-octoai.sh @@ -25,7 +25,7 @@ if [ -z "$OCTOAI_API_KEY" ]; then exit 8 fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --num-processes "$max_processes" \ diff --git a/test_unstructured_ingest/src/local-embed-vertexai.sh b/test_unstructured_ingest/src/local-embed-vertexai.sh index b7342fa75..4ef499bc5 100755 --- a/test_unstructured_ingest/src/local-embed-vertexai.sh +++ b/test_unstructured_ingest/src/local-embed-vertexai.sh @@ -25,17 +25,17 @@ if [ -z "$GCP_INGEST_SERVICE_KEY" ]; then exit 8 fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --num-processes "$max_processes" \ - --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --output-dir "$OUTPUT_DIR" \ --verbose \ --reprocess \ --input-path example-docs/book-war-and-peace-1p.txt \ --work-dir "$WORK_DIR" \ - --embedding-provider "langchain-vertexai" \ + --embedding-provider "vertexai" \ --embedding-api-key "$GCP_INGEST_SERVICE_KEY" \ --embedding-model-name "textembedding-gecko@001" diff --git a/test_unstructured_ingest/src/local-embed-voyageai.sh b/test_unstructured_ingest/src/local-embed-voyageai.sh index 62f5c60d3..c5f3be1fe 100755 --- a/test_unstructured_ingest/src/local-embed-voyageai.sh +++ b/test_unstructured_ingest/src/local-embed-voyageai.sh @@ -25,7 +25,7 @@ if [ -z "$VOYAGE_API_KEY" ]; then exit 8 fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --num-processes "$max_processes" \ @@ -35,7 +35,7 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ --reprocess \ --input-path example-docs/book-war-and-peace-1p.txt \ --work-dir "$WORK_DIR" \ - --embedding-provider "langchain-voyageai" \ + --embedding-provider "voyageai" \ --embedding-api-key "$VOYAGE_API_KEY" \ --embedding-model-name "voyage-large-2" diff --git a/test_unstructured_ingest/src/local-embed.sh b/test_unstructured_ingest/src/local-embed.sh index 0b8d540e3..210a7111c 100755 --- a/test_unstructured_ingest/src/local-embed.sh +++ b/test_unstructured_ingest/src/local-embed.sh @@ -19,17 +19,17 @@ function cleanup() { } trap cleanup EXIT -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --num-processes "$max_processes" \ - --metadata-exclude 
coordinates,filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --output-dir "$OUTPUT_DIR" \ --verbose \ --reprocess \ --input-path example-docs/book-war-and-peace-1p.txt \ --work-dir "$WORK_DIR" \ - --embedding-provider "langchain-huggingface" + --embedding-provider "huggingface" set +e diff --git a/test_unstructured_ingest/src/local-failed-partition.sh b/test_unstructured_ingest/src/local-failed-partition.sh index dbe4f1c77..a230888b3 100755 --- a/test_unstructured_ingest/src/local-failed-partition.sh +++ b/test_unstructured_ingest/src/local-failed-partition.sh @@ -38,7 +38,7 @@ function check() { fi } -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --num-processes "$max_processes" \ diff --git a/test_unstructured_ingest/src/local-single-file-basic-chunking.sh b/test_unstructured_ingest/src/local-single-file-basic-chunking.sh index 7786e1c63..575bd876f 100755 --- a/test_unstructured_ingest/src/local-single-file-basic-chunking.sh +++ b/test_unstructured_ingest/src/local-single-file-basic-chunking.sh @@ -22,8 +22,7 @@ function cleanup() { } trap cleanup EXIT -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} - +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --chunking-strategy basic \ diff --git a/test_unstructured_ingest/src/local-single-file-chunk-no-orig-elements.sh b/test_unstructured_ingest/src/local-single-file-chunk-no-orig-elements.sh index 452686eeb..051c5fba2 100755 --- a/test_unstructured_ingest/src/local-single-file-chunk-no-orig-elements.sh +++ b/test_unstructured_ingest/src/local-single-file-chunk-no-orig-elements.sh @@ -33,14 +33,13 @@ function cleanup() { } trap cleanup EXIT -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} - +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --chunking-strategy by_title \ - --chunk-no-include-orig-elements \ + --no-chunk-include-orig-elements \ --chunk-max-characters 2000 \ - --chunk-no-multipage-sections \ + --no-chunk-multipage-sections \ --input-path "$ABS_INPUT_PATH" \ --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --num-processes "$max_processes" \ diff --git a/test_unstructured_ingest/src/local-single-file-with-encoding.sh b/test_unstructured_ingest/src/local-single-file-with-encoding.sh index 016177073..3cf91223e 100755 --- a/test_unstructured_ingest/src/local-single-file-with-encoding.sh +++ b/test_unstructured_ingest/src/local-single-file-with-encoding.sh @@ -20,11 +20,11 @@ function cleanup() { } trap cleanup EXIT -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --num-processes "$max_processes" \ - --metadata-exclude 
filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --output-dir "$OUTPUT_DIR" \ --encoding cp1252 \ --verbose \ diff --git a/test_unstructured_ingest/src/local-single-file-with-pdf-infer-table-structure.sh b/test_unstructured_ingest/src/local-single-file-with-pdf-infer-table-structure.sh index 4265d0c4f..4c0ab5b36 100755 --- a/test_unstructured_ingest/src/local-single-file-with-pdf-infer-table-structure.sh +++ b/test_unstructured_ingest/src/local-single-file-with-pdf-infer-table-structure.sh @@ -20,11 +20,11 @@ function cleanup() { } trap cleanup EXIT -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --num-processes "$max_processes" \ - --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --output-dir "$OUTPUT_DIR" \ --skip-infer-table-types "xls,xlsx" \ --strategy hi_res \ diff --git a/test_unstructured_ingest/src/local-single-file.sh b/test_unstructured_ingest/src/local-single-file.sh index 14804f085..249746ed8 100755 --- a/test_unstructured_ingest/src/local-single-file.sh +++ b/test_unstructured_ingest/src/local-single-file.sh @@ -22,11 +22,11 @@ function cleanup() { } trap cleanup EXIT -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --num-processes "$max_processes" \ - --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --output-dir "$OUTPUT_DIR" \ --additional-partition-args '{"strategy":"ocr_only", "languages":["ind", "est"]}' \ --verbose \ diff --git a/test_unstructured_ingest/src/local.sh b/test_unstructured_ingest/src/local.sh index deac065b5..ac725144c 100755 --- a/test_unstructured_ingest/src/local.sh +++ b/test_unstructured_ingest/src/local.sh @@ -19,7 +19,7 @@ function cleanup() { } trap cleanup EXIT -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --num-processes "$max_processes" \ diff --git 
a/test_unstructured_ingest/src/mongodb.sh b/test_unstructured_ingest/src/mongodb.sh index 553014266..8429d7e1f 100755 --- a/test_unstructured_ingest/src/mongodb.sh +++ b/test_unstructured_ingest/src/mongodb.sh @@ -25,9 +25,10 @@ fi # astradb dependencies. # ref: https://pymongo.readthedocs.io/en/stable/installation.html python -m pip uninstall -y bson pymongo -make install-ingest-mongodb +pip install "unstructured-ingest[mongodb]" -PYTHONPATH=. ./unstructured/ingest/main.py \ +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} +PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ mongodb \ --metadata-exclude file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.date_created,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --num-processes "$max_processes" \ diff --git a/test_unstructured_ingest/src/notion.sh b/test_unstructured_ingest/src/notion.sh index 063a9199e..e80a11bfa 100755 --- a/test_unstructured_ingest/src/notion.sh +++ b/test_unstructured_ingest/src/notion.sh @@ -29,7 +29,7 @@ if [ -z "$NOTION_API_KEY" ]; then exit 8 fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ notion \ --metadata-exclude coordinates,filename,file_directory,metadata.last_modified,metadata.data_source.date_processed,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ diff --git a/test_unstructured_ingest/src/onedrive.sh b/test_unstructured_ingest/src/onedrive.sh index 0dfa3263a..d38b7ab80 100755 --- a/test_unstructured_ingest/src/onedrive.sh +++ b/test_unstructured_ingest/src/onedrive.sh @@ -29,11 +29,11 @@ if [ -z "$MS_CLIENT_ID" ] || [ -z "$MS_CLIENT_CRED" ] || [ -z "$MS_USER_PNAME" ] exit 8 fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ onedrive \ --download-dir "$DOWNLOAD_DIR" \ - --metadata-exclude file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude file_directory,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --num-processes "$max_processes" \ --strategy hi_res \ --preserve-downloads \ diff --git a/test_unstructured_ingest/src/opensearch.sh b/test_unstructured_ingest/src/opensearch.sh index 0b0a412a3..f1d7c150e 100755 --- a/test_unstructured_ingest/src/opensearch.sh +++ b/test_unstructured_ingest/src/opensearch.sh @@ -35,11 +35,11 @@ trap cleanup EXIT scripts/opensearch-test-helpers/source_connector/create-and-check-opensearch.sh wait -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ opensearch \ --download-dir "$DOWNLOAD_DIR" \ - --metadata-exclude filename,file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude filename,file_directory,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --num-processes "$max_processes" \ --preserve-downloads \ --reprocess \ diff --git a/test_unstructured_ingest/src/outlook.sh b/test_unstructured_ingest/src/outlook.sh index 890037070..a1a5a4878 
100755 --- a/test_unstructured_ingest/src/outlook.sh +++ b/test_unstructured_ingest/src/outlook.sh @@ -29,7 +29,7 @@ if [ -z "$MS_CLIENT_ID" ] || [ -z "$MS_CLIENT_CRED" ] || [ -z "$MS_TENANT_ID" ] exit 8 fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ outlook \ --download-dir "$DOWNLOAD_DIR" \ diff --git a/test_unstructured_ingest/src/pdf-fast-reprocess.sh b/test_unstructured_ingest/src/pdf-fast-reprocess.sh index a0dda9375..b27e32e8e 100755 --- a/test_unstructured_ingest/src/pdf-fast-reprocess.sh +++ b/test_unstructured_ingest/src/pdf-fast-reprocess.sh @@ -28,10 +28,10 @@ trap cleanup EXIT echo "REPROCESS INPUT PATH" ls "$INPUT_PATH" -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ - --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --num-processes "$max_processes" \ --strategy fast \ --reprocess \ diff --git a/test_unstructured_ingest/src/s3-compression.sh b/test_unstructured_ingest/src/s3-compression.sh index 1d1faabee..7ee066f3a 100755 --- a/test_unstructured_ingest/src/s3-compression.sh +++ b/test_unstructured_ingest/src/s3-compression.sh @@ -20,7 +20,7 @@ function cleanup() { } trap cleanup EXIT -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ s3 \ --num-processes "$max_processes" \ diff --git a/test_unstructured_ingest/src/s3-minio.sh b/test_unstructured_ingest/src/s3-minio.sh index c6011be05..85dd8f85d 100755 --- a/test_unstructured_ingest/src/s3-minio.sh +++ b/test_unstructured_ingest/src/s3-minio.sh @@ -32,13 +32,13 @@ trap cleanup EXIT scripts/minio-test-helpers/create-and-check-minio.sh wait -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} AWS_SECRET_ACCESS_KEY=$secret_key AWS_ACCESS_KEY_ID=$access_key \ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ s3 \ --num-processes "$max_processes" \ --download-dir "$DOWNLOAD_DIR" \ - --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.data_source.date_modified,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth,metadata.data_source.date_created \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.data_source.date_modified,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth,metadata.data_source.date_created \ --strategy hi_res \ --preserve-downloads \ --reprocess \ diff --git a/test_unstructured_ingest/src/s3.sh b/test_unstructured_ingest/src/s3.sh index 61e0fe13d..bfdc72c1c 100755 --- a/test_unstructured_ingest/src/s3.sh +++ b/test_unstructured_ingest/src/s3.sh @@ -23,12 +23,12 @@ trap cleanup EXIT "$SCRIPT_DIR"/check-num-files-expected-output.sh 3 $OUTPUT_FOLDER_NAME 
20k -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ s3 \ --num-processes "$max_processes" \ --download-dir "$DOWNLOAD_DIR" \ - --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --strategy hi_res \ --preserve-downloads \ --reprocess \ diff --git a/test_unstructured_ingest/src/salesforce.sh b/test_unstructured_ingest/src/salesforce.sh index 8ebce46a1..54ebd0555 100755 --- a/test_unstructured_ingest/src/salesforce.sh +++ b/test_unstructured_ingest/src/salesforce.sh @@ -43,15 +43,15 @@ if [ -z "$SALESFORCE_PRIVATE_KEY_PATH" ]; then echo "$SALESFORCE_PRIVATE_KEY" >"$SALESFORCE_PRIVATE_KEY_PATH" fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ salesforce \ --categories "EmailMessage,Campaign" \ --download-dir "$DOWNLOAD_DIR" \ --username "$SALESFORCE_USERNAME" \ --consumer-key "$SALESFORCE_CONSUMER_KEY" \ - --private-key "$SALESFORCE_PRIVATE_KEY_PATH" \ - --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --private-key-path "$SALESFORCE_PRIVATE_KEY_PATH" \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --num-processes "$max_processes" \ --preserve-downloads \ --reprocess \ diff --git a/test_unstructured_ingest/src/sftp.sh b/test_unstructured_ingest/src/sftp.sh index 3386b3a8c..e3312224d 100755 --- a/test_unstructured_ingest/src/sftp.sh +++ b/test_unstructured_ingest/src/sftp.sh @@ -33,12 +33,12 @@ trap cleanup EXIT scripts/sftp-test-helpers/create-and-check-sftp.sh wait -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ sftp \ --num-processes "$max_processes" \ --download-dir "$DOWNLOAD_DIR" \ - --metadata-exclude file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.data_source.version \ + --metadata-exclude file_directory,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.last_modified,metadata.data_source.version \ --preserve-downloads \ --reprocess \ --output-dir "$OUTPUT_DIR" \ diff --git a/test_unstructured_ingest/src/sharepoint-with-permissions.sh b/test_unstructured_ingest/src/sharepoint-with-permissions.sh index 1b00bdd96..cc16c1135 100755 --- a/test_unstructured_ingest/src/sharepoint-with-permissions.sh +++ b/test_unstructured_ingest/src/sharepoint-with-permissions.sh @@ -39,7 +39,7 @@ fi # excluding metadata.last_modified since this will always update as date processed because the Sharepoint connector creates documents on the fly # excluding metadata.data_source.permissions_data since the api has deprecation warnings. 
Will want to do a separate test for permissions data -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ sharepoint \ --download-dir "$DOWNLOAD_DIR" \ diff --git a/test_unstructured_ingest/src/sharepoint.sh b/test_unstructured_ingest/src/sharepoint.sh index ff5d0dd83..ea07410d2 100755 --- a/test_unstructured_ingest/src/sharepoint.sh +++ b/test_unstructured_ingest/src/sharepoint.sh @@ -31,7 +31,7 @@ if [ -z "$SHAREPOINT_CLIENT_ID" ] || [ -z "$SHAREPOINT_CRED" ]; then fi # excluding metadata.last_modified since this will always update as date processed because the Sharepoint connector creates documents on the fly -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ sharepoint \ --download-dir "$DOWNLOAD_DIR" \ diff --git a/test_unstructured_ingest/src/slack.sh b/test_unstructured_ingest/src/slack.sh index 6e76e0f34..503e67240 100755 --- a/test_unstructured_ingest/src/slack.sh +++ b/test_unstructured_ingest/src/slack.sh @@ -29,7 +29,7 @@ if [ -z "$SLACK_TOKEN" ]; then exit 8 fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ slack \ --num-processes "$max_processes" \ diff --git a/test_unstructured_ingest/src/wikipedia.sh b/test_unstructured_ingest/src/wikipedia.sh index 24f8c0855..21a55e572 100755 --- a/test_unstructured_ingest/src/wikipedia.sh +++ b/test_unstructured_ingest/src/wikipedia.sh @@ -24,7 +24,7 @@ function cleanup() { } trap cleanup EXIT -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ wikipedia \ --download-dir "$DOWNLOAD_DIR" \ diff --git a/test_unstructured_ingest/test-help.sh b/test_unstructured_ingest/test-help.sh deleted file mode 100755 index 9ec8a9824..000000000 --- a/test_unstructured_ingest/test-help.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env bash - -set -u -o pipefail -e - -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} -sources=$(PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" --help | sed -e '1,/Commands/ d' | awk '{NF=1}1') -echo "Checking all source: $sources" -for src in $sources; do - destinations=$(PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" "$src" --help | sed -e '1,/Destinations/ d' | awk '{NF=1}1') - for dest in $destinations; do - echo "Checking $src -> $dest" - PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" "$src" "$dest" --help - done -done diff --git a/test_unstructured_ingest/test-ingest-src.sh b/test_unstructured_ingest/test-ingest-src.sh index 1ebb3dc58..8634b330f 100755 --- a/test_unstructured_ingest/test-ingest-src.sh +++ b/test_unstructured_ingest/test-ingest-src.sh @@ -44,7 +44,8 @@ all_tests=( 'elasticsearch.sh' 'confluence-diff.sh' 'confluence-large.sh' - 'airtable-diff.sh' + # NOTE(christine): This test is disabled because it is triggering 404 client errors to the API + # 'airtable-diff.sh' # # NOTE(ryan): This test is disabled because it is triggering too many requests to the API # 'airtable-large.sh' 'local-single-file.sh' diff --git a/test_unstructured_ingest/unit/cli/test_cli.py b/test_unstructured_ingest/unit/cli/test_cli.py deleted file mode 100644 index b0fcf50cc..000000000 --- a/test_unstructured_ingest/unit/cli/test_cli.py +++ /dev/null @@ -1,18 +0,0 @@ -import click -import pytest - -from unstructured.ingest.cli.interfaces import CliMixin - - -def 
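The salesforce.sh hunk further up renames the key flag from --private-key to --private-key-path alongside the usual RUN_SCRIPT and --metadata-exclude updates. A trimmed, illustrative invocation using only flags visible in that hunk; the credential values are expected in the environment and the download directory is a placeholder:

# Sketch of the renamed flag; the real fixture also passes --preserve-downloads, --reprocess, etc.
unstructured-ingest salesforce \
  --categories "EmailMessage,Campaign" \
  --username "$SALESFORCE_USERNAME" \
  --consumer-key "$SALESFORCE_CONSUMER_KEY" \
  --private-key-path "$SALESFORCE_PRIVATE_KEY_PATH" \
  --download-dir /tmp/salesforce-download \
  --num-processes 2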
test_add_params(): - @click.command() - def sample_cmd(): - pass - - options = [ - click.Option(["--opt1"]), - click.Option(["--opt1"]), - ] - cmd = sample_cmd - with pytest.raises(ValueError): - CliMixin.add_params(cmd=cmd, params=options) diff --git a/test_unstructured_ingest/unit/connector/fsspec/test_connector_gcs.py b/test_unstructured_ingest/unit/connector/fsspec/test_connector_gcs.py deleted file mode 100644 index 60a14e987..000000000 --- a/test_unstructured_ingest/unit/connector/fsspec/test_connector_gcs.py +++ /dev/null @@ -1,35 +0,0 @@ -from unittest.mock import MagicMock - -import pytest - -from unstructured.ingest.connector.fsspec.gcs import GcsAccessConfig - - -@pytest.mark.parametrize( - ("given_access_token", "then_access_token"), - [ - (None, None), - ("/tmp/gcs.key", "/tmp/gcs.key"), - ("google_default", "google_default"), - ("cache", "cache"), - ("anon", "anon"), - ("browser", "browser"), - ("cloud", "cloud"), - ("{'some_key': 'some_value'}", {"some_key": "some_value"}), - ], -) -def test_validate_access_token(mocker, given_access_token, then_access_token): - mocked_isfile: MagicMock = mocker.patch("pathlib.Path.is_file") - mocked_isfile.return_value = True - - when_token = GcsAccessConfig(token=given_access_token).token - assert when_token == then_access_token - - -def test_fail_validate_access_token(mocker): - mocked_isfile: MagicMock = mocker.patch("pathlib.Path.is_file") - mocked_isfile.return_value = False - - given_access_token = "/tmp/gcs.key" - with pytest.raises(ValueError): - GcsAccessConfig(token=given_access_token) diff --git a/test_unstructured_ingest/unit/connector/fsspec/test_fsspec.py b/test_unstructured_ingest/unit/connector/fsspec/test_fsspec.py deleted file mode 100644 index edbe543dc..000000000 --- a/test_unstructured_ingest/unit/connector/fsspec/test_fsspec.py +++ /dev/null @@ -1,25 +0,0 @@ -from unittest.mock import MagicMock, patch - -from fsspec import AbstractFileSystem - -from unstructured.ingest.connector.fsspec.fsspec import FsspecIngestDoc, SimpleFsspecConfig -from unstructured.ingest.interfaces import ProcessorConfig, ReadConfig - - -@patch("fsspec.get_filesystem_class") -def test_version_is_string(mock_get_filesystem_class): - """ - Test that the version is a string even when the filesystem checksum is an integer. 
- """ - mock_fs = MagicMock(spec=AbstractFileSystem) - mock_fs.checksum.return_value = 1234567890 - mock_fs.info.return_value = {"etag": ""} - mock_get_filesystem_class.return_value = lambda **kwargs: mock_fs - config = SimpleFsspecConfig("s3://my-bucket", access_config={}) - doc = FsspecIngestDoc( - processor_config=ProcessorConfig(), - read_config=ReadConfig(), - connector_config=config, - remote_file_path="test.txt", - ) - assert isinstance(doc.source_metadata.version, str) diff --git a/test_unstructured_ingest/unit/connector/fsspec/test_paths.py b/test_unstructured_ingest/unit/connector/fsspec/test_paths.py deleted file mode 100644 index de3648914..000000000 --- a/test_unstructured_ingest/unit/connector/fsspec/test_paths.py +++ /dev/null @@ -1,223 +0,0 @@ -from dataclasses import dataclass -from pathlib import Path - -import pytest - -from unstructured.ingest.connector.fsspec.dropbox import ( - DropboxIngestDoc, -) -from unstructured.ingest.connector.fsspec.fsspec import ( - FsspecIngestDoc, -) -from unstructured.ingest.connector.fsspec.sftp import SftpAccessConfig, SimpleSftpConfig -from unstructured.ingest.interfaces import ( - FsspecConfig, -) - - -@dataclass -class FakeConfigDropboxRoot: - output_dir = "/fakeuser/fake_output" - dir_path = " " - download_dir = "/fakeuser/fake_download" - path_without_protocol = " " - - -@dataclass -class FakeConfigFolder: - output_dir = "/fakeuser/fake_output" - dir_path = "fake_folder" - download_dir = "/fakeuser/fake_download" - path_without_protocol = "fake_folder" - - -def test_dropbox_root_succeeds(): - """ - Test that path joining method works for Dropbox root folder. - Note slash in front of remote_file_path. - """ - dbox = DropboxIngestDoc( - connector_config=FakeConfigDropboxRoot, - read_config=FakeConfigDropboxRoot, - processor_config=FakeConfigDropboxRoot, - remote_file_path="/fake_file.txt", - ) - output_filename = dbox._output_filename - download_filename = dbox._tmp_download_file() - - assert output_filename == Path("/fakeuser/fake_output/fake_file.txt.json") - assert download_filename == Path("/fakeuser/fake_download/fake_file.txt") - - -def test_dropbox_root_succeeds2(): - """ - Test that path joining method works for Dropbox root folder. - Note lack of slash in front of remote_file_path. This still works. - """ - dbox = DropboxIngestDoc( - connector_config=FakeConfigDropboxRoot, - read_config=FakeConfigDropboxRoot, - processor_config=FakeConfigDropboxRoot, - remote_file_path="fake_file.txt", - ) - output_filename = dbox._output_filename - download_filename = dbox._tmp_download_file() - - assert output_filename == Path("/fakeuser/fake_output/fake_file.txt.json") - assert download_filename == Path("/fakeuser/fake_download/fake_file.txt") - - -def test_dropbox_folder_succeeds(): - """ - Test that path joining method works for Dropbox root folder. - Note no slash in front of remote_file_path. - """ - dbox = DropboxIngestDoc( - connector_config=FakeConfigFolder, - read_config=FakeConfigFolder, - processor_config=FakeConfigFolder, - remote_file_path="fake_file2.txt", - ) - output_filename = dbox._output_filename - download_filename = dbox._tmp_download_file() - - assert output_filename == Path("/fakeuser/fake_output/fake_file2.txt.json") - assert download_filename == Path("/fakeuser/fake_download/fake_file2.txt") - - -def test_dropbox_folder_fails(): - """Test that path joining method gives WRONG path. Note slash in front of remote_file_path. - Path joining is sensitive. 
Note that the path is MISSING the folders.""" - dbox = DropboxIngestDoc( - connector_config=FakeConfigFolder, - read_config=FakeConfigFolder, - processor_config=FakeConfigFolder, - remote_file_path="/fake_file2.txt", - ) - output_filename = dbox._output_filename - download_filename = dbox._tmp_download_file() - - assert output_filename == Path("/fake_file2.txt.json") - assert download_filename == Path("/fake_file2.txt") - - -def test_fsspec_folder_succeeds(): - """ - Test that path joining method works for root folder. - Note no slash in front of remote_file_path. - """ - dbox = FsspecIngestDoc( - connector_config=FakeConfigFolder, - read_config=FakeConfigFolder, - processor_config=FakeConfigFolder, - remote_file_path="fake_file2.txt", - ) - output_filename = dbox._output_filename - download_filename = dbox._tmp_download_file() - - assert output_filename == Path("/fakeuser/fake_output/fake_file2.txt.json") - assert download_filename == Path("/fakeuser/fake_download/fake_file2.txt") - - -def test_fsspec_folder_fails(): - """Test that path joining method gives WRONG path. Note slash in front of remote_file_path. - Path joining is sensitive. Note that the path is MISSING the folders.""" - fstest = FsspecIngestDoc( - connector_config=FakeConfigFolder, - read_config=FakeConfigFolder, - processor_config=FakeConfigFolder, - remote_file_path="/fake_file2.txt", - ) - output_filename = fstest._output_filename - download_filename = fstest._tmp_download_file() - - assert output_filename == Path("/fake_file2.txt.json") - assert download_filename == Path("/fake_file2.txt") - - -def test_post_init_invalid_protocol(): - """Validate that an invalid protocol raises a ValueError""" - with pytest.raises(ValueError): - FsspecConfig(remote_url="ftp://example.com/path/to/file.txt") - - -def test_fsspec_path_extraction_dropbox_root(): - """Validate that the path extraction works for dropbox root""" - config = FsspecConfig(remote_url="dropbox:// /") - assert config.protocol == "dropbox" - assert config.path_without_protocol == " /" - assert config.dir_path == " " - assert config.file_path == "" - - -def test_fsspec_path_extraction_dropbox_subfolder(): - """Validate that the path extraction works for dropbox subfolder""" - config = FsspecConfig(remote_url="dropbox://path") - assert config.protocol == "dropbox" - assert config.path_without_protocol == "path" - assert config.dir_path == "path" - assert config.file_path == "" - - -def test_fsspec_path_extraction_s3_bucket_only(): - """Validate that the path extraction works for s3 bucket without filename""" - config = FsspecConfig(remote_url="s3://bucket-name") - assert config.protocol == "s3" - assert config.path_without_protocol == "bucket-name" - assert config.dir_path == "bucket-name" - assert config.file_path == "" - - -def test_fsspec_path_extraction_s3_valid_path(): - """Validate that the path extraction works for s3 bucket with filename""" - config = FsspecConfig(remote_url="s3://bucket-name/path/to/file.txt") - assert config.protocol == "s3" - assert config.path_without_protocol == "bucket-name/path/to/file.txt" - assert config.dir_path == "bucket-name" - assert config.file_path == "path/to/file.txt" - - -def test_fsspec_path_extraction_s3_invalid_path(): - """Validate that an invalid s3 path (that mimics triple slash for dropbox) - raises a ValueError""" - with pytest.raises(ValueError): - FsspecConfig(remote_url="s3:///bucket-name/path/to") - - -def test_sftp_path_extraction_post_init_with_extension(): - """Validate that the path extraction works for sftp 
with file extension""" - config = SimpleSftpConfig( - remote_url="sftp://example.com/path/to/file.txt", - access_config=SftpAccessConfig(username="username", password="password", host="", port=22), - ) - assert config.file_path == "file.txt" - assert config.dir_path == "path/to" - assert config.path_without_protocol == "path/to" - assert config.access_config.host == "example.com" - assert config.access_config.port == 22 - - -def test_sftp_path_extraction_without_extension(): - """Validate that the path extraction works for sftp without extension""" - config = SimpleSftpConfig( - remote_url="sftp://example.com/path/to/directory", - access_config=SftpAccessConfig(username="username", password="password", host="", port=22), - ) - assert config.file_path == "" - assert config.dir_path == "path/to/directory" - assert config.path_without_protocol == "path/to/directory" - assert config.access_config.host == "example.com" - assert config.access_config.port == 22 - - -def test_sftp_path_extraction_with_port(): - """Validate that the path extraction works for sftp with a non-default port""" - config = SimpleSftpConfig( - remote_url="sftp://example.com:47474/path/to/file.txt", - access_config=SftpAccessConfig(username="username", password="password", host="", port=22), - ) - assert config.file_path == "file.txt" - assert config.dir_path == "path/to" - assert config.path_without_protocol == "path/to" - assert config.access_config.host == "example.com" - assert config.access_config.port == 47474 diff --git a/test_unstructured_ingest/unit/connector/test_connector_git.py b/test_unstructured_ingest/unit/connector/test_connector_git.py deleted file mode 100644 index 88760df16..000000000 --- a/test_unstructured_ingest/unit/connector/test_connector_git.py +++ /dev/null @@ -1,61 +0,0 @@ -from pathlib import Path - -import pytest - -from unstructured.ingest.connector.git import GitAccessConfig, GitSourceConnector, SimpleGitConfig - - -@pytest.mark.parametrize( - ("given_file_path", "then_is_supported"), - [ - (Path("src/submodule/document.md"), True), - (Path("src/submodule/document.txt"), True), - (Path("src/submodule/document.pdf"), True), - (Path("src/submodule/document.doc"), True), - (Path("src/submodule/document.docx"), True), - (Path("src/submodule/document.eml"), True), - (Path("src/submodule/document.html"), True), - (Path("src/submodule/document.png"), True), - (Path("src/submodule/document.jpg"), True), - (Path("src/submodule/document.ppt"), True), - (Path("src/submodule/document.pptx"), True), - (Path("src/submodule/document.xml"), True), - (Path("src/submodule/code.py"), False), - (Path("src/submodule/Dockerfile"), False), - (Path("src/submodule/Makefile"), False), - (Path("src/submodule/LICENSE"), False), - ], -) -def test_connector_supports_file(given_file_path, then_is_supported): - when_is_supported = GitSourceConnector.is_file_type_supported(str(given_file_path)) - - assert when_is_supported == then_is_supported - - -class FakeGitSourceConnectorImpl(GitSourceConnector): - def get_ingest_docs(self): - pass - - -@pytest.mark.parametrize( - ("given_file_path", "given_file_glob", "then_matches_glob"), - [ - (Path("LICENSE"), None, True), - (Path("Makefile"), ["Makefile"], True), - (Path("src/my/super/module/main.py"), ["**/*.py"], True), - (Path("src/my/super/module/main.pyc"), ["**/*.py"], False), - ], -) -def test_connector_does_path_match_glob(given_file_path, given_file_glob, then_matches_glob): - connector_config = SimpleGitConfig( - url="some_fake_url", - 
access_config=GitAccessConfig(access_token="some_fake_token"), - file_glob=given_file_glob, - ) - connector = FakeGitSourceConnectorImpl( - processor_config=None, read_config=None, connector_config=connector_config - ) - - when_matches_glob = connector.does_path_match_glob(str(given_file_path)) - - assert when_matches_glob == then_matches_glob diff --git a/test_unstructured_ingest/unit/connector/test_salesforce_connector.py b/test_unstructured_ingest/unit/connector/test_salesforce_connector.py deleted file mode 100644 index 29643ec2b..000000000 --- a/test_unstructured_ingest/unit/connector/test_salesforce_connector.py +++ /dev/null @@ -1,57 +0,0 @@ -from pathlib import Path -from unittest.mock import MagicMock - -import pytest -from cryptography.hazmat.primitives import serialization -from cryptography.hazmat.primitives.asymmetric import dsa, ec, rsa - -from unstructured.ingest.connector.salesforce import SalesforceAccessConfig - - -def pkey_to_str(key) -> str: - return key.private_bytes( - encoding=serialization.Encoding.PEM, - format=serialization.PrivateFormat.PKCS8, - encryption_algorithm=serialization.NoEncryption(), - ).decode("utf-8") - - -def rsa_private_key() -> str: - return pkey_to_str(rsa.generate_private_key(0x10001, 2048)) - - -def brainpoolp512r1_private_key() -> str: - return pkey_to_str(ec.generate_private_key(ec.BrainpoolP512R1)) - - -def dsa_private_key() -> str: - return pkey_to_str(dsa.generate_private_key(1024)) - - -@pytest.mark.parametrize( - ("private_key", "private_key_type"), - [ - (rsa_private_key(), str), - (brainpoolp512r1_private_key(), str), - (dsa_private_key(), str), - ("some_path/priv.key", Path), - ], -) -def test_private_key_type(mocker, private_key, private_key_type): - mocked_isfile: MagicMock = mocker.patch("pathlib.Path.is_file") - mocked_isfile.return_value = True - - config = SalesforceAccessConfig(consumer_key="asdf", private_key=private_key) - actual_pkey_value, actual_pkey_type = config.get_private_key_value_and_type() - assert actual_pkey_type == private_key_type - assert actual_pkey_value == private_key - - -def test_private_key_type_fail(mocker): - mocked_isfile: MagicMock = mocker.patch("pathlib.Path.is_file") - mocked_isfile.return_value = False - - given_nonexistent_path = "some_path/priv.key" - with pytest.raises(expected_exception=ValueError): - config = SalesforceAccessConfig(consumer_key="asdf", private_key=given_nonexistent_path) - config.get_private_key_value_and_type() diff --git a/test_unstructured_ingest/unit/connector/test_serialization.py b/test_unstructured_ingest/unit/connector/test_serialization.py deleted file mode 100644 index f7043e996..000000000 --- a/test_unstructured_ingest/unit/connector/test_serialization.py +++ /dev/null @@ -1,46 +0,0 @@ -from unstructured.ingest.connector.local import LocalIngestDoc, SimpleLocalConfig -from unstructured.ingest.connector.registry import ( - create_ingest_doc_from_dict, - create_ingest_doc_from_json, -) -from unstructured.ingest.interfaces import ProcessorConfig, ReadConfig - -doc = LocalIngestDoc( - path="test_unstructured_ingest/example-docs/layout-parser-paper.pdf", - connector_config=SimpleLocalConfig(input_path="test_unstructured_ingest/example-docs/"), - processor_config=ProcessorConfig(), - read_config=ReadConfig(), -) -doc.update_source_metadata() -serialized_json = doc.to_json() -serialized_dict = doc.to_dict() - - -def test_manual_deserialization(): - deserialized_doc = LocalIngestDoc.from_json(serialized_json) - assert doc == deserialized_doc - - -def 
test_registry_from_json(): - deserialized_doc = create_ingest_doc_from_json(serialized_json) - assert doc == deserialized_doc - - -def test_registry_from_dict(): - deserialized_doc = create_ingest_doc_from_dict(serialized_dict) - assert doc == deserialized_doc - - -def test_source_metadata_serialization(): - doc = LocalIngestDoc( - path="test_unstructured_ingest/example-docs/layout-parser-paper.pdf", - connector_config=SimpleLocalConfig(input_path="test_unstructured_ingest/example-docs/"), - processor_config=ProcessorConfig(), - read_config=ReadConfig(), - ) - serialized_json = doc.to_dict() - assert not serialized_json["_source_metadata"] - - doc.update_source_metadata() - serialized_json_w_meta = doc.to_dict() - assert serialized_json_w_meta["_source_metadata"] diff --git a/test_unstructured_ingest/unit/connector/test_sharepoint.py b/test_unstructured_ingest/unit/connector/test_sharepoint.py deleted file mode 100644 index c48747fb9..000000000 --- a/test_unstructured_ingest/unit/connector/test_sharepoint.py +++ /dev/null @@ -1,59 +0,0 @@ -from datetime import datetime -from unittest.mock import MagicMock - -import pytest - -from unstructured.ingest.connector.sharepoint import SharepointIngestDoc -from unstructured.ingest.interfaces import ProcessorConfig, ReadConfig - - -@pytest.mark.parametrize( - ("time_created", "time_last_modified", "expected_created", "expected_modified"), - [ - ( - "2023-06-16T05:05:05+00:00", - datetime(2023, 6, 16, 5, 5, 5), - "2023-06-16T05:05:05+00:00", - "2023-06-16T05:05:05", - ), - ("2023-06-16 05:05:05", "2023-06-16", "2023-06-16T05:05:05", "2023-06-16T00:00:00"), - # Add more pairs of input strings and their expected ISO format results here - ], -) -def test_datetime_handling_in_update_source_metadata( - mocker, time_created, time_last_modified, expected_created, expected_modified -): - """Test the handling of various datetime formats in update_source_metadata.""" - # Create a mock SharePoint response directly in the test - mock_sharepoint_response = mocker.MagicMock() - mock_sharepoint_response.time_created = time_created - mock_sharepoint_response.time_last_modified = time_last_modified - - # Patch the SharePoint interaction methods to use the mock response - mocker.patch( - "unstructured.ingest.connector.sharepoint.SharepointIngestDoc._fetch_file", - return_value=mock_sharepoint_response, - ) - mocker.patch( - "unstructured.ingest.connector.sharepoint.SharepointIngestDoc._fetch_page", - return_value=None, - ) - - # Instantiate your document with dummy data - ingest_doc = SharepointIngestDoc( - connector_config=MagicMock(), - site_url="dummy_url", - server_path="dummy_path", - is_page=False, - file_path="dummy_path.html", - processor_config=ProcessorConfig(), - read_config=ReadConfig(), - ) - - # Execute the method under test - ingest_doc.update_source_metadata() - - # Assertions to verify the datetime handling against expected results - assert ingest_doc.source_metadata is not None - assert ingest_doc.source_metadata.date_created.startswith(expected_created) - assert ingest_doc.source_metadata.date_modified.startswith(expected_modified) diff --git a/test_unstructured_ingest/unit/connector/test_sql_conform_dict.py b/test_unstructured_ingest/unit/connector/test_sql_conform_dict.py deleted file mode 100644 index 45a8a44ef..000000000 --- a/test_unstructured_ingest/unit/connector/test_sql_conform_dict.py +++ /dev/null @@ -1,169 +0,0 @@ -import datetime -from unittest.mock import Mock, patch - -from unstructured.ingest.connector.sql import 
SqlDestinationConnector - -TEST_DATA_1 = { - "element_id": "80803034fe04181c163306740700cc54", - "metadata": { - "coordinates": { - "layout_height": 792, - "layout_width": 612, - "points": [ - [72.0, 72.69200000000001], - [72.0, 83.69200000000001], - [135.8, 83.69200000000001], - [135.8, 72.69200000000001], - ], - "system": "PixelSpace", - }, - "data_source": { - "date_created": "2023-10-25 10:05:44.976775", - "date_modified": "2023-10-25 10:05:44.976775", - "date_processed": "2023-12-14T17:06:33.074057", - "permissions_data": [{"mode": 33188}], - "url": "example-docs/pdf/fake-memo.pdf", - }, - "file_directory": "example-docs", - "filename": "fake-memo.pdf", - "filetype": "application/pdf", - "languages": ["eng"], - "last_modified": "2023-10-25T10:05:44", - "page_number": 1, - }, - "text": "May 5, 2023", - "type": "UncategorizedText", - "embeddings": [ - -0.05623878538608551, - 0.008579030632972717, - 0.03698136284947395, - -0.01745658740401268, - -0.030465232208371162, - 0.00996527448296547, - ], -} - -TEST_DATA_2 = { - "metadata": { - "coordinates": {"points": [1, 2, 3]}, - "links": {"link1": "https://example.com", "link2": "https://example.org"}, - "data_source": { - "date_created": "2021-01-01T00:00:00", - "date_modified": "2021-01-02T00:00:00", - "date_processed": "2022-12-13T15:44:08", - "version": 1.1, - }, - "last_modified": "2021-01-03T00:00:00", - "page_number": 10, - }, - "embeddings": [0.1, 0.2, 0.3], -} - -TEST_DATA_3 = { - "metadata": { - "coordinates": {"points": [1, 2, 3]}, - "data_source": { - "date_created": "2021-01-01T00:00:00", - "date_modified": "2021-01-02T00:00:00", - "date_processed": "2022-12-13T15:44:08", - "version": 1.1, - }, - "last_modified": "2021-01-03T00:00:00", - "page_number": 10, - "link_texts": ["Skip to main content"], - "link_urls": ["#main-content"], - }, - "embeddings": [0.1, 0.2, 0.3], -} - - -def test_conform_dict_1(): - """Validate that the conform_dict method returns the expected output for a real example""" - # Create a mock instance of the connector class - connector = SqlDestinationConnector(write_config=Mock(), connector_config=Mock()) - - # Mock the uuid.uuid4 function to return a fixed value - with patch("uuid.uuid4", return_value="mocked_uuid"): - # Call the conform_dict method - data_out = TEST_DATA_1.copy() - connector.conform_dict(data_out) - - # Assert that the result matches the expected output - assert data_out == { - "element_id": "80803034fe04181c163306740700cc54", - "text": "May 5, 2023", - "type": "UncategorizedText", - "id": "mocked_uuid", - "file_directory": "example-docs", - "filename": "fake-memo.pdf", - "filetype": "application/pdf", - "languages": ["eng"], - "last_modified": datetime.datetime(2023, 10, 25, 10, 5, 44), - "page_number": "1", - "date_created": datetime.datetime(2023, 10, 25, 10, 5, 44, 976775), - "date_modified": datetime.datetime(2023, 10, 25, 10, 5, 44, 976775), - "date_processed": datetime.datetime(2023, 12, 14, 17, 6, 33, 74057), - "permissions_data": '[{"mode": 33188}]', - "url": "example-docs/pdf/fake-memo.pdf", - "layout_height": 792, - "layout_width": 612, - "points": "[[72.0, 72.69200000000001], [72.0, 83.69200000000001]," - " [135.8, 83.69200000000001], [135.8, 72.69200000000001]]", - "system": "PixelSpace", - "embeddings": "[-0.05623878538608551, 0.008579030632972717, " - "0.03698136284947395, -0.01745658740401268, " - "-0.030465232208371162, 0.00996527448296547]", - } - - -def test_conform_dict_2(): - """Validate that the conform_dict method returns the expected output for a simplified example""" 
- # Create a mock instance of the connector class - connector = SqlDestinationConnector(write_config=Mock(), connector_config=Mock()) - - # Mock the uuid.uuid4 function to return a fixed value - with patch("uuid.uuid4", return_value="mocked_uuid"): - # Call the conform_dict method - data_out = TEST_DATA_2.copy() - connector.conform_dict(data_out) - - # Assert that the result matches the expected output - assert data_out == { - "embeddings": "[0.1, 0.2, 0.3]", - "id": "mocked_uuid", - "links": '{"link1": "https://example.com", "link2": "https://example.org"}', - "last_modified": datetime.datetime(2021, 1, 3, 0, 0), - "page_number": "10", - "date_created": datetime.datetime(2021, 1, 1, 0, 0), - "date_modified": datetime.datetime(2021, 1, 2, 0, 0), - "date_processed": datetime.datetime(2022, 12, 13, 15, 44, 8), - "version": "1.1", - "points": "[1, 2, 3]", - } - - -def test_conform_dict_link_texts(): - """Validate that the conform_dict method returns the expected output link_texts""" - # Create a mock instance of the connector class - connector = SqlDestinationConnector(write_config=Mock(), connector_config=Mock()) - - # Mock the uuid.uuid4 function to return a fixed value - with patch("uuid.uuid4", return_value="mocked_uuid"): - # Call the conform_dict method - data_out = TEST_DATA_3.copy() - connector.conform_dict(data_out) - - # Assert that the result matches the expected output - assert data_out == { - "embeddings": "[0.1, 0.2, 0.3]", - "id": "mocked_uuid", - "last_modified": datetime.datetime(2021, 1, 3, 0, 0), - "link_texts": ["Skip to main content"], - "link_urls": ["#main-content"], - "page_number": "10", - "date_created": datetime.datetime(2021, 1, 1, 0, 0), - "date_modified": datetime.datetime(2021, 1, 2, 0, 0), - "date_processed": datetime.datetime(2022, 12, 13, 15, 44, 8), - "version": "1.1", - "points": "[1, 2, 3]", - } diff --git a/test_unstructured_ingest/unit/enhanced_dataclass/test_enhanced_dataclass.py b/test_unstructured_ingest/unit/enhanced_dataclass/test_enhanced_dataclass.py deleted file mode 100644 index 7e1727d1e..000000000 --- a/test_unstructured_ingest/unit/enhanced_dataclass/test_enhanced_dataclass.py +++ /dev/null @@ -1,60 +0,0 @@ -import json -from dataclasses import Field, dataclass, fields - -import pytest - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin, enhanced_field -from unstructured.ingest.enhanced_dataclass.dataclasses import EnhancedField - - -@dataclass -class AuthData(EnhancedDataClassJsonMixin): - username: str - password: str = enhanced_field(sensitive=True) - date: int = enhanced_field(overload_name="time") - - -auth = AuthData(username="my name", password="top secret", date=3) - - -def test_enhanced_field(): - fs = fields(AuthData) - for f in fs: - if f.name == "username": - assert isinstance(f, Field) - assert hasattr(f, "sensitive") is False - else: - assert isinstance(f, EnhancedField) - if f.name == "password": - assert f.sensitive is True - else: - assert not f.sensitive - - -@pytest.mark.parametrize( - ("apply_name_overload", "expected_dict"), - [ - (True, {"username": "my name", "password": "THIS IS REDACTED", "time": 3}), - (False, {"username": "my name", "password": "THIS IS REDACTED", "date": 3}), - ], -) -def test_to_json(apply_name_overload: bool, expected_dict: dict): - j = auth.to_json( - redact_sensitive=True, - redacted_text="THIS IS REDACTED", - apply_name_overload=apply_name_overload, - ) - expected = json.dumps(expected_dict) - assert j == expected - - -@pytest.mark.parametrize( - 
("apply_name_overload", "expected_dict"), - [ - (True, {"username": "my name", "password": "***REDACTED***", "time": 3}), - (False, {"username": "my name", "password": "***REDACTED***", "date": 3}), - ], -) -def test_to_dict(apply_name_overload: bool, expected_dict: dict): - d = auth.to_dict(redact_sensitive=True, apply_name_overload=apply_name_overload) - assert d == expected_dict diff --git a/test_unstructured_ingest/unit/pipeline/reformat/test_chunking.py b/test_unstructured_ingest/unit/pipeline/reformat/test_chunking.py deleted file mode 100644 index 433ee810d..000000000 --- a/test_unstructured_ingest/unit/pipeline/reformat/test_chunking.py +++ /dev/null @@ -1,156 +0,0 @@ -from __future__ import annotations - -import json -import logging -import os - -import pytest -from _pytest.logging import LogCaptureFixture - -from test_unstructured.unit_utils import ( - FixtureRequest, - Mock, - example_doc_path, - function_mock, - method_mock, -) -from unstructured.documents.elements import CompositeElement -from unstructured.ingest.interfaces import ChunkingConfig, PartitionConfig -from unstructured.ingest.pipeline.interfaces import PipelineContext -from unstructured.ingest.pipeline.reformat.chunking import Chunker - -ELEMENTS_JSON_FILE = example_doc_path( - "test_evaluate_files/unstructured_output/Bank Good Credit Loan.pptx.json" -) - - -class DescribeChunker: - """Unit tests for ingest.pipeline.reformat.chunking.Chunker""" - - # -- Chunker.run() ----------------------------------------------------------------------------- - - # -- integration test -- - def it_creates_JSON_elements(self, _ingest_docs_map_: Mock, tmpdir: str): - chunker = Chunker( - chunking_config=ChunkingConfig(chunking_strategy="by_title"), - pipeline_context=PipelineContext(work_dir=tmpdir), - partition_config=PartitionConfig(), - ) - # -- `Chunker.chunk()` defaults to writing to "{work_dir}/chunked", which is located in - # -- "/.cache" of a user's profile. 
- # -- Define `work_dir` add the "/chunked" subdirectory to it: - os.makedirs(os.path.join(tmpdir, "chunked"), exist_ok=True) - - filename = chunker.run(ELEMENTS_JSON_FILE) or "" - - head, tail = os.path.split(filename if filename else "") - # -- Check that a json file was created in `/chunked` -- - assert head.endswith("chunked") - assert tail.endswith(".json") - # -- Check contents of file -- - with open(filename) as json_f: - json_data = json.load(json_f) - assert all(d.get("type") == "CompositeElement" for d in json_data) - assert len(json_data) == 5 - - def it_returns_None_and_logs_message_without_chunking_strategy( - self, _ingest_docs_map_: Mock, caplog: LogCaptureFixture - ): - chunker = Chunker( - chunking_config=ChunkingConfig(), - pipeline_context=PipelineContext(), - partition_config=PartitionConfig(), - ) - caplog.set_level(logging.INFO) - - assert chunker.run(ELEMENTS_JSON_FILE) is None - assert "chunking_strategy is None, skipping chunking for" in caplog.text - - def it_logs_error_on_invalid_remote_chunking_strategy( - self, _ingest_docs_map_: Mock, caplog: LogCaptureFixture - ): - chunker = Chunker( - chunking_config=ChunkingConfig(chunking_strategy="by_invalid"), - pipeline_context=PipelineContext(), - partition_config=PartitionConfig(partition_by_api=True), - ) - - chunker.run(ELEMENTS_JSON_FILE) - - assert "Input should be 'basic', 'by_page', 'by_similarity'" in caplog.text - - def it_warns_with_nonlocal_chunking_strategy_and_partition_by_api_False( - self, _ingest_docs_map_: Mock, caplog: LogCaptureFixture - ): - chunker = Chunker( - chunking_config=ChunkingConfig(chunking_strategy="by_similarity"), - pipeline_context=PipelineContext(), - partition_config=PartitionConfig(partition_by_api=False), - ) - - chunker.run(ELEMENTS_JSON_FILE) - - assert "There is no locally available chunking_strategy:" in caplog.text - - # -- Chunker.chunk() --------------------------------------------------------------------------- - - def it_skips_chunking_if_strategy_is_None(self): - chunker = Chunker( - chunking_config=ChunkingConfig(chunking_strategy=None), - pipeline_context=PipelineContext(), - partition_config=PartitionConfig(), - ) - - assert chunker.chunk(ELEMENTS_JSON_FILE) is None - - # -- integration test -- - @pytest.mark.parametrize("strategy", ["by_title", "basic"]) - def it_chunks_locally(self, strategy: str, _ingest_docs_map_: Mock): - chunker = Chunker( - chunking_config=ChunkingConfig(chunking_strategy=strategy), - pipeline_context=PipelineContext(), - partition_config=PartitionConfig(), - ) - - chunked_elements = chunker.chunk(ELEMENTS_JSON_FILE) - - assert all(isinstance(elem, CompositeElement) for elem in chunked_elements) # type: ignore - - def it_chunks_remotely(self, _ingest_docs_map_: Mock, _partition_via_api_: Mock): - chunker = Chunker( - chunking_config=ChunkingConfig(chunking_strategy="by_similarity"), - pipeline_context=PipelineContext(), - partition_config=PartitionConfig( - partition_by_api=True, api_key="aaaaaaaaaaaaaaaaaaaaa" - ), - ) - - chunker.chunk(ELEMENTS_JSON_FILE) - - _partition_via_api_.assert_called_once_with( - filename=ELEMENTS_JSON_FILE, - api_key="aaaaaaaaaaaaaaaaaaaaa", - api_url="https://api.unstructured.io/general/v0/general", - chunking_strategy="by_similarity", - # (jennings) the sdk uses combine_under_n_chars but the ChunkingConfig param is - # combine_text_under_n_chars - combine_under_n_chars=None, - include_orig_elements=None, - max_characters=None, - multipage_sections=None, - new_after_n_chars=None, - overlap=None, - 
overlap_all=None, - ) - - # -- fixtures -------------------------------------------------------------------------------- - - @pytest.fixture() - def _ingest_docs_map_(self, request: FixtureRequest): - return method_mock(request, PipelineContext, "ingest_docs_map") - - @pytest.fixture() - def _partition_via_api_(self, request: FixtureRequest): - return function_mock( - request, "unstructured.ingest.pipeline.reformat.chunking.partition_via_api" - ) diff --git a/test_unstructured_ingest/unit/test_error.py b/test_unstructured_ingest/unit/test_error.py deleted file mode 100644 index 0c588409e..000000000 --- a/test_unstructured_ingest/unit/test_error.py +++ /dev/null @@ -1,27 +0,0 @@ -import pytest - -from unstructured.ingest.error import ( - DestinationConnectionError, - PartitionError, - SourceConnectionError, -) - - -@pytest.mark.parametrize( - ("error_class", "exception_type", "error_message"), - [ - (SourceConnectionError, ValueError, "Simulated connection error"), - (DestinationConnectionError, RuntimeError, "Simulated connection error"), - (PartitionError, FileNotFoundError, "Simulated partition error"), - ], -) -def test_custom_error_decorator(error_class, exception_type, error_message): - @error_class.wrap - def simulate_error(): - raise exception_type(error_message) - - with pytest.raises(error_class) as context: - simulate_error() - - expected_error_string = error_class.error_string.format(error_message) - assert str(context.value) == expected_error_string diff --git a/test_unstructured_ingest/unit/test_interfaces.py b/test_unstructured_ingest/unit/test_interfaces.py deleted file mode 100644 index 7a91ed9f1..000000000 --- a/test_unstructured_ingest/unit/test_interfaces.py +++ /dev/null @@ -1,281 +0,0 @@ -from __future__ import annotations - -import os -import pathlib -from dataclasses import dataclass -from typing import Any, Dict - -import pytest - -from unstructured.documents.elements import DataSourceMetadata -from unstructured.ingest.interfaces import ( - BaseConnectorConfig, - BaseSingleIngestDoc, - ChunkingConfig, - PartitionConfig, - ProcessorConfig, - ReadConfig, -) -from unstructured.partition.auto import partition -from unstructured.staging.base import elements_to_dicts - -DIRECTORY = pathlib.Path(__file__).parent.resolve() -EXAMPLE_DOCS_DIRECTORY = os.path.join(DIRECTORY, "../..", "example-docs") -TEST_DOWNLOAD_DIR = "/tmp" -TEST_OUTPUT_DIR = "/tmp" -TEST_ID = "test" -TEST_FILE_PATH = os.path.join(EXAMPLE_DOCS_DIRECTORY, "book-war-and-peace-1p.txt") - - -@dataclass -class ExampleConfig(BaseConnectorConfig): - id: str - path: str - - -TEST_CONFIG = ExampleConfig(id=TEST_ID, path=TEST_FILE_PATH) -TEST_SOURCE_URL = "test-source-url" -TEST_VERSION = "1.1.1" -TEST_RECORD_LOCATOR = {"id": "data-source-id"} -TEST_DATE_CREATED = "2021-01-01T00:00:00" -TEST_DATE_MODIFIED = "2021-01-02T00:00:00" -TEST_DATE_PROCESSSED = "2022-12-13T15:44:08" - - -@dataclass -class ExampleIngestDoc(BaseSingleIngestDoc): - connector_config: ExampleConfig - - @property - def filename(self): - return TEST_FILE_PATH - - @property - def _output_filename(self): - return TEST_FILE_PATH + ".json" - - @property - def source_url(self) -> str: - return TEST_SOURCE_URL - - @property - def version(self) -> str: - return TEST_VERSION - - @property - def record_locator(self) -> Dict[str, Any]: - return TEST_RECORD_LOCATOR - - @property - def date_created(self) -> str: - return TEST_DATE_CREATED - - @property - def date_modified(self) -> str: - return TEST_DATE_MODIFIED - - @property - def exists(self) -> bool: - 
return True - - def cleanup_file(self): - pass - - def get_file(self): - pass - - def has_output(self): - return True - - def write_result(self, result): - pass - - -@pytest.fixture() -def partition_test_results(): - # Reusable partition test results, calculated only once - result = partition( - filename=str(TEST_FILE_PATH), - data_source_metadata=DataSourceMetadata( - url=TEST_SOURCE_URL, - version=TEST_VERSION, - record_locator=TEST_RECORD_LOCATOR, - date_created=TEST_DATE_CREATED, - date_modified=TEST_DATE_MODIFIED, - date_processed=TEST_DATE_PROCESSSED, - ), - ) - return result - - -@pytest.fixture() -def partition_file_test_results(partition_test_results): - # Reusable partition_file test results, calculated only once - return elements_to_dicts(partition_test_results) - - -def test_partition_file(): - """Validate partition_file returns a list of dictionaries with the expected keys, - metadatakeys, and data source metadata values.""" - test_ingest_doc = ExampleIngestDoc( - connector_config=TEST_CONFIG, - read_config=ReadConfig(download_dir=TEST_DOWNLOAD_DIR), - processor_config=ProcessorConfig(output_dir=TEST_OUTPUT_DIR), - ) - test_ingest_doc._date_processed = TEST_DATE_PROCESSSED - elements = test_ingest_doc.partition_file(partition_config=PartitionConfig()) - element_dicts = elements_to_dicts(elements) - assert len(element_dicts) - expected_keys = { - "element_id", - "text", - "type", - "metadata", - } - # The document in TEST_FILE_PATH does not have elements with coordinates so - # partition is not expected to return coordinates metadata. - expected_metadata_keys = { - "data_source", - "filename", - "file_directory", - "filetype", - "languages", - "last_modified", - } - for elem in element_dicts: - # Parent IDs are non-deterministic - remove them from the test - elem["metadata"].pop("parent_id", None) - - assert expected_keys == set(elem.keys()) - assert expected_metadata_keys == set(elem["metadata"].keys()) - data_source_metadata = elem["metadata"]["data_source"] - assert data_source_metadata["url"] == TEST_SOURCE_URL - assert data_source_metadata["version"] == TEST_VERSION - assert data_source_metadata["record_locator"] == TEST_RECORD_LOCATOR - assert data_source_metadata["date_created"] == TEST_DATE_CREATED - assert data_source_metadata["date_modified"] == TEST_DATE_MODIFIED - assert data_source_metadata["date_processed"] == TEST_DATE_PROCESSSED - - -def test_process_file_fields_include_default(mocker, partition_test_results): - """Validate when metadata_include and metadata_exclude are not set, all fields: - ("element_id", "text", "type", "metadata") are included""" - mock_partition = mocker.patch( - "unstructured.partition.auto.partition", - return_value=partition_test_results, - ) - test_ingest_doc = ExampleIngestDoc( - connector_config=TEST_CONFIG, - read_config=ReadConfig(download_dir=TEST_DOWNLOAD_DIR), - processor_config=ProcessorConfig(output_dir=TEST_OUTPUT_DIR), - ) - elements = test_ingest_doc.partition_file(partition_config=PartitionConfig()) - element_dicts = elements_to_dicts(elements) - assert len(element_dicts) - assert mock_partition.call_count == 1 - for elem in element_dicts: - # Parent IDs are non-deterministic - remove them from the test - elem["metadata"].pop("parent_id", None) - - assert {"element_id", "text", "type", "metadata"} == set(elem.keys()) - data_source_metadata = elem["metadata"]["data_source"] - assert data_source_metadata["url"] == TEST_SOURCE_URL - assert data_source_metadata["version"] == TEST_VERSION - assert 
data_source_metadata["record_locator"] == TEST_RECORD_LOCATOR - assert data_source_metadata["date_created"] == TEST_DATE_CREATED - assert data_source_metadata["date_modified"] == TEST_DATE_MODIFIED - assert data_source_metadata["date_processed"] == TEST_DATE_PROCESSSED - - -def test_process_file_metadata_includes_filename_and_filetype( - mocker, - partition_test_results, -): - """Validate when metadata_include is set to "filename,filetype", - only filename is included in metadata""" - mocker.patch( - "unstructured.partition.auto.partition", - return_value=partition_test_results, - ) - partition_config = PartitionConfig( - metadata_include=["filename", "filetype"], - ) - test_ingest_doc = ExampleIngestDoc( - connector_config=TEST_CONFIG, - read_config=ReadConfig(download_dir=TEST_DOWNLOAD_DIR), - processor_config=ProcessorConfig(output_dir=TEST_OUTPUT_DIR), - ) - isd_elems = test_ingest_doc.process_file(partition_config=partition_config) - assert len(isd_elems) - for elem in isd_elems: - # Parent IDs are non-deterministic - remove them from the test - elem["metadata"].pop("parent_id", None) - - assert set(elem["metadata"].keys()) == {"filename", "filetype"} - - -def test_process_file_metadata_exclude_filename_pagenum(mocker, partition_test_results): - """Validate when metadata_exclude is set to "filename,page_number", - neither filename nor page_number are included in metadata""" - mocker.patch( - "unstructured.partition.auto.partition", - return_value=partition_test_results, - ) - partition_config = PartitionConfig( - metadata_exclude=["filename", "page_number"], - ) - test_ingest_doc = ExampleIngestDoc( - connector_config=TEST_CONFIG, - read_config=ReadConfig(download_dir=TEST_DOWNLOAD_DIR), - processor_config=ProcessorConfig( - output_dir=TEST_OUTPUT_DIR, - ), - ) - isd_elems = test_ingest_doc.process_file(partition_config=partition_config) - assert len(isd_elems) - for elem in isd_elems: - assert "filename" not in elem["metadata"] - assert "page_number" not in elem["metadata"] - - -def test_process_file_flatten_metadata(mocker, partition_test_results): - mocker.patch( - "unstructured.partition.auto.partition", - return_value=partition_test_results, - ) - partition_config = PartitionConfig( - metadata_include=["filename", "file_directory", "filetype"], - flatten_metadata=True, - ) - test_ingest_doc = ExampleIngestDoc( - connector_config=TEST_CONFIG, - read_config=ReadConfig(download_dir=TEST_DOWNLOAD_DIR), - processor_config=ProcessorConfig( - output_dir=TEST_OUTPUT_DIR, - ), - ) - isd_elems = test_ingest_doc.process_file(partition_config=partition_config) - expected_keys = {"element_id", "text", "type", "filename", "file_directory", "filetype"} - for elem in isd_elems: - assert expected_keys == set(elem.keys()) - - -class DescribeChunkingConfig: - """Unit tests for unstructured.ingest.interfaces.ChunkingConfig""" - - def it_accepts_chunking_strategy_by_itself(self): - config = ChunkingConfig(chunking_strategy="basic") - assert config.chunking_strategy == "basic" - - def it_defaults_to_chunk_by_title_if_only_chunk_elements_is_True(self): - config = ChunkingConfig(chunk_elements=True) - assert config.chunking_strategy == "by_title" - - def but_it_defaults_to_chunking_strategy_over_chunk_elements(self): - config = ChunkingConfig(chunk_elements=True, chunking_strategy="basic") - assert config.chunking_strategy == "basic" - - def it_silently_accepts_unrecognized_chunker(self, caplog: pytest.LogCaptureFixture): - config = ChunkingConfig(chunking_strategy="foobar") - assert 
config.chunking_strategy == "foobar" - assert caplog.text == "" diff --git a/test_unstructured_ingest/unit/test_logger.py b/test_unstructured_ingest/unit/test_logger.py deleted file mode 100644 index 4f15aba4c..000000000 --- a/test_unstructured_ingest/unit/test_logger.py +++ /dev/null @@ -1,78 +0,0 @@ -import json - -import pytest - -from unstructured.ingest.logger import ( - default_is_data_sensitive, - hide_sensitive_fields, - redact_jsons, -) - - -@pytest.mark.parametrize( - ("key", "value", "is_sensitive"), - [ - ("username", "john_smith", False), - ("password", "13?H%", True), - ("token", "123", True), - ("AWS_CREDENTIAL", "aws_credential", True), - ("AWS_KEY", None, False), - ], -) -def test_default_is_sensitive(key, value, is_sensitive): - assert default_is_data_sensitive(key, value) == is_sensitive - - -def test_hide_sensitive_fields(): - d = { - "username": "john_smith", - "password": "13?H%", - "inner": { - "token": "123", - "AWS_KEY": None, - "inner_j_string": json.dumps( - {"account_name": "secret name", "client_id": 123, "timestamp": 123} - ), - }, - } - redacted_d = hide_sensitive_fields(d) - expected_d = { - "password": "*******", - "username": "john_smith", - "inner": { - "token": "*******", - "AWS_KEY": None, - "inner_j_string": json.dumps( - {"account_name": "*******", "client_id": "*******", "timestamp": 123} - ), - }, - } - assert redacted_d == expected_d - - -def test_redact_jsons(): - d1 = { - "username": "john_smith", - "password": "13?H%", - "inner": { - "token": "123", - "AWS_KEY": None, - "inner_j_string": json.dumps( - {"account_name": "secret name", "client_id": 123, "timestamp": 123} - ), - }, - } - - d2 = {"username": "tim67", "update_time": 456} - d3 = {"account_name": "top secret", "host": "http://localhost:8888"} - - sensitive_string = f"Some topic secret info ({json.dumps(d1)} regarding {d2} and {d3})" - expected_string = ( - 'Some topic secret info ({"username": "john_smith", "password": "*******", ' - '"inner": {"token": "*******", "AWS_KEY": null, "inner_j_string": ' - '"{\\"account_name\\": \\"*******\\", \\"client_id\\": \\"*******\\", ' - '\\"timestamp\\": 123}"}} regarding {"username": "tim67", "update_time": 456} ' - 'and {"account_name": "*******", "host": "http://localhost:8888"})' - ) - redacted_string = redact_jsons(sensitive_string) - assert redacted_string == expected_string diff --git a/test_unstructured_ingest/unit/test_utils.py b/test_unstructured_ingest/unit/test_utils.py deleted file mode 100644 index bf2556cbe..000000000 --- a/test_unstructured_ingest/unit/test_utils.py +++ /dev/null @@ -1,164 +0,0 @@ -import json -import typing as t -from dataclasses import dataclass, field -from datetime import datetime - -import pytest -import pytz - -from unstructured.ingest.cli.utils import extract_config -from unstructured.ingest.interfaces import BaseConfig -from unstructured.ingest.utils.string_and_date_utils import ensure_isoformat_datetime, json_to_dict - - -@dataclass -class A(BaseConfig): - a: str - - -@dataclass -class B(BaseConfig): - a: A - b: int - - -flat_data = {"a": "test", "b": 4, "c": True} - - -def test_extract_config_concrete(): - @dataclass - class C(BaseConfig): - b: B - c: bool - - c = extract_config(flat_data=flat_data, config=C) - expected_result = {"b": {"a": {"a": "test"}, "b": 4}, "c": True} - assert c.to_json(sort_keys=True) == json.dumps(expected_result, sort_keys=True) - - -def test_extract_config_optional(): - @dataclass - class C(BaseConfig): - c: bool - b: t.Optional[B] = None - - c = 
extract_config(flat_data=flat_data, config=C) - expected_result = {"b": {"a": {"a": "test"}, "b": 4}, "c": True} - assert c.to_json(sort_keys=True) == json.dumps(expected_result, sort_keys=True) - - -def test_extract_config_union(): - @dataclass - class C(BaseConfig): - c: bool - b: t.Optional[t.Union[B, int]] = None - - c = extract_config(flat_data=flat_data, config=C) - expected_result = {"b": 4, "c": True} - assert c.to_json(sort_keys=True) == json.dumps(expected_result, sort_keys=True) - - -def test_extract_config_list(): - @dataclass - class C(BaseConfig): - c: t.List[int] - b: B - - flat_data = {"a": "test", "b": 4, "c": [1, 2, 3]} - c = extract_config(flat_data=flat_data, config=C) - expected_result = {"b": {"a": {"a": "test"}, "b": 4}, "c": [1, 2, 3]} - assert c.to_json(sort_keys=True) == json.dumps(expected_result, sort_keys=True) - - -def test_extract_config_optional_list(): - @dataclass - class C(BaseConfig): - b: B - c: t.Optional[t.List[int]] = None - - flat_data = {"a": "test", "b": 4, "c": [1, 2, 3]} - c = extract_config(flat_data=flat_data, config=C) - expected_result = {"b": {"a": {"a": "test"}, "b": 4}, "c": [1, 2, 3]} - assert c.to_json(sort_keys=True) == json.dumps(expected_result, sort_keys=True) - - -def test_extract_config_dataclass_list(): - @dataclass - class C(BaseConfig): - c: bool - b: t.List[B] = field(default_factory=list) - - flat_data = {"a": "test", "c": True} - c = extract_config(flat_data=flat_data, config=C) - expected_result = {"b": [], "c": True} - assert c.to_json(sort_keys=True) == json.dumps(expected_result, sort_keys=True) - - -def test_extract_config_dict(): - @dataclass - class C(BaseConfig): - c: bool - b: t.Dict[str, B] = field(default_factory=dict) - - flat_data = {"c": True} - c = extract_config(flat_data=flat_data, config=C) - expected_result = {"c": True, "b": {}} - assert c.to_json(sort_keys=True) == json.dumps(expected_result, sort_keys=True) - - -def test_json_to_dict_valid_json(): - json_string = '{"key": "value"}' - expected_result = {"key": "value"} - assert json_to_dict(json_string) == expected_result - assert isinstance(json_to_dict(json_string), dict) - - -def test_json_to_dict_malformed_json(): - json_string = '{"key": "value"' - expected_result = '{"key": "value"' - assert json_to_dict(json_string) == expected_result - assert isinstance(json_to_dict(json_string), str) - - -def test_json_to_dict_single_quotes(): - json_string = "{'key': 'value'}" - expected_result = {"key": "value"} - assert json_to_dict(json_string) == expected_result - assert isinstance(json_to_dict(json_string), dict) - - -def test_json_to_dict_path(): - json_string = "/path/to/file.json" - expected_result = "/path/to/file.json" - assert json_to_dict(json_string) == expected_result - assert isinstance(json_to_dict(json_string), str) - - -def test_ensure_isoformat_datetime_for_datetime(): - dt = ensure_isoformat_datetime(datetime(2021, 1, 1, 12, 0, 0)) - assert dt == "2021-01-01T12:00:00" - - -def test_ensure_isoformat_datetime_for_datetime_with_tz(): - dt = ensure_isoformat_datetime(datetime(2021, 1, 1, 12, 0, 0, tzinfo=pytz.UTC)) - assert dt == "2021-01-01T12:00:00+00:00" - - -def test_ensure_isoformat_datetime_for_string(): - dt = ensure_isoformat_datetime("2021-01-01T12:00:00") - assert dt == "2021-01-01T12:00:00" - - -def test_ensure_isoformat_datetime_for_string2(): - dt = ensure_isoformat_datetime("2021-01-01T12:00:00+00:00") - assert dt == "2021-01-01T12:00:00+00:00" - - -def test_ensure_isoformat_datetime_fails_on_string(): - with 
pytest.raises(ValueError): - ensure_isoformat_datetime("bad timestamp") - - -def test_ensure_isoformat_datetime_fails_on_int(): - with pytest.raises(TypeError): - ensure_isoformat_datetime(1111) diff --git a/unstructured/__version__.py b/unstructured/__version__.py index e794a070a..65162b438 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.15.15-dev0" # pragma: no cover +__version__ = "0.16.0" # pragma: no cover diff --git a/unstructured/embed/bedrock.py b/unstructured/embed/bedrock.py index dba52e776..b667e9558 100644 --- a/unstructured/embed/bedrock.py +++ b/unstructured/embed/bedrock.py @@ -1,62 +1,69 @@ from dataclasses import dataclass -from typing import TYPE_CHECKING, List, Optional +from typing import TYPE_CHECKING, List import numpy as np +from pydantic import SecretStr from unstructured.documents.elements import ( Element, ) from unstructured.embed.interfaces import BaseEmbeddingEncoder, EmbeddingConfig -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import EmbeddingEncoderConnectionError from unstructured.utils import requires_dependencies if TYPE_CHECKING: from langchain_community.embeddings import BedrockEmbeddings -@dataclass class BedrockEmbeddingConfig(EmbeddingConfig): - aws_access_key_id: str = enhanced_field(sensitive=True) - aws_secret_access_key: str = enhanced_field(sensitive=True) + aws_access_key_id: SecretStr + aws_secret_access_key: SecretStr region_name: str = "us-west-2" + @requires_dependencies( + ["boto3", "numpy", "langchain_community"], + extras="bedrock", + ) + def get_client(self) -> "BedrockEmbeddings": + # delay import only when needed + import boto3 + from langchain_community.embeddings import BedrockEmbeddings + + bedrock_runtime = boto3.client( + service_name="bedrock-runtime", + aws_access_key_id=self.aws_access_key_id.get_secret_value(), + aws_secret_access_key=self.aws_secret_access_key.get_secret_value(), + region_name=self.region_name, + ) + + bedrock_client = BedrockEmbeddings(client=bedrock_runtime) + return bedrock_client + @dataclass class BedrockEmbeddingEncoder(BaseEmbeddingEncoder): config: BedrockEmbeddingConfig - _client: Optional["BedrockEmbeddings"] = enhanced_field(init=False, default=None) - _exemplary_embedding: Optional[List[float]] = enhanced_field(init=False, default=None) - @property - def client(self) -> "BedrockEmbeddings": - if self._client is None: - self._client = self.create_client() - return self._client - - @property - def exemplary_embedding(self) -> List[float]: - if self._exemplary_embedding is None: - self._exemplary_embedding = self.client.embed_query("Q") - return self._exemplary_embedding + def get_exemplary_embedding(self) -> List[float]: + return self.embed_query(query="Q") def __post_init__(self): self.initialize() - def initialize(self): - self.bedrock_client = self.create_client() - def num_of_dimensions(self): - return np.shape(self.exemplary_embedding) + exemplary_embedding = self.get_exemplary_embedding() + return np.shape(exemplary_embedding) def is_unit_vector(self): - return np.isclose(np.linalg.norm(self.exemplary_embedding), 1.0) + exemplary_embedding = self.get_exemplary_embedding() + return np.isclose(np.linalg.norm(exemplary_embedding), 1.0) def embed_query(self, query): - return np.array(self.bedrock_client.embed_query(query)) + bedrock_client = self.config.get_client() + return np.array(bedrock_client.embed_query(query)) def embed_documents(self, elements: List[Element]) -> List[Element]: - 
embeddings = self.bedrock_client.embed_documents([str(e) for e in elements]) + bedrock_client = self.config.get_client() + embeddings = bedrock_client.embed_documents([str(e) for e in elements]) elements_with_embeddings = self._add_embeddings_to_elements(elements, embeddings) return elements_with_embeddings @@ -67,18 +74,3 @@ class BedrockEmbeddingEncoder(BaseEmbeddingEncoder): element.embeddings = embeddings[i] elements_w_embedding.append(element) return elements - - @EmbeddingEncoderConnectionError.wrap - @requires_dependencies( - ["boto3", "numpy", "langchain_community"], - extras="bedrock", - ) - def create_client(self) -> "BedrockEmbeddings": - # delay import only when needed - import boto3 - from langchain_community.embeddings import BedrockEmbeddings - - bedrock_runtime = boto3.client(service_name="bedrock-runtime", **self.config.to_dict()) - - bedrock_client = BedrockEmbeddings(client=bedrock_runtime) - return bedrock_client diff --git a/unstructured/embed/huggingface.py b/unstructured/embed/huggingface.py index cb98be0e8..d955f7053 100644 --- a/unstructured/embed/huggingface.py +++ b/unstructured/embed/huggingface.py @@ -1,60 +1,59 @@ -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import TYPE_CHECKING, List, Optional import numpy as np +from pydantic import Field from unstructured.documents.elements import ( Element, ) from unstructured.embed.interfaces import BaseEmbeddingEncoder, EmbeddingConfig -from unstructured.ingest.error import EmbeddingEncoderConnectionError from unstructured.utils import requires_dependencies if TYPE_CHECKING: from langchain_huggingface.embeddings import HuggingFaceEmbeddings -@dataclass class HuggingFaceEmbeddingConfig(EmbeddingConfig): - model_name: Optional[str] = "sentence-transformers/all-MiniLM-L6-v2" - model_kwargs: Optional[dict] = field(default_factory=lambda: {"device": "cpu"}) - encode_kwargs: Optional[dict] = field(default_factory=lambda: {"normalize_embeddings": False}) - cache_folder: Optional[dict] = None + model_name: Optional[str] = Field(default="sentence-transformers/all-MiniLM-L6-v2") + model_kwargs: Optional[dict] = Field(default_factory=lambda: {"device": "cpu"}) + encode_kwargs: Optional[dict] = Field(default_factory=lambda: {"normalize_embeddings": False}) + cache_folder: Optional[dict] = Field(default=None) + + @requires_dependencies( + ["langchain_huggingface"], + extras="embed-huggingface", + ) + def get_client(self) -> "HuggingFaceEmbeddings": + """Creates a langchain Huggingface python client to embed elements.""" + from langchain_huggingface.embeddings import HuggingFaceEmbeddings + + client = HuggingFaceEmbeddings(**self.dict()) + return client @dataclass class HuggingFaceEmbeddingEncoder(BaseEmbeddingEncoder): config: HuggingFaceEmbeddingConfig - _client: Optional["HuggingFaceEmbeddings"] = field(init=False, default=None) - _exemplary_embedding: Optional[List[float]] = field(init=False, default=None) - @property - def client(self) -> "HuggingFaceEmbeddings": - if self._client is None: - self._client = self.create_client() - return self._client - - @property - def exemplary_embedding(self) -> List[float]: - if self._exemplary_embedding is None: - self._exemplary_embedding = self.client.embed_query("Q") - return self._exemplary_embedding - - def initialize(self): - """Creates a langchain HuggingFace object to embed elements.""" - _ = self.client + def get_exemplary_embedding(self) -> List[float]: + return self.embed_query(query="Q") def num_of_dimensions(self): - return 
np.shape(self.exemplary_embedding) + exemplary_embedding = self.get_exemplary_embedding() + return np.shape(exemplary_embedding) def is_unit_vector(self): - return np.isclose(np.linalg.norm(self.exemplary_embedding), 1.0) + exemplary_embedding = self.get_exemplary_embedding() + return np.isclose(np.linalg.norm(exemplary_embedding), 1.0) def embed_query(self, query): - return self.client.embed_query(str(query)) + client = self.config.get_client() + return client.embed_query(str(query)) def embed_documents(self, elements: List[Element]) -> List[Element]: - embeddings = self.client.embed_documents([str(e) for e in elements]) + client = self.config.get_client() + embeddings = client.embed_documents([str(e) for e in elements]) elements_with_embeddings = self._add_embeddings_to_elements(elements, embeddings) return elements_with_embeddings @@ -66,15 +65,3 @@ class HuggingFaceEmbeddingEncoder(BaseEmbeddingEncoder): element.embeddings = embeddings[i] elements_w_embedding.append(element) return elements - - @EmbeddingEncoderConnectionError.wrap - @requires_dependencies( - ["langchain_huggingface"], - extras="embed-huggingface", - ) - def create_client(self) -> "HuggingFaceEmbeddings": - """Creates a langchain Huggingface python client to embed elements.""" - from langchain_huggingface.embeddings import HuggingFaceEmbeddings - - client = HuggingFaceEmbeddings(**self.config.to_dict()) - return client diff --git a/unstructured/embed/interfaces.py b/unstructured/embed/interfaces.py index e98c0c902..a6b0a3665 100644 --- a/unstructured/embed/interfaces.py +++ b/unstructured/embed/interfaces.py @@ -2,17 +2,17 @@ from abc import ABC, abstractmethod from dataclasses import dataclass from typing import List, Tuple +from pydantic import BaseModel + from unstructured.documents.elements import Element -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin -@dataclass -class EmbeddingConfig(EnhancedDataClassJsonMixin): +class EmbeddingConfig(BaseModel): pass @dataclass -class BaseEmbeddingEncoder(EnhancedDataClassJsonMixin, ABC): +class BaseEmbeddingEncoder(ABC): config: EmbeddingConfig @abstractmethod diff --git a/unstructured/embed/mixedbreadai.py b/unstructured/embed/mixedbreadai.py index 656d41e99..d89db571f 100644 --- a/unstructured/embed/mixedbreadai.py +++ b/unstructured/embed/mixedbreadai.py @@ -3,10 +3,10 @@ from dataclasses import dataclass, field from typing import TYPE_CHECKING, List, Optional import numpy as np +from pydantic import Field, SecretStr from unstructured.documents.elements import Element from unstructured.embed.interfaces import BaseEmbeddingEncoder, EmbeddingConfig -from unstructured.ingest.error import EmbeddingEncoderConnectionError from unstructured.utils import requires_dependencies USER_AGENT = "@mixedbread-ai/unstructured" @@ -22,7 +22,6 @@ if TYPE_CHECKING: from mixedbread_ai.core import RequestOptions -@dataclass class MixedbreadAIEmbeddingConfig(EmbeddingConfig): """ Configuration class for Mixedbread AI Embedding Encoder. @@ -32,14 +31,31 @@ class MixedbreadAIEmbeddingConfig(EmbeddingConfig): model_name (str): Name of the model to use for embeddings. 
""" - api_key: str = field( - default_factory=lambda: os.environ.get("MXBAI_API_KEY"), + api_key: SecretStr = Field( + default_factory=lambda: SecretStr(os.environ.get("MXBAI_API_KEY")), ) - model_name: str = field( + model_name: str = Field( default="mixedbread-ai/mxbai-embed-large-v1", ) + @requires_dependencies( + ["mixedbread_ai"], + extras="embed-mixedbreadai", + ) + def get_client(self) -> "MixedbreadAI": + """ + Create the Mixedbread AI client. + + Returns: + MixedbreadAI: Initialized client. + """ + from mixedbread_ai.client import MixedbreadAI + + return MixedbreadAI( + api_key=self.api_key.get_secret_value(), + ) + @dataclass class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder): @@ -52,23 +68,12 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder): config: MixedbreadAIEmbeddingConfig - _client: Optional["MixedbreadAI"] = field(init=False, default=None) _exemplary_embedding: Optional[List[float]] = field(init=False, default=None) _request_options: Optional["RequestOptions"] = field(init=False, default=None) - @property - def client(self) -> "MixedbreadAI": - """Lazy initialization of the Mixedbread AI client.""" - if self._client is None: - self._client = self.create_client() - return self._client - - @property - def exemplary_embedding(self) -> List[float]: + def get_exemplary_embedding(self) -> List[float]: """Get an exemplary embedding to determine dimensions and unit vector status.""" - if self._exemplary_embedding is None: - self._exemplary_embedding = self._embed(["Q"])[0] - return self._exemplary_embedding + return self._embed(["Q"])[0] def initialize(self): if self.config.api_key is None: @@ -89,12 +94,14 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder): @property def num_of_dimensions(self): """Get the number of dimensions for the embeddings.""" - return np.shape(self.exemplary_embedding) + exemplary_embedding = self.get_exemplary_embedding() + return np.shape(exemplary_embedding) @property def is_unit_vector(self) -> bool: """Check if the embedding is a unit vector.""" - return np.isclose(np.linalg.norm(self.exemplary_embedding), 1.0) + exemplary_embedding = self.get_exemplary_embedding() + return np.isclose(np.linalg.norm(exemplary_embedding), 1.0) def _embed(self, texts: List[str]) -> List[List[float]]: """ @@ -110,10 +117,10 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder): batch_itr = range(0, len(texts), batch_size) responses = [] - + client = self.config.get_client() for i in batch_itr: batch = texts[i : i + batch_size] - response = self.client.embeddings( + response = client.embeddings( model=self.config.model_name, normalized=True, encoding_format=ENCODING_FORMAT, @@ -169,21 +176,3 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder): List[float]: Embedding of the query. """ return self._embed([query])[0] - - @EmbeddingEncoderConnectionError.wrap - @requires_dependencies( - ["mixedbread_ai"], - extras="embed-mixedbreadai", - ) - def create_client(self) -> "MixedbreadAI": - """ - Create the Mixedbread AI client. - - Returns: - MixedbreadAI: Initialized client. 
- """ - from mixedbread_ai.client import MixedbreadAI - - return MixedbreadAI( - api_key=self.config.api_key, - ) diff --git a/unstructured/embed/octoai.py b/unstructured/embed/octoai.py index e4f7fcb38..119a41bc8 100644 --- a/unstructured/embed/octoai.py +++ b/unstructured/embed/octoai.py @@ -2,57 +2,57 @@ from dataclasses import dataclass, field from typing import TYPE_CHECKING, List, Optional import numpy as np +from pydantic import Field, SecretStr from unstructured.documents.elements import ( Element, ) from unstructured.embed.interfaces import BaseEmbeddingEncoder, EmbeddingConfig -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import EmbeddingEncoderConnectionError from unstructured.utils import requires_dependencies if TYPE_CHECKING: from openai import OpenAI -OCTOAI_BASE_URL = "https://text.octoai.run/v1" - -@dataclass class OctoAiEmbeddingConfig(EmbeddingConfig): - api_key: str = enhanced_field(sensitive=True) - model_name: str = "thenlper/gte-large" + api_key: SecretStr + model_name: str = Field(default="thenlper/gte-large") + base_url: str = Field(default="https://text.octoai.run/v1") + + @requires_dependencies( + ["openai", "tiktoken"], + extras="embed-octoai", + ) + def get_client(self) -> "OpenAI": + """Creates an OpenAI python client to embed elements. Uses the OpenAI SDK.""" + from openai import OpenAI + + return OpenAI(api_key=self.api_key.get_secret_value(), base_url=self.base_url) @dataclass class OctoAIEmbeddingEncoder(BaseEmbeddingEncoder): config: OctoAiEmbeddingConfig # Uses the OpenAI SDK - _client: Optional["OpenAI"] = field(init=False, default=None) _exemplary_embedding: Optional[List[float]] = field(init=False, default=None) - @property - def client(self) -> "OpenAI": - if self._client is None: - self._client = self.create_client() - return self._client - - @property - def exemplary_embedding(self) -> List[float]: - if self._exemplary_embedding is None: - self._exemplary_embedding = self.embed_query("Q") - return self._exemplary_embedding + def get_exemplary_embedding(self) -> List[float]: + return self.embed_query("Q") def initialize(self): pass def num_of_dimensions(self): - return np.shape(self.exemplary_embedding) + exemplary_embedding = self.get_exemplary_embedding() + return np.shape(exemplary_embedding) def is_unit_vector(self): - return np.isclose(np.linalg.norm(self.exemplary_embedding), 1.0) + exemplary_embedding = self.get_exemplary_embedding() + return np.isclose(np.linalg.norm(exemplary_embedding), 1.0) def embed_query(self, query): - response = self.client.embeddings.create(input=str(query), model=self.config.model_name) + client = self.config.get_client() + response = client.embeddings.create(input=str(query), model=self.config.model_name) return response.data[0].embedding def embed_documents(self, elements: List[Element]) -> List[Element]: @@ -67,14 +67,3 @@ class OctoAIEmbeddingEncoder(BaseEmbeddingEncoder): element.embeddings = embeddings[i] elements_w_embedding.append(element) return elements - - @EmbeddingEncoderConnectionError.wrap - @requires_dependencies( - ["openai", "tiktoken"], - extras="embed-octoai", - ) - def create_client(self) -> "OpenAI": - """Creates an OpenAI python client to embed elements. 
Uses the OpenAI SDK.""" - from openai import OpenAI - - return OpenAI(api_key=self.config.api_key, base_url=OCTOAI_BASE_URL) diff --git a/unstructured/embed/openai.py b/unstructured/embed/openai.py index a2f7d6472..ad97c49d9 100644 --- a/unstructured/embed/openai.py +++ b/unstructured/embed/openai.py @@ -1,58 +1,60 @@ -from dataclasses import dataclass, field -from typing import TYPE_CHECKING, List, Optional +from dataclasses import dataclass +from typing import TYPE_CHECKING, List import numpy as np +from pydantic import Field, SecretStr from unstructured.documents.elements import ( Element, ) from unstructured.embed.interfaces import BaseEmbeddingEncoder, EmbeddingConfig -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import EmbeddingEncoderConnectionError from unstructured.utils import requires_dependencies if TYPE_CHECKING: from langchain_openai.embeddings import OpenAIEmbeddings -@dataclass class OpenAIEmbeddingConfig(EmbeddingConfig): - api_key: str = enhanced_field(sensitive=True) - model_name: str = "text-embedding-ada-002" + api_key: SecretStr + model_name: str = Field(default="text-embedding-ada-002") + + @requires_dependencies(["langchain_openai"], extras="openai") + def get_client(self) -> "OpenAIEmbeddings": + """Creates a langchain OpenAI python client to embed elements.""" + from langchain_openai import OpenAIEmbeddings + + openai_client = OpenAIEmbeddings( + openai_api_key=self.api_key.get_secret_value(), + model=self.model_name, # type:ignore + ) + return openai_client @dataclass class OpenAIEmbeddingEncoder(BaseEmbeddingEncoder): config: OpenAIEmbeddingConfig - _client: Optional["OpenAIEmbeddings"] = field(init=False, default=None) - _exemplary_embedding: Optional[List[float]] = field(init=False, default=None) - @property - def client(self) -> "OpenAIEmbeddings": - if self._client is None: - self._client = self.create_client() - return self._client - - @property - def exemplary_embedding(self) -> List[float]: - if self._exemplary_embedding is None: - self._exemplary_embedding = self.client.embed_query("Q") - return self._exemplary_embedding + def get_exemplary_embedding(self) -> List[float]: + return self.embed_query(query="Q") def initialize(self): pass def num_of_dimensions(self): - return np.shape(self.exemplary_embedding) + exemplary_embedding = self.get_exemplary_embedding() + return np.shape(exemplary_embedding) def is_unit_vector(self): - return np.isclose(np.linalg.norm(self.exemplary_embedding), 1.0) + exemplary_embedding = self.get_exemplary_embedding() + return np.isclose(np.linalg.norm(exemplary_embedding), 1.0) def embed_query(self, query): - return self.client.embed_query(str(query)) + client = self.config.get_client() + return client.embed_query(str(query)) def embed_documents(self, elements: List[Element]) -> List[Element]: - embeddings = self.client.embed_documents([str(e) for e in elements]) + client = self.config.get_client() + embeddings = client.embed_documents([str(e) for e in elements]) elements_with_embeddings = self._add_embeddings_to_elements(elements, embeddings) return elements_with_embeddings @@ -63,15 +65,3 @@ class OpenAIEmbeddingEncoder(BaseEmbeddingEncoder): element.embeddings = embeddings[i] elements_w_embedding.append(element) return elements - - @EmbeddingEncoderConnectionError.wrap - @requires_dependencies(["langchain_openai"], extras="openai") - def create_client(self) -> "OpenAIEmbeddings": - """Creates a langchain OpenAI python client to embed elements.""" - from langchain_openai 
import OpenAIEmbeddings - - openai_client = OpenAIEmbeddings( - openai_api_key=self.config.api_key, - model=self.config.model_name, # type:ignore - ) - return openai_client diff --git a/unstructured/embed/vertexai.py b/unstructured/embed/vertexai.py index edbc8c2ef..5228ed497 100644 --- a/unstructured/embed/vertexai.py +++ b/unstructured/embed/vertexai.py @@ -1,62 +1,71 @@ # type: ignore import json import os -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import TYPE_CHECKING, List, Optional import numpy as np +from pydantic import Field, SecretStr from unstructured.documents.elements import ( Element, ) from unstructured.embed.interfaces import BaseEmbeddingEncoder, EmbeddingConfig -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import EmbeddingEncoderConnectionError from unstructured.utils import FileHandler, requires_dependencies if TYPE_CHECKING: from langchain_google_vertexai import VertexAIEmbeddings -@dataclass class VertexAIEmbeddingConfig(EmbeddingConfig): - api_key: str = enhanced_field(sensitive=True) - model_name: Optional[str] = "textembedding-gecko@001" + api_key: SecretStr + model_name: Optional[str] = Field(default="textembedding-gecko@001") + + def register_application_credentials(self): + application_credentials_path = os.path.join("/tmp", "google-vertex-app-credentials.json") + credentials_file = FileHandler(application_credentials_path) + credentials_file.write_file(json.dumps(json.loads(self.api_key.get_secret_value()))) + os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = application_credentials_path + + @requires_dependencies( + ["langchain", "langchain_google_vertexai"], + extras="embed-vertexai", + ) + def get_client(self) -> "VertexAIEmbeddings": + """Creates a Langchain VertexAI python client to embed elements.""" + from langchain_google_vertexai import VertexAIEmbeddings + + self.register_application_credentials() + vertexai_client = VertexAIEmbeddings(model_name=self.model_name) + return vertexai_client @dataclass class VertexAIEmbeddingEncoder(BaseEmbeddingEncoder): config: VertexAIEmbeddingConfig - _client: Optional["VertexAIEmbeddings"] = field(init=False, default=None) - _exemplary_embedding: Optional[List[float]] = field(init=False, default=None) - @property - def client(self) -> "VertexAIEmbeddings": - if self._client is None: - self._client = self.create_client() - return self._client - - @property - def exemplary_embedding(self) -> List[float]: - if self._exemplary_embedding is None: - self._exemplary_embedding = self.client.embed_query("A sample query.") - return self._exemplary_embedding + def get_exemplary_embedding(self) -> List[float]: + return self.embed_query(query="A sample query.") def initialize(self): pass def num_of_dimensions(self): - return np.shape(self.exemplary_embedding) + exemplary_embedding = self.get_exemplary_embedding() + return np.shape(exemplary_embedding) def is_unit_vector(self): - return np.isclose(np.linalg.norm(self.exemplary_embedding), 1.0) + exemplary_embedding = self.get_exemplary_embedding() + return np.isclose(np.linalg.norm(exemplary_embedding), 1.0) def embed_query(self, query): - result = self.client.embed_query(str(query)) + client = self.config.get_client() + result = client.embed_query(str(query)) return result def embed_documents(self, elements: List[Element]) -> List[Element]: - embeddings = self.client.embed_documents([str(e) for e in elements]) + client = self.config.get_client() + embeddings = 
client.embed_documents([str(e) for e in elements]) elements_with_embeddings = self._add_embeddings_to_elements(elements, embeddings) return elements_with_embeddings @@ -67,25 +76,3 @@ class VertexAIEmbeddingEncoder(BaseEmbeddingEncoder): element.embeddings = embeddings[i] elements_w_embedding.append(element) return elements - - @property - def application_credentials_path(self): - return os.path.join("/tmp", "google-vertex-app-credentials.json") - - def register_application_credentials(self): - credentials_file = FileHandler(self.application_credentials_path) - credentials_file.write_file(json.dumps(json.loads(self.config.api_key))) - os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.application_credentials_path - - @EmbeddingEncoderConnectionError.wrap - @requires_dependencies( - ["langchain", "langchain_google_vertexai"], - extras="embed-vertexai", - ) - def create_client(self) -> "VertexAIEmbeddings": - """Creates a Langchain VertexAI python client to embed elements.""" - from langchain_google_vertexai import VertexAIEmbeddings - - self.register_application_credentials() - vertexai_client = VertexAIEmbeddings(model_name=self.config.model_name) - return vertexai_client diff --git a/unstructured/embed/voyageai.py b/unstructured/embed/voyageai.py index 56f98d365..c5dd5b61c 100644 --- a/unstructured/embed/voyageai.py +++ b/unstructured/embed/voyageai.py @@ -1,61 +1,67 @@ -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import TYPE_CHECKING, List, Optional import numpy as np +from pydantic import Field, SecretStr from unstructured.documents.elements import Element from unstructured.embed.interfaces import BaseEmbeddingEncoder, EmbeddingConfig -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import EmbeddingEncoderConnectionError from unstructured.utils import requires_dependencies if TYPE_CHECKING: from langchain_voyageai import VoyageAIEmbeddings -@dataclass class VoyageAIEmbeddingConfig(EmbeddingConfig): - api_key: str = enhanced_field(sensitive=True) + api_key: SecretStr model_name: str - batch_size: Optional[int] = None - truncation: Optional[bool] = None + batch_size: Optional[int] = Field(default=None) + truncation: Optional[bool] = Field(default=None) + + @requires_dependencies( + ["langchain", "langchain_voyageai"], + extras="embed-voyageai", + ) + def get_client(self) -> "VoyageAIEmbeddings": + """Creates a Langchain VoyageAI python client to embed elements.""" + from langchain_voyageai import VoyageAIEmbeddings + + return VoyageAIEmbeddings( + voyage_api_key=self.api_key, + model=self.model_name, + batch_size=self.batch_size, + truncation=self.truncation, + ) @dataclass class VoyageAIEmbeddingEncoder(BaseEmbeddingEncoder): config: VoyageAIEmbeddingConfig - _client: Optional["VoyageAIEmbeddings"] = field(init=False, default=None) - _exemplary_embedding: Optional[List[float]] = field(init=False, default=None) - @property - def client(self) -> "VoyageAIEmbeddings": - if self._client is None: - self._client = self.create_client() - return self._client - - @property - def exemplary_embedding(self) -> List[float]: - if self._exemplary_embedding is None: - self._exemplary_embedding = self.client.embed_query("A sample query.") - return self._exemplary_embedding + def get_exemplary_embedding(self) -> List[float]: + return self.embed_query(query="A sample query.") def initialize(self): pass @property def num_of_dimensions(self) -> tuple[int, ...]: - return np.shape(self.exemplary_embedding) + 
exemplary_embedding = self.get_exemplary_embedding() + return np.shape(exemplary_embedding) @property def is_unit_vector(self) -> bool: - return np.isclose(np.linalg.norm(self.exemplary_embedding), 1.0) + exemplary_embedding = self.get_exemplary_embedding() + return np.isclose(np.linalg.norm(exemplary_embedding), 1.0) def embed_documents(self, elements: List[Element]) -> List[Element]: - embeddings = self.client.embed_documents([str(e) for e in elements]) + client = self.config.get_client() + embeddings = client.embed_documents([str(e) for e in elements]) return self._add_embeddings_to_elements(elements, embeddings) def embed_query(self, query: str) -> List[float]: - return self.client.embed_query(query) + client = self.config.get_client() + return client.embed_query(query) @staticmethod def _add_embeddings_to_elements(elements, embeddings) -> List[Element]: @@ -65,19 +71,3 @@ class VoyageAIEmbeddingEncoder(BaseEmbeddingEncoder): element.embeddings = embeddings[i] elements_w_embedding.append(element) return elements - - @EmbeddingEncoderConnectionError.wrap - @requires_dependencies( - ["langchain", "langchain_voyageai"], - extras="embed-voyageai", - ) - def create_client(self) -> "VoyageAIEmbeddings": - """Creates a Langchain VoyageAI python client to embed elements.""" - from langchain_voyageai import VoyageAIEmbeddings - - return VoyageAIEmbeddings( - voyage_api_key=self.config.api_key, - model=self.config.model_name, - batch_size=self.config.batch_size, - truncation=self.config.truncation, - ) diff --git a/unstructured/ingest/README.md b/unstructured/ingest/README.md deleted file mode 100644 index f7291aa5a..000000000 --- a/unstructured/ingest/README.md +++ /dev/null @@ -1,6 +0,0 @@ -# Ingest -![Project unmaintained](https://img.shields.io/badge/project-unmaintained-red.svg) - -Project has been moved to: [Unstructured Ingest](https://github.com/Unstructured-IO/unstructured-ingest) - -This python module will be removed from this repo in the near future. diff --git a/unstructured/ingest/__init__.py b/unstructured/ingest/__init__.py deleted file mode 100644 index cae55db4a..000000000 --- a/unstructured/ingest/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from __future__ import annotations - -import warnings - -warnings.warn( - "unstructured.ingest will be removed in a future version. 
" - "Functionality moved to the unstructured-ingest project.", - DeprecationWarning, - stacklevel=2, -) diff --git a/unstructured/ingest/cli/__init__.py b/unstructured/ingest/cli/__init__.py deleted file mode 100644 index f3490ae22..000000000 --- a/unstructured/ingest/cli/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -import typing as t - -import click - -from unstructured.ingest.cli.cmds import base_dest_cmd_fns, base_src_cmd_fns - -src: t.List[click.Group] = [v().get_src_cmd() for v in base_src_cmd_fns] - -dest: t.List[click.Command] = [v().get_dest_cmd() for v in base_dest_cmd_fns] - -__all__ = [ - "src", - "dest", -] diff --git a/unstructured/ingest/cli/base/__init__.py b/unstructured/ingest/cli/base/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstructured/ingest/cli/base/cmd.py b/unstructured/ingest/cli/base/cmd.py deleted file mode 100644 index f02a81424..000000000 --- a/unstructured/ingest/cli/base/cmd.py +++ /dev/null @@ -1,19 +0,0 @@ -import typing as t -from abc import ABC -from dataclasses import dataclass, field - -from unstructured.ingest.cli.interfaces import CliConfig -from unstructured.ingest.interfaces import BaseConfig - - -@dataclass -class BaseCmd(ABC): - cmd_name: str - cli_config: t.Optional[t.Type[BaseConfig]] = None - additional_cli_options: t.List[t.Type[CliConfig]] = field(default_factory=list) - addition_configs: t.Dict[str, t.Type[BaseConfig]] = field(default_factory=dict) - is_fsspec: bool = False - - @property - def cmd_name_key(self): - return self.cmd_name.replace("-", "_") diff --git a/unstructured/ingest/cli/base/dest.py b/unstructured/ingest/cli/base/dest.py deleted file mode 100644 index 4b3d62739..000000000 --- a/unstructured/ingest/cli/base/dest.py +++ /dev/null @@ -1,87 +0,0 @@ -import logging -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.cmd import BaseCmd -from unstructured.ingest.cli.cmd_factory import get_src_cmd -from unstructured.ingest.cli.common import ( - log_options, -) -from unstructured.ingest.cli.interfaces import BaseConfig, CliFilesStorageConfig -from unstructured.ingest.cli.utils import ( - add_options, - conform_click_options, - extract_config, - extract_configs, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner.writers import writer_map - - -@dataclass -class BaseDestCmd(BaseCmd): - write_config: t.Optional[t.Type[BaseConfig]] = None - - def get_dest_runner(self, source_cmd: str, options: dict, parent_options: dict): - src_cmd_fn = get_src_cmd(cmd_name=source_cmd) - src_cmd = src_cmd_fn() - runner = src_cmd.get_source_runner(options=parent_options) - addition_configs = self.addition_configs - if "connector_config" not in addition_configs: - addition_configs["connector_config"] = self.cli_config - if self.write_config: - addition_configs["write_config"] = self.write_config - configs = extract_configs( - options, - validate=[self.cli_config] if self.cli_config else None, - extras=addition_configs, - add_defaults=False, - ) - writer_cls = writer_map[self.cmd_name_key] - writer = writer_cls(**configs) # type: ignore - runner.writer = writer - runner.writer_kwargs = options - return runner - - def check_dest_options(self, options: dict): - extract_config(flat_data=options, config=self.cli_config) - - def dest(self, ctx: click.Context, **options): - if not ctx.parent: - raise click.ClickException("destination command called without a parent") - if not ctx.parent.info_name: - raise 
click.ClickException("parent command missing info name") - source_cmd = ctx.parent.info_name.replace("-", "_") - parent_options: dict = ctx.parent.params if ctx.parent else {} - conform_click_options(options) - verbose = parent_options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(parent_options, verbose=verbose) - log_options(options, verbose=verbose) - try: - self.check_dest_options(options=options) - runner = self.get_dest_runner( - source_cmd=source_cmd, - options=options, - parent_options=parent_options, - ) - runner.run(**parent_options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - def get_dest_cmd(self) -> click.Command: - # Dynamically create the command without the use of click decorators - fn = self.dest - fn = click.pass_context(fn) - cmd: click.Group = click.command(fn) - cmd.name = self.cmd_name - cmd.invoke_without_command = True - options = [self.cli_config] if self.cli_config else [] - options += self.additional_cli_options - if self.is_fsspec and CliFilesStorageConfig not in options: - options.append(CliFilesStorageConfig) - add_options(cmd, extras=options, is_src=False) - return cmd diff --git a/unstructured/ingest/cli/base/src.py b/unstructured/ingest/cli/base/src.py deleted file mode 100644 index 70acbced4..000000000 --- a/unstructured/ingest/cli/base/src.py +++ /dev/null @@ -1,57 +0,0 @@ -import logging -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.cmd import BaseCmd -from unstructured.ingest.cli.common import ( - log_options, -) -from unstructured.ingest.cli.interfaces import CliFilesStorageConfig -from unstructured.ingest.cli.utils import Group, add_options, conform_click_options, extract_configs -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import runner_map - - -@dataclass -class BaseSrcCmd(BaseCmd): - def get_source_runner(self, options: dict): - addition_configs = self.addition_configs - if "connector_config" not in addition_configs: - addition_configs["connector_config"] = self.cli_config - configs = extract_configs( - options, - validate=[self.cli_config] if self.cli_config else None, - extras=addition_configs, - ) - runner = runner_map[self.cmd_name_key] - return runner(**configs) # type: ignore - - def src(self, ctx: click.Context, **options): - if ctx.invoked_subcommand: - return - - conform_click_options(options) - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options, verbose=verbose) - try: - runner = self.get_source_runner(options=options) - runner.run(**options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - def get_src_cmd(self) -> click.Group: - # Dynamically create the command without the use of click decorators - fn = self.src - fn = click.pass_context(fn) - cmd: click.Group = click.group(fn, cls=Group) - cmd.name = self.cmd_name - cmd.invoke_without_command = True - extra_options = [self.cli_config] if self.cli_config else [] - extra_options += self.additional_cli_options - if self.is_fsspec and CliFilesStorageConfig not in extra_options: - extra_options.append(CliFilesStorageConfig) - add_options(cmd, extras=extra_options) - return cmd diff --git a/unstructured/ingest/cli/cli.py b/unstructured/ingest/cli/cli.py deleted file mode 100644 index fa7c3008e..000000000 --- 
a/unstructured/ingest/cli/cli.py +++ /dev/null @@ -1,32 +0,0 @@ -import click - -from unstructured.ingest.cli import dest, src -from unstructured.ingest.v2.cli.cmds import dest as dest_v2 -from unstructured.ingest.v2.cli.cmds import src as src_v2 - - -@click.group() -def ingest(): - pass - - -def get_cmd() -> click.Command: - """Construct and return a Click command object representing the main command for the CLI. - - This function adds all dest_subcommand(s) to each src_subcommand, and adds all of those - to the main command as nested subcommands. - """ - cmd = ingest - src_dict = {s.name: s for s in src} - dest_dict = {d.name: d for d in dest} - for s in src_v2: - src_dict[s.name] = s - for d in dest_v2: - dest_dict[d.name] = d - # Add all subcommands - for src_subcommand in src_dict.values(): - # Add all destination subcommands - for dest_subcommand in dest_dict.values(): - src_subcommand.add_command(dest_subcommand) - cmd.add_command(src_subcommand) - return cmd diff --git a/unstructured/ingest/cli/cmd_factory.py b/unstructured/ingest/cli/cmd_factory.py deleted file mode 100644 index 3260828cb..000000000 --- a/unstructured/ingest/cli/cmd_factory.py +++ /dev/null @@ -1,12 +0,0 @@ -import typing as t - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.cmds import base_src_cmd_fns - - -def get_src_cmd_map() -> t.Dict[str, t.Callable[[], BaseSrcCmd]]: - return {b().cmd_name_key: b for b in base_src_cmd_fns} - - -def get_src_cmd(cmd_name: str) -> t.Callable[[], BaseSrcCmd]: - return get_src_cmd_map()[cmd_name] diff --git a/unstructured/ingest/cli/cmds/__init__.py b/unstructured/ingest/cli/cmds/__init__.py deleted file mode 100644 index f75ee797e..000000000 --- a/unstructured/ingest/cli/cmds/__init__.py +++ /dev/null @@ -1,145 +0,0 @@ -from __future__ import annotations - -import collections -import typing as t - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.cmds.fsspec.sftp import get_base_src_cmd as sftp_base_src_cmd - -from .airtable import get_base_src_cmd as airtable_base_src_cmd -from .astradb import get_base_dest_cmd as astradb_base_dest_cmd -from .astradb import get_base_src_cmd as astradb_base_src_cmd -from .azure_cognitive_search import get_base_dest_cmd as azure_cognitive_search_base_dest_cmd -from .biomed import get_base_src_cmd as biomed_base_src_cmd -from .chroma import get_base_dest_cmd as chroma_base_dest_cmd -from .clarifai import get_base_dest_cmd as clarifai_base_dest_cmd -from .confluence import get_base_src_cmd as confluence_base_src_cmd -from .databricks_volumes import get_base_dest_cmd as databricks_volumes_dest_cmd -from .delta_table import get_base_dest_cmd as delta_table_dest_cmd -from .delta_table import get_base_src_cmd as delta_table_base_src_cmd -from .discord import get_base_src_cmd as discord_base_src_cmd -from .elasticsearch import get_base_dest_cmd as elasticsearch_base_dest_cmd -from .elasticsearch import get_base_src_cmd as elasticsearch_base_src_cmd -from .fsspec.azure import get_base_dest_cmd as azure_base_dest_cmd -from .fsspec.azure import get_base_src_cmd as azure_base_src_cmd -from .fsspec.box import get_base_dest_cmd as box_base_dest_cmd -from .fsspec.box import get_base_src_cmd as box_base_src_cmd -from .fsspec.dropbox import get_base_dest_cmd as dropbox_base_dest_cmd -from .fsspec.dropbox import get_base_src_cmd as dropbox_base_src_cmd -from .fsspec.fsspec import get_base_dest_cmd as fsspec_base_dest_cmd -from .fsspec.fsspec import get_base_src_cmd as fsspec_base_src_cmd 
-from .fsspec.gcs import get_base_dest_cmd as gcs_base_dest_cmd -from .fsspec.gcs import get_base_src_cmd as gcs_base_src_cmd -from .fsspec.s3 import get_base_dest_cmd as s3_base_dest_cmd -from .fsspec.s3 import get_base_src_cmd as s3_base_src_cmd -from .github import get_base_src_cmd as github_base_src_cmd -from .gitlab import get_base_src_cmd as gitlab_base_src_cmd -from .google_drive import get_base_src_cmd as google_drive_base_src_cmd -from .hubspot import get_base_src_cmd as hubspot_base_src_cmd -from .jira import get_base_src_cmd as jira_base_src_cmd -from .kafka import get_base_dest_cmd as kafka_base_dest_cmd -from .kafka import get_base_src_cmd as kafka_base_src_cmd -from .local import get_base_src_cmd as local_base_src_cmd -from .mongodb import get_base_dest_cmd as mongo_base_dest_cmd -from .mongodb import get_base_src_cmd as mongodb_base_src_cmd -from .notion import get_base_src_cmd as notion_base_src_cmd -from .onedrive import get_base_src_cmd as onedrive_base_src_cmd -from .opensearch import get_base_dest_cmd as opensearch_base_dest_cmd -from .opensearch import get_base_src_cmd as opensearch_base_src_cmd -from .outlook import get_base_src_cmd as outlook_base_src_cmd -from .pinecone import get_base_dest_cmd as pinecone_base_dest_cmd -from .qdrant import get_base_dest_cmd as qdrant_base_dest_cmd -from .reddit import get_base_src_cmd as reddit_base_src_cmd -from .salesforce import get_base_src_cmd as salesforce_base_src_cmd -from .sharepoint import get_base_src_cmd as sharepoint_base_src_cmd -from .slack import get_base_src_cmd as slack_base_src_cmd -from .sql import get_base_dest_cmd as sql_base_dest_cmd -from .vectara import get_base_dest_cmd as vectara_base_dest_cmd -from .weaviate import get_base_dest_cmd as weaviate_dest_cmd -from .wikipedia import get_base_src_cmd as wikipedia_base_src_cmd - -if t.TYPE_CHECKING: - from unstructured.ingest.cli.base.dest import BaseDestCmd - -base_src_cmd_fns: t.List[t.Callable[[], BaseSrcCmd]] = [ - airtable_base_src_cmd, - astradb_base_src_cmd, - azure_base_src_cmd, - biomed_base_src_cmd, - box_base_src_cmd, - confluence_base_src_cmd, - delta_table_base_src_cmd, - discord_base_src_cmd, - dropbox_base_src_cmd, - elasticsearch_base_src_cmd, - fsspec_base_src_cmd, - gcs_base_src_cmd, - github_base_src_cmd, - gitlab_base_src_cmd, - google_drive_base_src_cmd, - hubspot_base_src_cmd, - jira_base_src_cmd, - kafka_base_src_cmd, - local_base_src_cmd, - mongodb_base_src_cmd, - notion_base_src_cmd, - onedrive_base_src_cmd, - opensearch_base_src_cmd, - outlook_base_src_cmd, - reddit_base_src_cmd, - salesforce_base_src_cmd, - sftp_base_src_cmd, - sharepoint_base_src_cmd, - slack_base_src_cmd, - s3_base_src_cmd, - wikipedia_base_src_cmd, -] - -# Make sure there are not overlapping names -src_cmd_names = [b().cmd_name for b in base_src_cmd_fns] -src_duplicates = [item for item, count in collections.Counter(src_cmd_names).items() if count > 1] -if src_duplicates: - raise ValueError( - "multiple base src commands defined with the same names: {}".format( - ", ".join(src_duplicates), - ), - ) - -base_dest_cmd_fns: t.List[t.Callable[[], "BaseDestCmd"]] = [ - astradb_base_dest_cmd, - azure_base_dest_cmd, - box_base_dest_cmd, - chroma_base_dest_cmd, - clarifai_base_dest_cmd, - databricks_volumes_dest_cmd, - dropbox_base_dest_cmd, - elasticsearch_base_dest_cmd, - fsspec_base_dest_cmd, - gcs_base_dest_cmd, - kafka_base_dest_cmd, - s3_base_dest_cmd, - azure_cognitive_search_base_dest_cmd, - delta_table_dest_cmd, - sql_base_dest_cmd, - weaviate_dest_cmd, - 
mongo_base_dest_cmd, - pinecone_base_dest_cmd, - qdrant_base_dest_cmd, - opensearch_base_dest_cmd, - vectara_base_dest_cmd, -] - -# Make sure there are not overlapping names -dest_cmd_names = [b().cmd_name for b in base_dest_cmd_fns] -dest_duplicates = [item for item, count in collections.Counter(dest_cmd_names).items() if count > 1] -if dest_duplicates: - raise ValueError( - "multiple base dest commands defined with the same names: {}".format( - ", ".join(dest_duplicates), - ), - ) - -__all__ = [ - "base_src_cmd_fns", - "base_dest_cmd_fns", -] diff --git a/unstructured/ingest/cli/cmds/airtable.py b/unstructured/ingest/cli/cmds/airtable.py deleted file mode 100644 index c7462a707..000000000 --- a/unstructured/ingest/cli/cmds/airtable.py +++ /dev/null @@ -1,69 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, -) -from unstructured.ingest.connector.airtable import SimpleAirtableConfig - - -@dataclass -class AirtableCliConfig(SimpleAirtableConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--personal-access-token"], - default=None, - help="Personal access token to authenticate into Airtable. Check: " - "https://support.airtable.com/docs/creating-and-using-api-keys-and-access-tokens " - "for more info", - ), - click.Option( - ["--list-of-paths"], - default=None, - help=""" - A list of paths that specify the locations to ingest data from within Airtable. - - If this argument is not set, the connector ingests all tables within each and every base. - --list-of-paths: path1 path2 path3 …. - path: base_id/table_id(optional)/view_id(optional)/ - - To obtain (base, table, view) ids in bulk, check: - https://airtable.com/developers/web/api/list-bases (base ids) - https://airtable.com/developers/web/api/get-base-schema (table and view ids) - https://pyairtable.readthedocs.io/en/latest/metadata.html (base, table and view ids) - - To obtain specific ids from Airtable UI, go to your workspace, and copy any - relevant id from the URL structure: - https://airtable.com/appAbcDeF1ghijKlm/tblABcdEfG1HIJkLm/viwABCDEfg6hijKLM - appAbcDeF1ghijKlm -> base_id - tblABcdEfG1HIJkLm -> table_id - viwABCDEfg6hijKLM -> view_id - - You can also check: https://support.airtable.com/docs/finding-airtable-ids - - Here is an example for one --list-of-paths: - base1/ → gets the entirety of all tables inside base1 - base1/table1 → gets all rows and columns within table1 in base1 - base1/table1/view1 → gets the rows and columns that are - visible in view1 for the table1 in base1 - - Examples to invalid airtable_paths: - table1 → has to mention base to be valid - base1/view1 → has to mention table to be valid - """, - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="airtable", - cli_config=AirtableCliConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/astradb.py b/unstructured/ingest/cli/cmds/astradb.py deleted file mode 100644 index b7be8f56c..000000000 --- a/unstructured/ingest/cli/cmds/astradb.py +++ /dev/null @@ -1,99 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.interfaces import CliConfig, Dict -from unstructured.ingest.connector.astradb import AstraDBWriteConfig, SimpleAstraDBConfig - - -@dataclass -class AstraDBCliConfig(SimpleAstraDBConfig, CliConfig): - @staticmethod - 
def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--token"], - required=True, - type=str, - help="Astra DB Token with access to the database.", - envvar="ASTRA_DB_APPLICATION_TOKEN", - show_envvar=True, - ), - click.Option( - ["--api-endpoint"], - required=True, - type=str, - help="The API endpoint for the Astra DB.", - envvar="ASTRA_DB_API_ENDPOINT", - show_envvar=True, - ), - click.Option( - ["--collection-name"], - required=False, - type=str, - help="The name of the Astra DB collection. " - "Note that the collection name must only include letters, " - "numbers, and underscores.", - ), - click.Option( - ["--namespace"], - required=False, - default=None, - type=str, - help="The Astra DB connection namespace.", - ), - ] - return options - - -@dataclass -class AstraDBCliWriteConfig(AstraDBWriteConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--embedding-dimension"], - required=True, - default=384, - type=int, - help="The dimensionality of the embeddings", - ), - click.Option( - ["--requested-indexing-policy"], - required=False, - default=None, - type=Dict(), - help="The indexing policy to use for the collection." - 'example: \'{"deny": ["metadata"]}\' ', - ), - click.Option( - ["--batch-size"], - default=20, - type=int, - help="Number of records per batch", - ), - ] - return options - - -def get_base_src_cmd(): - from unstructured.ingest.cli.base.src import BaseSrcCmd - - cmd_cls = BaseSrcCmd( - cmd_name="astradb", - cli_config=AstraDBCliConfig, - ) - return cmd_cls - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name="astradb", - cli_config=AstraDBCliConfig, - additional_cli_options=[AstraDBCliWriteConfig], - write_config=AstraDBWriteConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/azure_cognitive_search.py b/unstructured/ingest/cli/cmds/azure_cognitive_search.py deleted file mode 100644 index 029519fb8..000000000 --- a/unstructured/ingest/cli/cmds/azure_cognitive_search.py +++ /dev/null @@ -1,65 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.interfaces import ( - CliConfig, -) -from unstructured.ingest.connector.azure_cognitive_search import ( - AzureCognitiveSearchWriteConfig, - SimpleAzureCognitiveSearchStorageConfig, -) - - -@dataclass -class AzureCognitiveSearchCliConfig(SimpleAzureCognitiveSearchStorageConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--key"], - required=True, - type=str, - help="Key credential used for authenticating to an Azure service.", - envvar="AZURE_SEARCH_API_KEY", - show_envvar=True, - ), - click.Option( - ["--endpoint"], - required=True, - type=str, - help="The URL endpoint of an Azure search service. 
" - "In the form of https://{{service_name}}.search.windows.net", - envvar="AZURE_SEARCH_ENDPOINT", - show_envvar=True, - ), - ] - return options - - -@dataclass -class AzureCognitiveSearchCliWriteConfig(AzureCognitiveSearchWriteConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--index"], - required=True, - type=str, - help="The name of the index to connect to", - ), - ] - return options - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name="azure-cognitive-search", - cli_config=AzureCognitiveSearchCliConfig, - additional_cli_options=[AzureCognitiveSearchCliWriteConfig], - write_config=AzureCognitiveSearchCliWriteConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/biomed.py b/unstructured/ingest/cli/cmds/biomed.py deleted file mode 100644 index bafe403f3..000000000 --- a/unstructured/ingest/cli/cmds/biomed.py +++ /dev/null @@ -1,52 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, -) -from unstructured.ingest.connector.biomed import SimpleBiomedConfig - - -@dataclass -class BiomedCliConfig(SimpleBiomedConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--api-id"], - default=None, - help="ID parameter for OA Web Service API.", - ), - click.Option( - ["--api-from"], - default=None, - help="From parameter for OA Web Service API.", - ), - click.Option( - ["--api-until"], - default=None, - help="Until parameter for OA Web Service API.", - ), - click.Option( - ["--path"], - default=None, - help="PMC Open Access FTP Directory Path.", - ), - click.Option( - ["--max-request-time"], - default=45, - help="(In seconds) Max request time to OA Web Service API.", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="biomed", - cli_config=BiomedCliConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/chroma.py b/unstructured/ingest/cli/cmds/chroma.py deleted file mode 100644 index c4a5cbcce..000000000 --- a/unstructured/ingest/cli/cmds/chroma.py +++ /dev/null @@ -1,104 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.interfaces import CliConfig, Dict -from unstructured.ingest.connector.chroma import ChromaWriteConfig, SimpleChromaConfig - - -@dataclass -class ChromaCliConfig(SimpleChromaConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--path"], - required=False, - type=str, - help="Location where Chroma is persisted," "if not connecting via http.", - ), - click.Option( - ["--settings"], - required=False, - type=Dict(), - help="A dictionary of settings to communicate with the chroma server." - 'example: \'{"persist_directory":"./chroma-persist"}\' ', - ), - click.Option( - ["--tenant"], - required=False, - default="default_tenant", - type=str, - help="The tenant to use for this client. Chroma defaults to 'default_tenant'.", - ), - click.Option( - ["--database"], - required=False, - default="default_database", - type=str, - help="The database to use for this client." 
- "Chroma defaults to 'default_database'.", - ), - click.Option( - ["--host"], - required=False, - type=str, - help="The hostname of the Chroma server.", - ), - click.Option( - ["--port"], - required=False, - type=int, - help="The port of the Chroma server.", - ), - click.Option( - ["--ssl"], - required=False, - default=False, - is_flag=True, - type=bool, - help="Whether to use SSL to connect to the Chroma server.", - ), - click.Option( - ["--headers"], - required=False, - type=Dict(), - help="A dictionary of headers to send to the Chroma server." - 'example: \'{"Authorization":"Basic()"}\' ', - ), - click.Option( - ["--collection-name"], - required=True, - type=str, - help="The name of the Chroma collection to write into.", - ), - ] - return options - - -@dataclass -class ChromaCliWriteConfig(ChromaWriteConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--batch-size"], - default=100, - type=int, - help="Number of records per batch", - ), - ] - return options - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name="chroma", - cli_config=ChromaCliConfig, - additional_cli_options=[ChromaCliWriteConfig], - write_config=ChromaWriteConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/clarifai.py b/unstructured/ingest/cli/cmds/clarifai.py deleted file mode 100644 index 23178d172..000000000 --- a/unstructured/ingest/cli/cmds/clarifai.py +++ /dev/null @@ -1,71 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.interfaces import CliConfig -from unstructured.ingest.connector.clarifai import ( - ClarifaiWriteConfig, - SimpleClarifaiConfig, -) - -CMD_NAME = "clarifai" - - -@dataclass -class ClarifaiCliConfig(SimpleClarifaiConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--api-key"], - required=True, - type=str, - help="The CLARIFAI_PAT of the user to access clarifai platform apps and models", - envvar="CLARIFAI_PAT", - show_envvar=True, - ), - click.Option( - ["--app-id"], - required=True, - type=str, - help="Clarifai app name/id", - ), - click.Option( - ["--user-id"], - required=True, - type=str, - help="Clarifai User name/ID", - ), - click.Option( - ["--dataset-id"], type=str, default=None, help="Clarifai App Dataset ID (optional)" - ), - ] - return options - - -@dataclass -class ClarifaiCliWriteConfig(ClarifaiWriteConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.option]: - options = [ - click.Option( - ["--batch-size"], - type=int, - default=50, - help="No of inputs upload per batch", - ), - ] - return options - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name=CMD_NAME, - cli_config=ClarifaiCliConfig, - additional_cli_options=[ClarifaiCliWriteConfig], - write_config=ClarifaiWriteConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/confluence.py b/unstructured/ingest/cli/cmds/confluence.py deleted file mode 100644 index 1fc43d2ae..000000000 --- a/unstructured/ingest/cli/cmds/confluence.py +++ /dev/null @@ -1,69 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, - DelimitedString, -) -from unstructured.ingest.connector.confluence import 
SimpleConfluenceConfig - - -@dataclass -class ConfluenceCliConfig(SimpleConfluenceConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--api-token"], - required=True, - help="API Token to authenticate into Confluence Cloud. " - "Check " - "https://developer.atlassian.com/cloud/confluence/basic-auth-for-rest-apis/ " - "for more info.", - ), - click.Option( - ["--url"], - required=True, - help='URL to Confluence Cloud, e.g. "unstructured-ingest-test.atlassian.net"', - ), - click.Option( - ["--user-email"], - required=True, - help="Email to authenticate into Confluence Cloud", - ), - click.Option( - ["--spaces"], - default=None, - type=DelimitedString(), - help="A list of confluence space ids to be fetched. From each fetched space, " - "--num-of-docs-from-each-space number of docs will be ingested. " - "--spaces and --num-of-spaces cannot be used at the same time", - ), - click.Option( - ["--max-num-of-docs-from-each-space"], - default=100, - help="Number of documents to be aimed to be ingested from each fetched " - "confluence space. If any space has fewer documents, all the documents from " - "that space will be ingested. Documents are not necessarily " - "ingested in order of creation date.", - ), - click.Option( - ["--max-num-of-spaces"], - default=500, - help="Number of confluence space ids to be fetched. From each fetched space, " - "--num-of-docs-from-each-space number of docs will be ingested. " - "--spaces and --num-of-spaces cannot be used at the same time", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="confluence", - cli_config=ConfluenceCliConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/databricks_volumes.py b/unstructured/ingest/cli/cmds/databricks_volumes.py deleted file mode 100644 index faea5e0d4..000000000 --- a/unstructured/ingest/cli/cmds/databricks_volumes.py +++ /dev/null @@ -1,163 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.interfaces import CliConfig -from unstructured.ingest.connector.databricks_volumes import ( - DatabricksVolumesWriteConfig, - SimpleDatabricksVolumesConfig, -) - -CMD_NAME = "databricks-volumes" - - -@dataclass -class DatabricksVolumesCliConfig(SimpleDatabricksVolumesConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--host"], - type=str, - default=None, - help="The Databricks host URL for either the " - "Databricks workspace endpoint or the " - "Databricks accounts endpoint.", - ), - click.Option( - ["--account-id"], - type=str, - default=None, - help="The Databricks account ID for the Databricks " - "accounts endpoint. Only has effect when Host is " - "either https://accounts.cloud.databricks.com/ (AWS), " - "https://accounts.azuredatabricks.net/ (Azure), " - "or https://accounts.gcp.databricks.com/ (GCP).", - ), - click.Option( - ["--username"], - type=str, - default=None, - help="The Databricks username part of basic authentication. " - "Only possible when Host is *.cloud.databricks.com (AWS).", - ), - click.Option( - ["--password"], - type=str, - default=None, - help="The Databricks password part of basic authentication. 
" - "Only possible when Host is *.cloud.databricks.com (AWS).", - ), - click.Option(["--client-id"], type=str, default=None), - click.Option(["--client-secret"], type=str, default=None), - click.Option( - ["--token"], - type=str, - default=None, - help="The Databricks personal access token (PAT) (AWS, Azure, and GCP) or " - "Azure Active Directory (Azure AD) token (Azure).", - ), - click.Option( - ["--azure-workspace-resource-id"], - type=str, - default=None, - help="The Azure Resource Manager ID for the Azure Databricks workspace, " - "which is exchanged for a Databricks host URL.", - ), - click.Option( - ["--azure-client-secret"], - type=str, - default=None, - help="The Azure AD service principal’s client secret.", - ), - click.Option( - ["--azure-client-id"], - type=str, - default=None, - help="The Azure AD service principal’s application ID.", - ), - click.Option( - ["--azure-tenant-id"], - type=str, - default=None, - help="The Azure AD service principal’s tenant ID.", - ), - click.Option( - ["--azure-environment"], - type=str, - default=None, - help="The Azure environment type (such as Public, UsGov, China, and Germany) for a " - "specific set of API endpoints. Defaults to PUBLIC.", - ), - click.Option( - ["--auth-type"], - type=str, - default=None, - help="When multiple auth attributes are available in the " - "environment, use the auth type specified by this " - "argument. This argument also holds the currently " - "selected auth.", - ), - click.Option(["--cluster-id"], type=str, default=None), - click.Option(["--google-credentials"], type=str, default=None), - click.Option(["--google-service-account"], type=str, default=None), - ] - return options - - -@dataclass -class DatabricksVolumesCliWriteConfig(DatabricksVolumesWriteConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--volume"], type=str, required=True, help="Name of volume in the Unity Catalog" - ), - click.Option( - ["--catalog"], - type=str, - required=True, - help="Name of the catalog in the Databricks Unity Catalog service", - ), - click.Option( - ["--volume-path"], - type=str, - required=False, - default=None, - help="Optional path within the volume to write to", - ), - click.Option( - ["--overwrite"], - type=bool, - is_flag=True, - help="If true, an existing file will be overwritten.", - ), - click.Option( - ["--encoding"], - type=str, - required=True, - default="utf-8", - help="Encoding applied to the data when written to the volume", - ), - click.Option( - ["--schema"], - type=str, - required=True, - default="default", - help="Schema associated with the volume to write to in the Unity Catalog service", - ), - ] - return options - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name=CMD_NAME, - cli_config=DatabricksVolumesCliConfig, - additional_cli_options=[DatabricksVolumesCliWriteConfig], - write_config=DatabricksVolumesWriteConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/delta_table.py b/unstructured/ingest/cli/cmds/delta_table.py deleted file mode 100644 index 8504c09b0..000000000 --- a/unstructured/ingest/cli/cmds/delta_table.py +++ /dev/null @@ -1,94 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import CliConfig, Dict -from unstructured.ingest.connector.delta_table import DeltaTableWriteConfig, SimpleDeltaTableConfig - 
-CMD_NAME = "delta-table" - - -@dataclass -class DeltaTableCliConfig(SimpleDeltaTableConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--table-uri"], - required=True, - help="the path of the DeltaTable", - ), - click.Option( - ["--version"], - default=None, - type=int, - help="version of the DeltaTable", - ), - click.Option( - ["--storage_options"], - required=False, - type=Dict(), - default=None, - help="a dictionary of the options to use for the storage backend, " - "passed in as a json string", - ), - click.Option( - ["--without-files"], - is_flag=True, - default=False, - help="If set, will load table without tracking files.", - ), - ] - return options - - -@dataclass -class DeltaTableCliWriteConfig(DeltaTableWriteConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--overwrite-schema"], - is_flag=True, - default=False, - help="Flag to overwrite schema of destination table", - ), - click.Option( - ["--drop-empty-cols"], - is_flag=True, - default=False, - help="Flag to drop any columns that have no content", - ), - click.Option( - ["--mode"], - default="error", - type=click.Choice(["error", "append", "overwrite", "ignore"]), - help="How to handle existing data. Default is to error if table already exists. " - "If 'append', will add new data. " - "If 'overwrite', will replace table with new data. " - "If 'ignore', will not write anything if table already exists.", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name=CMD_NAME, - cli_config=DeltaTableCliConfig, - ) - return cmd_cls - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name=CMD_NAME, - cli_config=DeltaTableCliConfig, - additional_cli_options=[DeltaTableCliWriteConfig], - write_config=DeltaTableWriteConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/discord.py b/unstructured/ingest/cli/cmds/discord.py deleted file mode 100644 index 115745a6a..000000000 --- a/unstructured/ingest/cli/cmds/discord.py +++ /dev/null @@ -1,47 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, - DelimitedString, -) -from unstructured.ingest.connector.discord import SimpleDiscordConfig - - -@dataclass -class DiscordCliConfig(SimpleDiscordConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--token"], - required=True, - help="Bot token used to access Discord API, must have " - "READ_MESSAGE_HISTORY scope for the bot user", - ), - click.Option( - ["--channels"], - required=True, - type=DelimitedString(), - help="Comma-delimited list of discord channel ids to ingest from.", - ), - click.Option( - ["--period"], - default=None, - type=click.IntRange(0), - help="Number of days to go back in the history of " - "discord channels, must be a number", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="discord", - cli_config=DiscordCliConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/elasticsearch.py b/unstructured/ingest/cli/cmds/elasticsearch.py deleted file mode 100644 index 58e3ec4d6..000000000 --- a/unstructured/ingest/cli/cmds/elasticsearch.py +++ /dev/null @@ -1,133 +0,0 @@ -import typing as t -from 
dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import CliConfig, DelimitedString -from unstructured.ingest.connector.elasticsearch import ( - ElasticsearchWriteConfig, - SimpleElasticsearchConfig, -) - -CMD_NAME = "elasticsearch" - - -@dataclass -class ElasticsearchCliConfig(SimpleElasticsearchConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--index-name"], - required=True, - type=str, - help="Name of the Elasticsearch index to pull data from, or upload data to.", - ), - click.Option( - ["--hosts"], - type=DelimitedString(), - help='List of the Elasticsearch hosts to connect to, e.g. "http://localhost:9200"', - ), - click.Option( - ["--fields"], - type=DelimitedString(), - default=[], - help="If provided, will limit the fields returned by Elasticsearch " - "to this comma-delimited list", - ), - click.Option( - ["--username"], type=str, default=None, help="username when using basic auth" - ), - click.Option( - ["--password"], - type=str, - default=None, - help="password when using basic auth or connecting to a cloud instance", - ), - click.Option( - ["--cloud-id"], type=str, default=None, help="id used to connect to Elastic Cloud" - ), - click.Option( - ["--es-api-key"], type=str, default=None, help="api key used for authentication" - ), - click.Option( - ["--api-key-id"], - type=str, - default=None, - help="id associated with api key used for authentication: " - "https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-create-api-key.html", # noqa: E501 - ), - click.Option( - ["--bearer-auth"], - type=str, - default=None, - help="bearer token used for HTTP bearer authentication", - ), - click.Option( - ["--ca-certs"], - type=click.Path(), - default=None, - ), - click.Option( - ["--ssl-assert-fingerprint"], - type=str, - default=None, - help="SHA256 fingerprint value", - ), - click.Option( - ["--batch-size"], - default=100, - type=click.IntRange(0), - help="how many records to read at a time per process", - ), - ] - return options - - -@dataclass -class ElasticsearchCliWriteConfig(ElasticsearchWriteConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--batch-size-bytes"], - required=False, - default=15_000_000, - type=int, - help="Size limit (in bytes) for each batch of items to be uploaded. 
Check" - " https://www.elastic.co/guide/en/elasticsearch/guide/current/bulk.html" - "#_how_big_is_too_big for more information.", - ), - click.Option( - ["--num-processes"], - required=False, - default=1, - type=int, - help="Number of processes to be used while uploading content", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="elasticsearch", - cli_config=ElasticsearchCliConfig, - ) - return cmd_cls - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name="elasticsearch", - cli_config=ElasticsearchCliConfig, - additional_cli_options=[ElasticsearchCliWriteConfig], - addition_configs={ - "connector_config": SimpleElasticsearchConfig, - "write_config": ElasticsearchCliWriteConfig, - }, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/fsspec/__init__.py b/unstructured/ingest/cli/cmds/fsspec/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstructured/ingest/cli/cmds/fsspec/azure.py b/unstructured/ingest/cli/cmds/fsspec/azure.py deleted file mode 100644 index 0d5f04344..000000000 --- a/unstructured/ingest/cli/cmds/fsspec/azure.py +++ /dev/null @@ -1,94 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, -) -from unstructured.ingest.connector.fsspec.azure import ( - AzureWriteConfig, - SimpleAzureBlobStorageConfig, -) - -CMD_NAME = "azure" - - -@dataclass -class AzureCliConfig(SimpleAzureBlobStorageConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--account-key"], - default=None, - help="The storage account key. This is used for shared key " - "authentication. If any of account key, sas token or " - "client_id are not specified, anonymous access will be used.", - ), - click.Option( - ["--account-name"], - default=None, - help="The storage account name. This is used to authenticate " - "requests signed with an account key and to construct " - "the storage endpoint. It is required unless a connection " - "string is given, or if a custom domain is used with " - "anonymous authentication.", - ), - click.Option( - ["--connection-string"], - default=None, - help="If specified, this will override all other parameters. See " - "http://azure.microsoft.com/en-us/documentation/articles/storage-configure-connection-string/ " # noqa: E501 - "for the connection string format.", - ), - click.Option( - ["--sas_token"], - default=None, - help="A shared access signature token to use to authenticate " - "requests instead of the account key. If account key and " - "sas token are both specified, account key will be used " - "to sign. 
If any of account key, sas token or client_id " - "are not specified, anonymous access will be used.", - ), - ] - return options - - -@dataclass -class AzureCliWriteConfig(AzureWriteConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--overwrite"], - is_flag=True, - default=False, - show_default=True, - help="If set, will overwrite content if content already exists", - ) - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name=CMD_NAME, - cli_config=AzureCliConfig, - is_fsspec=True, - ) - return cmd_cls - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name=CMD_NAME, - cli_config=AzureCliConfig, - write_config=AzureCliWriteConfig, - is_fsspec=True, - additional_cli_options=[AzureCliWriteConfig], - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/fsspec/box.py b/unstructured/ingest/cli/cmds/fsspec/box.py deleted file mode 100644 index 0d7976350..000000000 --- a/unstructured/ingest/cli/cmds/fsspec/box.py +++ /dev/null @@ -1,48 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, -) -from unstructured.ingest.connector.fsspec.box import BoxWriteConfig, SimpleBoxConfig - -CMD_NAME = "box" - - -@dataclass -class BoxCliConfig(SimpleBoxConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--box-app-config"], - default=None, - type=click.Path(), - help="Path to Box app credentials as json file.", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name=CMD_NAME, - cli_config=BoxCliConfig, - is_fsspec=True, - ) - return cmd_cls - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name=CMD_NAME, - cli_config=BoxCliConfig, - write_config=BoxWriteConfig, - is_fsspec=True, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/fsspec/dropbox.py b/unstructured/ingest/cli/cmds/fsspec/dropbox.py deleted file mode 100644 index 247643016..000000000 --- a/unstructured/ingest/cli/cmds/fsspec/dropbox.py +++ /dev/null @@ -1,51 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, -) -from unstructured.ingest.connector.fsspec.dropbox import ( - DropboxWriteConfig, - SimpleDropboxConfig, -) - -CMD_NAME = "dropbox" - - -@dataclass -class DropboxCliConfig(SimpleDropboxConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--token"], - required=True, - type=str, - help="Dropbox access token.", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name=CMD_NAME, - cli_config=DropboxCliConfig, - is_fsspec=True, - ) - return cmd_cls - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name=CMD_NAME, - cli_config=DropboxCliConfig, - write_config=DropboxWriteConfig, - is_fsspec=True, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/fsspec/fsspec.py b/unstructured/ingest/cli/cmds/fsspec/fsspec.py deleted file mode 100644 index e2d50a278..000000000 --- 
a/unstructured/ingest/cli/cmds/fsspec/fsspec.py +++ /dev/null @@ -1,15 +0,0 @@ -from unstructured.ingest.cli.base.src import BaseSrcCmd - -CMD_NAME = "fsspec" - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd(cmd_name=CMD_NAME, is_fsspec=True) - return cmd_cls - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd(cmd_name=CMD_NAME, is_fsspec=True) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/fsspec/gcs.py b/unstructured/ingest/cli/cmds/fsspec/gcs.py deleted file mode 100644 index 4664694a7..000000000 --- a/unstructured/ingest/cli/cmds/fsspec/gcs.py +++ /dev/null @@ -1,71 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, - FileOrJson, -) -from unstructured.ingest.connector.fsspec.gcs import GcsWriteConfig, SimpleGcsConfig - -CMD_NAME = "gcs" - - -@dataclass -class GcsCliConfig(SimpleGcsConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - help_string = """ - Options: - - ``None``, GCSFS will attempt to guess your credentials in the - following order: gcloud CLI default, gcsfs cached token, google compute - metadata service, anonymous. - - ``'google_default'``, your default gcloud credentials will be used, - which are typically established by doing ``gcloud login`` in a terminal. - - ``'cache'``, credentials from previously successful gcsfs - authentication will be used (use this after "browser" auth succeeded) - - ``'anon'``, no authentication is performed, and you can only - access data which is accessible to allUsers (in this case, the project and - access level parameters are meaningless) - - ``'browser'``, you get an access code with which you can - authenticate via a specially provided URL - - if ``'cloud'``, we assume we are running within google compute - or google container engine, and query the internal metadata directly for - a token. - - you may supply a token generated by the - [gcloud](https://cloud.google.com/sdk/docs/) - utility; this is either a python dictionary or the name of a file - containing the JSON returned by logging in with the gcloud CLI tool. 
- """ - options = [ - click.Option( - ["--service-account-key"], - default=None, - type=FileOrJson(allow_raw_str=True), - help=help_string, - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name=CMD_NAME, - cli_config=GcsCliConfig, - is_fsspec=True, - ) - return cmd_cls - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name=CMD_NAME, - cli_config=GcsCliConfig, - write_config=GcsWriteConfig, - is_fsspec=True, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/fsspec/s3.py b/unstructured/ingest/cli/cmds/fsspec/s3.py deleted file mode 100644 index a185fa2e1..000000000 --- a/unstructured/ingest/cli/cmds/fsspec/s3.py +++ /dev/null @@ -1,74 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, -) -from unstructured.ingest.connector.fsspec.s3 import S3WriteConfig, SimpleS3Config - -CMD_NAME = "s3" - - -@dataclass -class S3CliConfig(SimpleS3Config, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--anonymous"], - is_flag=True, - default=False, - help="Connect to s3 without local AWS credentials.", - ), - click.Option( - ["--endpoint-url"], - type=str, - default=None, - help="Use this endpoint_url, if specified. Needed for " - "connecting to non-AWS S3 buckets.", - ), - click.Option( - ["--key"], - type=str, - default=None, - help="If not anonymous, use this access key ID, if specified. Takes precedence " - "over `aws_access_key_id` in client_kwargs.", - ), - click.Option( - ["--secret"], - type=str, - default=None, - help="If not anonymous, use this secret access key, if specified.", - ), - click.Option( - ["--token"], - type=str, - default=None, - help="If not anonymous, use this security token, if specified.", - ), - ] - return options - - -def get_base_src_cmd(): - cmd_cls = BaseSrcCmd( - cmd_name=CMD_NAME, - cli_config=S3CliConfig, - is_fsspec=True, - ) - return cmd_cls - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name=CMD_NAME, - cli_config=S3CliConfig, - write_config=S3WriteConfig, - is_fsspec=True, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/fsspec/sftp.py b/unstructured/ingest/cli/cmds/fsspec/sftp.py deleted file mode 100644 index 01f7c615a..000000000 --- a/unstructured/ingest/cli/cmds/fsspec/sftp.py +++ /dev/null @@ -1,58 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, -) -from unstructured.ingest.connector.fsspec.sftp import SimpleSftpConfig - -CMD_NAME = "sftp" - - -@dataclass -class SftpCliConfig(SimpleSftpConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--username"], - required=True, - type=str, - help="Username for sftp connection", - ), - click.Option( - ["--password"], - required=True, - type=str, - help="Password for sftp connection", - ), - click.Option( - ["--look-for-keys"], - required=False, - default=False, - is_flag=True, - type=bool, - help="Whether to search for private key files in ~/.ssh/", - ), - click.Option( - ["--allow-agent"], - required=False, - default=False, - is_flag=True, - type=bool, - help="Whether to connect 
to the SSH agent.", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name=CMD_NAME, - cli_config=SftpCliConfig, - is_fsspec=True, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/github.py b/unstructured/ingest/cli/cmds/github.py deleted file mode 100644 index bb3f1b7f0..000000000 --- a/unstructured/ingest/cli/cmds/github.py +++ /dev/null @@ -1,54 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import CliConfig, DelimitedString -from unstructured.ingest.connector.github import SimpleGitHubConfig - - -@dataclass -class GithubCliConfig(SimpleGitHubConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--url"], - required=True, - type=str, - help="URL to GitHub repository, e.g. " - '"https://github.com/Unstructured-IO/unstructured", or ' - 'a repository owner/name pair, e.g. "Unstructured-IO/unstructured"', - ), - click.Option( - ["--git-access-token"], - default=None, - help="A GitHub or GitLab access token, " - "see https://docs.github.com/en/authentication or " - "https://docs.gitlab.com/ee/api/rest/index.html#personalprojectgroup-access-tokens", - ), - click.Option( - ["--git-branch"], - default=None, - type=str, - help="The branch for which to fetch files from. If not given," - " the default repository branch is used.", - ), - click.Option( - ["--git-file-glob"], - default=None, - type=DelimitedString(), - help="A comma-separated list of file globs to limit which " - "types of files are accepted, e.g. '*.html,*.txt'", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="github", - cli_config=GithubCliConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/gitlab.py b/unstructured/ingest/cli/cmds/gitlab.py deleted file mode 100644 index 5f01c4201..000000000 --- a/unstructured/ingest/cli/cmds/gitlab.py +++ /dev/null @@ -1,54 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import CliConfig, DelimitedString -from unstructured.ingest.connector.gitlab import SimpleGitlabConfig - - -@dataclass -class GitlabCliConfig(SimpleGitlabConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--url"], - required=True, - type=str, - help="URL to GitHub repository, e.g. " - '"https://github.com/Unstructured-IO/unstructured", or ' - 'a repository owner/name pair, e.g. "Unstructured-IO/unstructured"', - ), - click.Option( - ["--git-access-token"], - default=None, - help="A GitHub or GitLab access token, " - "see https://docs.github.com/en/authentication or " - "https://docs.gitlab.com/ee/api/rest/index.html#personalprojectgroup-access-tokens", - ), - click.Option( - ["--git-branch"], - default=None, - type=str, - help="The branch for which to fetch files from. If not given," - " the default repository branch is used.", - ), - click.Option( - ["--git-file-glob"], - default=None, - type=DelimitedString(), - help="A comma-separated list of file globs to limit which types of " - "files are accepted, e.g. 
'*.html,*.txt'", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="gitlab", - cli_config=GitlabCliConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/google_drive.py b/unstructured/ingest/cli/cmds/google_drive.py deleted file mode 100644 index 6fc9b1930..000000000 --- a/unstructured/ingest/cli/cmds/google_drive.py +++ /dev/null @@ -1,49 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, - CliRecursiveConfig, - FileOrJson, -) -from unstructured.ingest.connector.google_drive import SimpleGoogleDriveConfig - - -@dataclass -class GoogleDriveCliConfig(SimpleGoogleDriveConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--drive-id"], - required=True, - type=str, - help="Google Drive File or Folder ID.", - ), - click.Option( - ["--service-account-key"], - required=True, - type=FileOrJson(), - help="Either the file path of the credentials file to use or a json string of " - "those values to use for authentication", - ), - click.Option( - ["--extension"], - default=None, - type=str, - help="Filters the files to be processed based on extension e.g. .jpg, .docx, etc.", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="google-drive", - cli_config=GoogleDriveCliConfig, - additional_cli_options=[CliRecursiveConfig], - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/hubspot.py b/unstructured/ingest/cli/cmds/hubspot.py deleted file mode 100644 index 219973cb7..000000000 --- a/unstructured/ingest/cli/cmds/hubspot.py +++ /dev/null @@ -1,70 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import CliMixin, DelimitedString, Dict -from unstructured.ingest.connector.hubspot import HubSpotObjectTypes, SimpleHubSpotConfig - -OBJECT_TYPES = {t.value for t in HubSpotObjectTypes} - - -def validate_custom_property(ctx, param, value) -> t.Dict[str, t.List[str]]: - if not value: - return value - for k in value: - if k not in OBJECT_TYPES: - raise ValueError(f"Invalid object type: {k}, must be one of {OBJECT_TYPES}") - if not isinstance(value[k], list): - raise ValueError(f"Invalid type: {type(value[k])}, must be a Python list.") - return value - - -@dataclass -class HubSpotCliConfig(SimpleHubSpotConfig, CliMixin): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--api-token"], - required=True, - type=str, - help="Access token to perform operations on Hubspot. \ - Check \ - https://developers.hubspot.com/docs/api/private-apps/ \ - for more info", - ), - click.Option( - ["--object-types"], - default=None, - required=False, - type=DelimitedString(choices=OBJECT_TYPES), - is_flag=False, - help=f"Object to include in the process.\ - Must be a subset of {','.join(OBJECT_TYPES)}.\ - If the argument is omitted all objects listed will be processed.", - ), - click.Option( - ["--custom-properties"], - default=None, - required=False, - type=Dict(), - is_flag=False, - callback=validate_custom_property, - help="Custom property to process information from.\ - It should be a json-like string in the form\ - :[, ..., ]\ - Must be internal name of the variable. 
If the property is missing, \ - it will be omitted.", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="hubspot", - cli_config=HubSpotCliConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/jira.py b/unstructured/ingest/cli/cmds/jira.py deleted file mode 100644 index 74b2d5356..000000000 --- a/unstructured/ingest/cli/cmds/jira.py +++ /dev/null @@ -1,71 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, - DelimitedString, -) -from unstructured.ingest.connector.jira import SimpleJiraConfig - - -@dataclass -class JiraCliConfig(SimpleJiraConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--api-token"], - required=True, - type=str, - help="API Token to authenticate into Jira (into Atlassian). \ - Check \ - https://developer.atlassian.com/cloud/jira/platform/basic-auth-for-rest-apis/ \ - for more info.", - ), - click.Option( - ["--url"], - required=True, - type=str, - help="URL to Atlassian (Jira) Cloud, e.g. " - '"unstructured-jira-connector-test.atlassian.net"', - ), - click.Option( - ["--user-email"], - required=True, - type=str, - help="Email to authenticate into Atlassian (Jira) Cloud.", - ), - click.Option( - ["--projects"], - default=None, - type=DelimitedString(), - help="Comma-delimited Project ids or keys. Use Jira UI or the " - "API to find or obtain keys. Alternatively, use API to obtain ids.", - ), - click.Option( - ["--boards"], - default=None, - type=DelimitedString(), - help="Comma-delimited Board ids. Check board URL, or use the " - "API to find the board ids.", - ), - click.Option( - ["--issues"], - default=None, - type=DelimitedString(), - help="Comma-delimited Issue ids or keys. Use Jira UI or the API to " - "find or obtain keys. 
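# The HubSpot options above pair a JSON-dict parameter type (the library's Dict
# helper) with a click callback, validate_custom_property, that checks the
# mapping before ingestion starts. The self-contained sketch below shows how
# that wiring works. JsonDict is a hypothetical stand-in for the Dict type, the
# OBJECT_TYPES set and the property name in the demo argv are illustrative
# values rather than the real HubSpotObjectTypes contents, and the validator
# raises click.BadParameter where the deleted code raised ValueError.
import json
import typing as t

import click

OBJECT_TYPES = {"calls", "emails", "tickets"}  # illustrative subset


class JsonDict(click.ParamType):
    """Parse a JSON object passed as a single CLI string into a dict."""

    name = "json-dict"

    def convert(self, value, param, ctx):
        if value is None or isinstance(value, dict):
            return value
        try:
            parsed = json.loads(value)
        except json.JSONDecodeError as exc:
            self.fail(f"{value!r} is not valid JSON: {exc}", param, ctx)
        if not isinstance(parsed, dict):
            self.fail(f"expected a JSON object, got {type(parsed).__name__}", param, ctx)
        return parsed


def validate_custom_properties(ctx, param, value) -> t.Optional[t.Dict[str, t.List[str]]]:
    """Mirror of the deleted validator: keys must be known object types and
    each value must be a list of property names."""
    if not value:
        return value
    for key, props in value.items():
        if key not in OBJECT_TYPES:
            raise click.BadParameter(f"invalid object type: {key}, must be one of {OBJECT_TYPES}")
        if not isinstance(props, list):
            raise click.BadParameter(f"properties for {key!r} must be a list, got {type(props).__name__}")
    return value


cmd = click.Command(
    name="hubspot-demo",
    params=[
        click.Option(
            ["--custom-properties"],
            type=JsonDict(),
            callback=validate_custom_properties,
            default=None,
        )
    ],
    callback=lambda **kwargs: click.echo(kwargs),
)

if __name__ == "__main__":
    cmd.main(["--custom-properties", '{"tickets": ["priority"]}'], standalone_mode=False)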
Alternatively, use API to obtain ids.", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="jira", - cli_config=JiraCliConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/kafka.py b/unstructured/ingest/cli/cmds/kafka.py deleted file mode 100644 index afbad4888..000000000 --- a/unstructured/ingest/cli/cmds/kafka.py +++ /dev/null @@ -1,102 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import CliConfig -from unstructured.ingest.connector.kafka import KafkaWriteConfig, SimpleKafkaConfig - -CMD_NAME = "kafka" - - -@dataclass -class KafkaCliConfig(SimpleKafkaConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--bootstrap-server"], required=True, type=str, help="Broker server hostname" - ), - click.Option( - ["--port"], - required=True, - type=str, - help="The bootstrap port", - ), - click.Option( - ["--topic"], - required=True, - type=str, - help="The topic to write into.'", - ), - click.Option( - ["--kafka-api-key"], - required=False, - type=str, - help="The API KEY", - ), - click.Option( - ["--secret"], - required=False, - type=str, - help="The secret", - ), - click.Option( - ["--num-messages-to-consume"], - required=False, - type=int, - default=1, - help="The number of messages to consume before unblocking the consumer", - ), - click.Option( - ["--timeout"], - required=False, - type=float, - default=1.0, - help="Maximum time to block waiting for message(Seconds)", - ), - click.Option( - ["--confluent"], - required=False, - type=bool, - default=True, - help="Whether this Kafka instance is from Confluent", - ), - ] - return options - - -@dataclass -class KafkaCliWriteConfig(KafkaWriteConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--batch-size"], - default=4, - type=int, - help="Number of records per batch", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name=CMD_NAME, - cli_config=KafkaCliConfig, - ) - return cmd_cls - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name=CMD_NAME, - cli_config=KafkaCliConfig, - additional_cli_options=[KafkaCliWriteConfig], - write_config=KafkaWriteConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/local.py b/unstructured/ingest/cli/cmds/local.py deleted file mode 100644 index ff70c44ca..000000000 --- a/unstructured/ingest/cli/cmds/local.py +++ /dev/null @@ -1,43 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, - CliRecursiveConfig, - DelimitedString, -) -from unstructured.ingest.connector.local import SimpleLocalConfig - - -@dataclass -class LocalCliConfig(SimpleLocalConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--input-path"], - required=True, - type=click.Path(file_okay=True, dir_okay=True, exists=True), - help="Path to the location in the local file system that will be processed.", - ), - click.Option( - ["--file-glob"], - default=None, - type=DelimitedString(), - help="A comma-separated list of file globs to limit which types of " - "local files are 
accepted, e.g. '*.html,*.txt'", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="local", - cli_config=LocalCliConfig, - additional_cli_options=[CliRecursiveConfig], - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/mongodb.py b/unstructured/ingest/cli/cmds/mongodb.py deleted file mode 100644 index 6fbb5c365..000000000 --- a/unstructured/ingest/cli/cmds/mongodb.py +++ /dev/null @@ -1,72 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import CliConfig, DelimitedString -from unstructured.ingest.connector.mongodb import SimpleMongoDBConfig -from unstructured.ingest.interfaces import WriteConfig - -CMD_NAME = "mongodb" - - -@dataclass -class MongoDBCliConfig(SimpleMongoDBConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--uri"], - help="URI to user when connecting", - ), - click.Option( - ["--host"], - type=DelimitedString(), - help="hostname or IP address or Unix domain socket path of a single mongod or " - "mongos instance to connect to, or a list of hostnames", - ), - click.Option(["--port"], type=int, default=27017), - click.Option( - ["--database"], type=str, required=True, help="database name to connect to" - ), - click.Option( - ["--collection"], required=True, type=str, help="collection name to connect to" - ), - ] - return options - - -@dataclass -class MongoDBReadConfig(SimpleMongoDBConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--batch-size"], - default=100, - type=click.IntRange(0), - help="how many records to read at a time per process", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name=CMD_NAME, - cli_config=MongoDBCliConfig, - additional_cli_options=[MongoDBReadConfig], - ) - return cmd_cls - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name=CMD_NAME, - cli_config=MongoDBCliConfig, - write_config=WriteConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/notion.py b/unstructured/ingest/cli/cmds/notion.py deleted file mode 100644 index 02a9a30ed..000000000 --- a/unstructured/ingest/cli/cmds/notion.py +++ /dev/null @@ -1,48 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, - CliRecursiveConfig, - DelimitedString, -) -from unstructured.ingest.connector.notion.connector import SimpleNotionConfig - - -@dataclass -class NotionCliConfig(SimpleNotionConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--notion-api-key"], - required=True, - type=str, - help="API key for Notion api", - ), - click.Option( - ["--page-ids"], - default=None, - type=DelimitedString(), - help="Notion page IDs to pull text from", - ), - click.Option( - ["--database-ids"], - default=None, - type=DelimitedString(), - help="Notion database IDs to pull text from", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="notion", - cli_config=NotionCliConfig, - additional_cli_options=[CliRecursiveConfig], - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/onedrive.py 
b/unstructured/ingest/cli/cmds/onedrive.py deleted file mode 100644 index 5bf671d9f..000000000 --- a/unstructured/ingest/cli/cmds/onedrive.py +++ /dev/null @@ -1,66 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, - CliRecursiveConfig, -) -from unstructured.ingest.connector.onedrive import SimpleOneDriveConfig - - -@dataclass -class OnedriveCliConfig(SimpleOneDriveConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--client-id"], - required=True, - type=str, - help="Microsoft app client ID", - ), - click.Option( - ["--client-cred"], - required=True, - type=str, - help="Microsoft App client secret", - ), - click.Option( - ["--user-pname"], - required=True, - type=str, - help="User principal name, usually is your Azure AD email.", - ), - click.Option( - ["--tenant"], - default="common", - type=str, - help="ID or domain name associated with your Azure AD instance", - ), - click.Option( - ["--path"], - default=None, - type=str, - help="Folder to start parsing files from.", - ), - click.Option( - ["--authority-url"], - default="https://login.microsoftonline.com", - type=str, - help="Authentication token provider for Microsoft apps, default is " - "https://login.microsoftonline.com", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="onedrive", - cli_config=OnedriveCliConfig, - additional_cli_options=[CliRecursiveConfig], - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/opensearch.py b/unstructured/ingest/cli/cmds/opensearch.py deleted file mode 100644 index 0f135de15..000000000 --- a/unstructured/ingest/cli/cmds/opensearch.py +++ /dev/null @@ -1,117 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.cmds.elasticsearch import ElasticsearchCliWriteConfig -from unstructured.ingest.cli.interfaces import CliConfig, DelimitedString -from unstructured.ingest.connector.opensearch import SimpleOpenSearchConfig - -CMD_NAME = "opensearch" - - -@dataclass -class OpenSearchCliConfig(SimpleOpenSearchConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--index-name"], - required=True, - type=str, - help="Name of the OpenSearch index to pull data from, or upload data to.", - ), - click.Option( - ["--hosts"], - type=DelimitedString(), - help='List of the OpenSearch hosts to connect to, e.g. 
"http://localhost:9200"', - ), - click.Option( - ["--fields"], - type=DelimitedString(), - default=[], - help="If provided, will limit the fields returned by OpenSearch " - "to this comma-delimited list", - ), - click.Option( - ["--username"], type=str, default=None, help="username when using basic auth" - ), - click.Option( - ["--password"], - type=str, - default=None, - help="password when using basic auth", - ), - click.Option( - ["--use-ssl"], - type=bool, - default=False, - is_flag=True, - help="use ssl for the connection", - ), - click.Option( - ["--verify-certs"], - type=bool, - default=False, - is_flag=True, - help="whether to verify SSL certificates", - ), - click.Option( - ["--ssl-show-warn"], - type=bool, - default=False, - is_flag=True, - help="show warning when verify certs is disabled", - ), - click.Option( - ["--ca-certs"], - type=click.Path(), - default=None, - help="path to CA bundle", - ), - click.Option( - ["--client-cert"], - type=click.Path(), - default=None, - help="path to the file containing the private key and the certificate," - " or cert only if using client_key", - ), - click.Option( - ["--client-key"], - type=click.Path(), - default=None, - help="path to the file containing the private key" - " if using separate cert and key files", - ), - click.Option( - ["--batch-size"], - default=100, - type=click.IntRange(0), - help="how many records to read at a time per process", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="opensearch", - cli_config=OpenSearchCliConfig, - ) - return cmd_cls - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name="opensearch", - cli_config=OpenSearchCliConfig, - additional_cli_options=[ElasticsearchCliWriteConfig], - addition_configs={ - "connector_config": SimpleOpenSearchConfig, - "write_config": ElasticsearchCliWriteConfig, - }, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/outlook.py b/unstructured/ingest/cli/cmds/outlook.py deleted file mode 100644 index 7b4e66968..000000000 --- a/unstructured/ingest/cli/cmds/outlook.py +++ /dev/null @@ -1,67 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, - CliRecursiveConfig, - DelimitedString, -) -from unstructured.ingest.connector.outlook import SimpleOutlookConfig - - -@dataclass -class OutlookCliConfig(SimpleOutlookConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--client-id"], - required=True, - type=str, - help="Microsoft app client ID", - ), - click.Option( - ["--user-email"], - required=True, - type=str, - help="Outlook email to download messages from.", - ), - click.Option( - ["--tenant"], - default="common", - help="ID or domain name associated with your Azure AD instance", - ), - click.Option( - ["--outlook-folders"], - default=None, - type=DelimitedString(), - help="Folders to download email messages from. " - "Do not specify subfolders. 
Use quotes if spaces in folder names.", - ), - click.Option( - ["--client-cred"], - default=None, - type=str, - help="Microsoft App client secret", - ), - click.Option( - ["--authority-url"], - default="https://login.microsoftonline.com", - type=str, - help="Authentication token provider for Microsoft apps, default is " - "https://login.microsoftonline.com", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="outlook", - cli_config=OutlookCliConfig, - additional_cli_options=[CliRecursiveConfig], - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/pinecone.py b/unstructured/ingest/cli/cmds/pinecone.py deleted file mode 100644 index 91d476669..000000000 --- a/unstructured/ingest/cli/cmds/pinecone.py +++ /dev/null @@ -1,71 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.interfaces import ( - CliConfig, -) -from unstructured.ingest.connector.pinecone import PineconeWriteConfig, SimplePineconeConfig - - -@dataclass -class PineconeCliConfig(SimplePineconeConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--api-key"], - required=True, - type=str, - help="API key used for authenticating to a Pinecone instance.", - envvar="PINECONE_API_KEY", - show_envvar=True, - ), - click.Option( - ["--index-name"], - required=True, - type=str, - help="The name of the pinecone index to connect to.", - ), - click.Option( - ["--environment"], - required=True, - type=str, - help="The environment where the index lives. Eg. 'gcp-starter' or 'us-east1-gcp'", - ), - ] - return options - - -@dataclass -class PineconeCliWriteConfig(PineconeWriteConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--batch-size"], - default=50, - type=int, - help="Number of records per batch", - ), - click.Option( - ["--num-processes"], - default=2, - type=int, - help="Number of parallel processes with which to upload elements", - ), - ] - return options - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name="pinecone", - cli_config=PineconeCliConfig, - additional_cli_options=[PineconeCliWriteConfig], - write_config=PineconeWriteConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/qdrant.py b/unstructured/ingest/cli/cmds/qdrant.py deleted file mode 100644 index 1a0847614..000000000 --- a/unstructured/ingest/cli/cmds/qdrant.py +++ /dev/null @@ -1,124 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.interfaces import ( - CliConfig, -) -from unstructured.ingest.connector.qdrant import QdrantWriteConfig, SimpleQdrantConfig - - -@dataclass -class QdrantCliConfig(SimpleQdrantConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--collection-name"], - required=True, - type=str, - help="The name of the Qdrant collection to use.", - ), - click.Option( - ["--location"], - type=str, - help="The location of the Qdrant cluster.", - ), - click.Option( - ["--url"], - type=str, - help="The location of the Qdrant cluster.", - ), - click.Option( - ["--port"], - type=int, - default=6333, - help="Port of the REST API interface. Default: 6333.", - ), - click.Option( - ["--grpc-port"], - type=int, - default=6334, - help="Port of the gRPC interface. 
Default: 6334.", - ), - click.Option( - ["--prefer-grpc"], - type=bool, - is_flag=True, - help="Whether to use gPRC interface whenever possible in methods. Default: False.", - ), - click.Option( - ["--https"], - type=bool, - is_flag=True, - help="Whether to use HTTPS(SSL) protocol. Default: False.", - ), - click.Option( - ["--prefix"], - type=str, - help="Prefix to add the REST API endpoints.", - ), - click.Option( - ["--timeout"], - type=int, - help="Timeout for operations. Default: 5.0 seconds for REST, unlimited for gRPC.", - ), - click.Option( - ["--host"], - type=str, - help="Host name of the Qdrant service.", - ), - click.Option( - ["--path"], - type=str, - help="Persistence path for QdrantLocal.", - ), - click.Option( - ["--force-disable-check-same-thread"], - type=bool, - is_flag=True, - help="Whether to force disable check same thread for QdrantLocal.", - ), - click.Option( - ["--api-key"], - type=str, - help="API key for authentication in Qdrant Cloud. Default: None.", - envvar="QDRANT_API_KEY", - show_envvar=True, - ), - ] - return options - - -@dataclass -class QdrantCliWriteConfig(QdrantWriteConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--batch-size"], - default=50, - type=int, - help="Number of points to upload per batch", - ), - click.Option( - ["--num-processes"], - default=2, - type=int, - help="Number of parallel processes with which to upload", - ), - ] - return options - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name="qdrant", - cli_config=QdrantCliConfig, - additional_cli_options=[QdrantCliWriteConfig], - write_config=QdrantWriteConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/reddit.py b/unstructured/ingest/cli/cmds/reddit.py deleted file mode 100644 index 067b74250..000000000 --- a/unstructured/ingest/cli/cmds/reddit.py +++ /dev/null @@ -1,67 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, -) -from unstructured.ingest.connector.reddit import SimpleRedditConfig - - -@dataclass -class RedditCliConfig(SimpleRedditConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--client-id"], - required=True, - type=str, - help="The client ID, see " - "https://praw.readthedocs.io/en/stable/getting_started/quick_start.html#prerequisites" # noqa: E501 - " for more information.", - ), - click.Option( - ["--client-secret"], - required=True, - type=str, - help="The client secret, see " - "https://praw.readthedocs.io/en/stable/getting_started/quick_start.html#prerequisites" # noqa: E501 - " for more information.", - ), - click.Option( - ["--subreddit-name"], - required=True, - type=str, - help='The name of a subreddit, without the "r\\", e.g. "machinelearning"', - ), - click.Option( - ["--search-query"], - default=None, - type=str, - help="If set, return posts using this query. 
Otherwise, use hot posts.", - ), - click.Option( - ["--num-posts"], - required=True, - type=click.IntRange(0), - help="If set, limits the number of posts to pull in.", - ), - click.Option( - ["--user-agent"], - required=True, - type=str, - help="user agent request header to use when calling Reddit API", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="reddit", - cli_config=RedditCliConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/salesforce.py b/unstructured/ingest/cli/cmds/salesforce.py deleted file mode 100644 index a6d7119a1..000000000 --- a/unstructured/ingest/cli/cmds/salesforce.py +++ /dev/null @@ -1,58 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, - CliRecursiveConfig, - DelimitedString, -) -from unstructured.ingest.connector.salesforce import SimpleSalesforceConfig - - -@dataclass -class SalesforceCliConfig(SimpleSalesforceConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - possible_categories = ["Account", "Case", "Campaign", "EmailMessage", "Lead"] - options = [ - click.Option( - ["--username"], - required=True, - type=str, - help="Salesforce username usually looks like an email.", - ), - click.Option( - ["--consumer-key"], - required=True, - type=str, - help="For the Salesforce JWT auth. Found in Consumer Details.", - ), - click.Option( - ["--private-key"], - required=True, - type=str, - help="Path to the private key or its contents for the Salesforce JWT auth. " - "Key file is usually named server.key.", - ), - click.Option( - ["--categories"], - default=None, - required=True, - type=DelimitedString(choices=possible_categories), - help="Comma-delimited salesforce categories to download. " - "Currently only {}.".format(", ".join(possible_categories)), - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="salesforce", - cli_config=SalesforceCliConfig, - additional_cli_options=[CliRecursiveConfig], - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/sharepoint.py b/unstructured/ingest/cli/cmds/sharepoint.py deleted file mode 100644 index 5c6185eef..000000000 --- a/unstructured/ingest/cli/cmds/sharepoint.py +++ /dev/null @@ -1,66 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, - CliRecursiveConfig, -) -from unstructured.ingest.connector.sharepoint import SimpleSharepointConfig - - -@dataclass -class SharepointCliConfig(SimpleSharepointConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--client-id"], - default=None, - type=str, - help="Sharepoint app client ID", - ), - click.Option( - ["--client-cred"], - default=None, - type=str, - help="Sharepoint app secret", - ), - click.Option( - ["--site"], - default=None, - type=str, - help="Sharepoint site url. Process either base url e.g \ - https://[tenant].sharepoint.com or relative sites \ - https://[tenant].sharepoint.com/sites/. 
\ - To process all sites within the tenant pass a site url as \ - https://[tenant]-admin.sharepoint.com.\ - This requires the app to be registered at a tenant level", - ), - click.Option( - ["--path"], - default="Shared Documents", - type=str, - help="Path from which to start parsing files. If the connector is to \ - process all sites within the tenant this filter will be applied to \ - all sites document libraries. Default 'Shared Documents'", - ), - click.Option( - ["--files-only"], - is_flag=True, - default=False, - help="Process only files.", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="sharepoint", - cli_config=SharepointCliConfig, - additional_cli_options=[CliRecursiveConfig], - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/slack.py b/unstructured/ingest/cli/cmds/slack.py deleted file mode 100644 index 7112849e1..000000000 --- a/unstructured/ingest/cli/cmds/slack.py +++ /dev/null @@ -1,56 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, - DelimitedString, -) -from unstructured.ingest.connector.slack import SimpleSlackConfig - - -@dataclass -class SlackCliConfig(SimpleSlackConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--token"], - required=True, - type=str, - help="Bot token used to access Slack API, must have channels:history " - "scope for the bot user", - ), - click.Option( - ["--channels"], - required=True, - type=DelimitedString(), - help="Comma-delimited list of Slack channel IDs to pull messages from, " - "can be a public or private channel", - ), - click.Option( - ["--start-date"], - default=None, - type=str, - help="Start date/time in formats YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS or " - "YYYY-MM-DD+HH:MM:SS or YYYY-MM-DDTHH:MM:SStz", - ), - click.Option( - ["--end-date"], - default=None, - type=str, - help="End date/time in formats YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS or " - "YYYY-MM-DD+HH:MM:SS or YYYY-MM-DDTHH:MM:SStz", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="slack", - cli_config=SlackCliConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/sql.py b/unstructured/ingest/cli/cmds/sql.py deleted file mode 100644 index 7b4800e55..000000000 --- a/unstructured/ingest/cli/cmds/sql.py +++ /dev/null @@ -1,66 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.interfaces import CliConfig -from unstructured.ingest.connector.sql import SimpleSqlConfig -from unstructured.ingest.interfaces import WriteConfig - -SQL_DRIVERS = {"postgresql", "sqlite"} - - -@dataclass -class SqlCliConfig(SimpleSqlConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--db-type"], - required=True, - type=click.Choice(SQL_DRIVERS), - help="Type of the database backend", - ), - click.Option( - ["--username"], - default=None, - type=str, - help="DB username", - ), - click.Option( - ["--password"], - default=None, - type=str, - help="DB password", - ), - click.Option( - ["--host"], - default=None, - type=str, - help="DB host", - ), - click.Option( - ["--port"], - default=None, - type=int, - help="DB host connection port", - ), - click.Option( - ["--database"], - default=None, - type=str, - help="Database name. 
For sqlite databases, this is the path to the .db file.", - ), - ] - return options - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name="sql", - cli_config=SqlCliConfig, - write_config=WriteConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/vectara.py b/unstructured/ingest/cli/cmds/vectara.py deleted file mode 100644 index 0c623362b..000000000 --- a/unstructured/ingest/cli/cmds/vectara.py +++ /dev/null @@ -1,66 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.interfaces import CliConfig -from unstructured.ingest.connector.vectara import SimpleVectaraConfig, WriteConfig - - -@dataclass -class VectaraCliWriteConfig(SimpleVectaraConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--customer-id"], - required=True, - type=str, - help="The Vectara customer-id.", - envvar="VECTARA_CUSTOMER_ID", - show_envvar=True, - ), - click.Option( - ["--oauth-client-id"], - required=True, - type=str, - help="Vectara OAuth2 client ID.", - envvar="VECTARA_OAUTH_CLIENT_ID", - show_envvar=True, - ), - click.Option( - ["--oauth-secret"], - required=True, - type=str, - help="Vectara OAuth2 secret.", - envvar="VECTARA_OAUTH_SECRET", - show_envvar=True, - ), - click.Option( - ["--corpus-name"], - required=False, - type=str, - default=None, - help="The Vectara corpus-name.", - ), - click.Option( - ["--token-url"], - required=False, - default="https://vectara-prod-{}.auth.us-west-2.amazoncognito.com/oauth2/token", - type=str, - help="The Vectara endpoint for token refresh. Needs curly brackets for customer_id", - ), - ] - return options - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name="vectara", - cli_config=VectaraCliWriteConfig, - additional_cli_options=[], - write_config=WriteConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/weaviate.py b/unstructured/ingest/cli/cmds/weaviate.py deleted file mode 100644 index 69107a9c2..000000000 --- a/unstructured/ingest/cli/cmds/weaviate.py +++ /dev/null @@ -1,98 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.interfaces import CliConfig, DelimitedString -from unstructured.ingest.connector.weaviate import SimpleWeaviateConfig, WeaviateWriteConfig - -CMD_NAME = "weaviate" - - -@dataclass -class WeaviateCliConfig(SimpleWeaviateConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--host-url"], - required=True, - help="Weaviate instance url", - ), - click.Option( - ["--class-name"], - default=None, - type=str, - help="Name of the class to push the records into, e.g: Pdf-elements", - ), - click.Option( - ["--access-token"], default=None, type=str, help="Used to create the bearer token." - ), - click.Option( - ["--refresh-token"], - default=None, - type=str, - help="Will tie this value to the bearer token. 
If not provided, " - "the authentication will expire once the lifetime of the access token is up.", - ), - click.Option( - ["--api-key"], - default=None, - type=str, - ), - click.Option( - ["--client-secret"], - default=None, - type=str, - ), - click.Option( - ["--scope"], - default=None, - type=DelimitedString(), - ), - click.Option( - ["--username"], - default=None, - type=str, - ), - click.Option( - ["--password"], - default=None, - type=str, - ), - click.Option( - ["--anonymous"], - is_flag=True, - default=False, - type=bool, - help="if set, all auth values will be ignored", - ), - ] - return options - - -@dataclass -class WeaviateCliWriteConfig(WeaviateWriteConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--batch-size"], - default=100, - type=int, - help="Number of records per batch", - ) - ] - return options - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name=CMD_NAME, - cli_config=WeaviateCliConfig, - additional_cli_options=[WeaviateCliWriteConfig], - write_config=WeaviateWriteConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/wikipedia.py b/unstructured/ingest/cli/cmds/wikipedia.py deleted file mode 100644 index a25f5c44c..000000000 --- a/unstructured/ingest/cli/cmds/wikipedia.py +++ /dev/null @@ -1,40 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, -) -from unstructured.ingest.connector.wikipedia import SimpleWikipediaConfig - - -@dataclass -class WikipediaCliConfig(SimpleWikipediaConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--page-title"], - required=True, - type=str, - help='Title of a Wikipedia page, e.g. "Open source software".', - ), - click.Option( - ["--auto-suggest"], - default=True, - is_flag=True, - help="Whether to automatically suggest a page if the exact page was not found." 
- " Set to False if the wrong Wikipedia page is fetched.", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="wikipedia", - cli_config=WikipediaCliConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/common.py b/unstructured/ingest/cli/common.py deleted file mode 100644 index 53dacafaf..000000000 --- a/unstructured/ingest/cli/common.py +++ /dev/null @@ -1,7 +0,0 @@ -import logging - -from unstructured.ingest.logger import ingest_log_streaming_init - - -def log_options(options: dict, verbose=False): - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) diff --git a/unstructured/ingest/cli/interfaces.py b/unstructured/ingest/cli/interfaces.py deleted file mode 100644 index 4703a1c47..000000000 --- a/unstructured/ingest/cli/interfaces.py +++ /dev/null @@ -1,656 +0,0 @@ -from __future__ import annotations - -import json -import os.path -import typing as t -from abc import abstractmethod -from dataclasses import fields -from gettext import gettext, ngettext -from pathlib import Path - -import click -from dataclasses_json.core import Json -from typing_extensions import Self - -from unstructured.chunking import CHUNK_MAX_CHARS_DEFAULT, CHUNK_MULTI_PAGE_DEFAULT -from unstructured.ingest.interfaces import ( - BaseConfig, - ChunkingConfig, - EmbeddingConfig, - FileStorageConfig, - PartitionConfig, - PermissionsConfig, - ProcessorConfig, - ReadConfig, - RetryStrategyConfig, -) - - -class Dict(click.ParamType): - name = "dict" - - def convert( - self, - value: t.Any, - param: t.Optional[click.Parameter] = None, - ctx: t.Optional[click.Context] = None, - ) -> t.Any: - try: - return json.loads(value) - except json.JSONDecodeError: - self.fail( - gettext( - "{value} is not a valid json value.", - ).format(value=value), - param, - ctx, - ) - - -class FileOrJson(click.ParamType): - name = "file-or-json" - - def __init__(self, allow_raw_str: bool = False): - self.allow_raw_str = allow_raw_str - - def convert( - self, - value: t.Any, - param: t.Optional[click.Parameter] = None, - ctx: t.Optional[click.Context] = None, - ) -> t.Any: - # check if valid file - full_path = os.path.abspath(os.path.expanduser(value)) - if os.path.isfile(full_path): - return str(Path(full_path).resolve()) - if isinstance(value, str): - try: - return json.loads(value) - except json.JSONDecodeError: - if self.allow_raw_str: - return value - self.fail( - gettext( - "{value} is not a valid json string nor an existing filepath.", - ).format(value=value), - param, - ctx, - ) - - -class DelimitedString(click.ParamType): - name = "delimited-string" - - def __init__(self, delimiter: str = ",", choices: t.Optional[t.List[str]] = None): - self.choices = choices if choices else [] - self.delimiter = delimiter - - def convert( - self, - value: t.Any, - param: t.Optional[click.Parameter] = None, - ctx: t.Optional[click.Context] = None, - ) -> t.Any: - # In case a list is provided as the default, will not break - if isinstance(value, list): - split = [str(v).strip() for v in value] - else: - split = [v.strip() for v in value.split(self.delimiter)] - if not self.choices: - return split - choices_str = ", ".join(map(repr, self.choices)) - for s in split: - if s not in self.choices: - self.fail( - ngettext( - "{value!r} is not {choice}.", - "{value!r} is not one of {choices}.", - len(self.choices), - ).format(value=s, choice=choices_str, choices=choices_str), - param, - ctx, - ) - return split - - -class CliMixin: - @staticmethod - @abstractmethod - def 
get_cli_options() -> t.List[click.Option]: - pass - - @classmethod - def add_cli_options(cls, cmd: click.Command) -> None: - options_to_add = cls.get_cli_options() - CliMixin.add_params(cmd, params=options_to_add) - - def add_params(cmd: click.Command, params: t.List[click.Parameter]): - existing_opts = [] - for param in cmd.params: - existing_opts.extend(param.opts) - - for param in params: - for opt in param.opts: - if opt in existing_opts: - raise ValueError(f"{opt} is already defined on the command {cmd.name}") - existing_opts.append(opt) - cmd.params.append(param) - - -class CliConfig(BaseConfig, CliMixin): - pass - - -class CliRetryStrategyConfig(RetryStrategyConfig, CliMixin): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--max-retries"], - default=None, - type=int, - help="If provided, will use this max retry for " - "back off strategy if http calls fail", - ), - click.Option( - ["--max-retry-time"], - default=None, - type=float, - help="If provided, will attempt retries for this long as part " - "of back off strategy if http calls fail", - ), - ] - return options - - @classmethod - def from_dict(cls, kvs: Json, **kwargs): - """ - Return None if none of the fields are being populated - """ - if isinstance(kvs, dict): - field_names = {field.name for field in fields(cls) if field.name in kvs} - field_values = [kvs.get(n) for n in field_names if kvs.get(n)] - if not field_values: - return None - return super().from_dict(kvs=kvs, **kwargs) - - -class CliProcessorConfig(ProcessorConfig, CliMixin): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--reprocess"], - is_flag=True, - default=False, - help="Reprocess a downloaded file even if the relevant structured " - "output .json file in output directory already exists.", - ), - click.Option( - ["--output-dir"], - default="structured-output", - help="Where to place structured output .json files.", - ), - click.Option( - ["--work-dir"], - type=str, - default=str( - (Path.home() / ".cache" / "unstructured" / "ingest" / "pipeline").resolve(), - ), - show_default=True, - help="Where to place working files when processing each step", - ), - click.Option( - ["--num-processes"], - default=2, - show_default=True, - help="Number of parallel processes with which to process docs", - ), - click.Option( - ["--raise-on-error"], - is_flag=True, - default=False, - help="If set, will raise an error if any doc in the pipeline fails. Otherwise will " - "log the error and continue with other docs", - ), - click.Option(["-v", "--verbose"], is_flag=True, default=False), - ] - return options - - -class CliReadConfig(ReadConfig, CliMixin): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--download-dir"], - help="Where files are downloaded to, defaults to a location at" - "`$HOME/.cache/unstructured/ingest//`.", - ), - click.Option( - ["--re-download"], - is_flag=True, - default=False, - help="Re-download files even if they are already present in download dir.", - ), - click.Option( - ["--preserve-downloads"], - is_flag=True, - default=False, - help="Preserve downloaded files. Otherwise each file is removed " - "after being processed successfully.", - ), - click.Option( - ["--download-only"], - is_flag=True, - default=False, - help="Download any files that are not already present in either --download-dir or " - "the default download ~/.cache/... 
location in case --download-dir " - "is not specified and " - "skip processing them through unstructured.", - ), - click.Option( - ["--max-docs"], - default=None, - type=int, - help="If specified, process at most the specified number of documents.", - ), - ] - return options - - -class CliPartitionConfig(PartitionConfig, CliMixin): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--pdf-infer-table-structure"], - is_flag=True, - default=False, - help="Partition will include the table's text_as_html " "in the response metadata.", - ), - click.Option( - ["--strategy"], - default="auto", - help="The method that will be used to process the documents. " - "Default: auto. Other strategies include `fast` and `hi_res`.", - ), - click.Option( - ["--ocr-languages"], - default=None, - type=DelimitedString(delimiter="+"), - help="A list of language packs to specify which languages to use for OCR, " - "separated by '+' e.g. 'eng+deu' to use the English and German language packs. " - "The appropriate Tesseract " - "language pack needs to be installed.", - ), - click.Option( - ["--encoding"], - default=None, - help="Text encoding to use when reading documents. By default the encoding is " - "detected automatically.", - ), - click.Option( - ["--skip-infer-table-types"], - type=DelimitedString(), - default=None, - help="Optional list of document types to skip table extraction on", - ), - click.Option( - ["--additional-partition-args"], - type=Dict(), - help="A json string representation of values to pass through to partition()", - ), - click.Option( - ["--fields-include"], - type=DelimitedString(), - default=["element_id", "text", "type", "metadata", "embeddings"], - help="Comma-delimited list. If set, include the specified top-level " - "fields in an element.", - ), - click.Option( - ["--flatten-metadata"], - is_flag=True, - default=False, - help="Results in flattened json elements. " - "Specifically, the metadata key values are brought to " - "the top-level of the element, and the `metadata` key itself is removed.", - ), - click.Option( - ["--metadata-include"], - default=[], - type=DelimitedString(), - help="Comma-delimited list. If set, include the specified metadata " - "fields if they exist and drop all other fields. ", - ), - click.Option( - ["--metadata-exclude"], - default=[], - type=DelimitedString(), - help="Comma-delimited list. If set, drop the specified metadata " - "fields if they exist.", - ), - click.Option( - ["--partition-by-api"], - is_flag=True, - default=False, - help="Use a remote API to partition the files." - " Otherwise, use the function from partition.auto", - ), - click.Option( - ["--partition-endpoint"], - default="https://api.unstructured.io/general/v0/general", - help="If partitioning via api, use the following host. 
" - "Default: https://api.unstructured.io/general/v0/general", - ), - click.Option( - ["--api-key"], - default=None, - help="API Key for partition endpoint.", - ), - click.Option( - ["--hi-res-model-name"], - default=None, - help="Model name for hi-res strategy.", - ), - ] - return options - - -class CliRecursiveConfig(CliConfig): - recursive: bool - - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--recursive"], - is_flag=True, - default=False, - help="Recursively download files in their respective folders " - "otherwise stop at the files in provided folder level.", - ), - ] - return options - - -class CliFilesStorageConfig(FileStorageConfig, CliMixin): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--remote-url"], - required=True, - help="Remote fsspec URL formatted as `protocol://dir/path`", - ), - click.Option( - ["--uncompress"], - type=bool, - default=False, - is_flag=True, - help="Uncompress any archived files. Currently supporting zip and tar " - "files based on file extension.", - ), - click.Option( - ["--recursive"], - is_flag=True, - default=False, - help="Recursively download files in their respective folders " - "otherwise stop at the files in provided folder level.", - ), - click.Option( - ["--file-glob"], - default=None, - type=DelimitedString(), - help="A comma-separated list of file globs to limit which types of " - "local files are accepted, e.g. '*.html,*.txt'", - ), - ] - return options - - -class CliEmbeddingConfig(EmbeddingConfig, CliMixin): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - from unstructured.embed import EMBEDDING_PROVIDER_TO_CLASS_MAP - - options = [ - click.Option( - ["--embedding-provider"], - help="Type of the embedding class to be used. Can be one of: " - f"{list(EMBEDDING_PROVIDER_TO_CLASS_MAP)}", - type=click.Choice(list(EMBEDDING_PROVIDER_TO_CLASS_MAP)), - ), - click.Option( - ["--embedding-api-key"], - help="API key for the embedding model, for the case an API key is needed.", - type=str, - default=None, - ), - click.Option( - ["--embedding-model-name"], - help="Embedding model name, if needed. " - "Chooses a particular LLM between different options, to embed with it.", - type=str, - default=None, - ), - click.Option( - ["--embedding-aws-access-key-id"], - help="AWS access key used for AWS-based embedders, such as bedrock", - type=str, - default=None, - ), - click.Option( - ["--embedding-aws-secret-access-key"], - help="AWS secret key used for AWS-based embedders, such as bedrock", - type=str, - default=None, - ), - click.Option( - ["--embedding-aws-region"], - help="AWS region used for AWS-based embedders, such as bedrock", - type=str, - default="us-west-2", - ), - ] - return options - - @classmethod - def from_dict(cls, kvs: Json, **kwargs): - """ - Extension of the dataclass from_dict() to avoid a naming conflict with other CLI params. 
- This allows CLI arguments to be prepended with embedding_ during CLI invocation but - doesn't require that as part of the field names in this class - """ - if isinstance(kvs, dict): - new_kvs = { - k[len("embedding_") :]: v # noqa: E203 - for k, v in kvs.items() - if k.startswith("embedding_") - } - if len(new_kvs.keys()) == 0: - return None - if not new_kvs.get("provider"): - return None - return super().from_dict(new_kvs, **kwargs) - return super().from_dict(kvs, **kwargs) - - -class CliChunkingConfig(ChunkingConfig, CliMixin): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--chunk-elements"], - is_flag=True, - default=False, - help="Deprecated, use --chunking-strategy instead.", - ), - click.Option( - ["--chunking-strategy"], - type=str, - help="The rule-set to use to form chunks. Omit to disable chunking.", - ), - click.Option( - ["--chunk-combine-text-under-n-chars"], - type=int, - help=( - "Combine consecutive chunks when the first does not exceed this length and" - " the second will fit without exceeding the hard-maximum length. Only" - " operative for 'by_title' chunking-strategy." - ), - ), - click.Option( - ["--chunk-include-orig-elements/--chunk-no-include-orig-elements"], - is_flag=True, - default=True, - help=( - "When chunking, add the original elements consolidated to form each chunk to" - " `.metadata.orig_elements` on that chunk." - ), - ), - click.Option( - ["--chunk-max-characters"], - type=int, - default=CHUNK_MAX_CHARS_DEFAULT, - show_default=True, - help=( - "Hard maximum chunk length. No chunk will exceed this length. An oversized" - " element will be divided by text-splitting to fit this window." - ), - ), - click.Option( - ["--chunk-multipage-sections/--chunk-no-multipage-sections"], - is_flag=True, - default=CHUNK_MULTI_PAGE_DEFAULT, - help=( - "Ignore page boundaries when chunking such that elements from two different" - " pages can appear in the same chunk. Only operative for 'by_title'" - " chunking-strategy." - ), - ), - click.Option( - ["--chunk-new-after-n-chars"], - type=int, - help=( - "Soft-maximum chunk length. Another element will not be added to a chunk of" - " this length even when it would fit without exceeding the hard-maximum" - " length." - ), - ), - click.Option( - ["--chunk-overlap"], - type=int, - default=0, - show_default=True, - help=( - "Prefix chunk text with last overlap=N characters of prior chunk. Only" - " applies to oversized chunks divided by text-splitting. To apply overlap to" - " non-oversized chunks use the --overlap-all option." - ), - ), - click.Option( - ["--chunk-overlap-all"], - is_flag=True, - default=False, - help=( - "Apply overlap to chunks formed from whole elements as well as those formed" - " by text-splitting oversized elements. Overlap length is taken from the --overlap" - " option value." - ), - ), - ] - return options - - @classmethod - def from_dict(cls, kvs: Json, **kwargs: t.Any) -> t.Optional[Self]: - """Extension of dataclass from_dict() to avoid a naming conflict with other CLI params. 
- - This allows CLI arguments to be prefixed with "chunking_" during CLI invocation but doesn't - require that as part of the field names in this class - """ - if not isinstance(kvs, dict): - return super().from_dict(kvs=kvs, **kwargs) - - options: t.Dict[str, t.Any] = kvs.copy() - chunk_elements = options.pop("chunk_elements", None) - chunking_strategy = options.pop("chunking_strategy", None) - # -- when neither are specified, chunking is not requested -- - if not chunk_elements and not chunking_strategy: - return None - - def iter_kv_pairs() -> t.Iterator[t.Tuple[str, t.Any]]: - # -- newer `chunking_strategy` option takes precedence over legacy `chunk_elements` -- - if chunking_strategy: - yield "chunking_strategy", chunking_strategy - # -- but legacy case is still supported, equivalent to `chunking_strategy="by_title" -- - elif chunk_elements: - yield "chunking_strategy", "by_title" - - yield from ( - (key[len("chunk_") :], value) - for key, value in options.items() - if key.startswith("chunk_") - ) - - new_kvs = dict(iter_kv_pairs()) - return None if len(new_kvs) == 0 else super().from_dict(kvs=new_kvs, **kwargs) - - -class CliPermissionsConfig(PermissionsConfig, CliMixin): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--permissions-application-id"], - type=str, - help="Microsoft Graph API application id", - ), - click.Option( - ["--permissions-client-cred"], - type=str, - help="Microsoft Graph API application credentials", - ), - click.Option( - ["--permissions-tenant"], - type=str, - help="e.g https://contoso.onmicrosoft.com to get permissions data within tenant.", - ), - ] - return options - - @classmethod - def from_dict(cls, kvs: Json, **kwargs): - """ - Extension of the dataclass from_dict() to avoid a naming conflict with other CLI params. - This allows CLI arguments to be prepended with permissions_ during CLI invocation but - doesn't require that as part of the field names in this class. It also checks if the - CLI params are provided as intended. 
- """ - - if isinstance(kvs, dict): - permissions_application_id = kvs.get("permissions_application_id") - permissions_client_cred = kvs.get("permissions_client_cred") - permissions_tenant = kvs.get("permissions_tenant") - permission_values = [ - permissions_application_id, - permissions_client_cred, - permissions_tenant, - ] - if any(permission_values) and not all(permission_values): - raise ValueError( - "Please provide either none or all of the following optional values:\n" - "--permissions-application-id\n" - "--permissions-client-cred\n" - "--permissions-tenant", - ) - - new_kvs = { - k[len("permissions_") :]: v # noqa: E203 - for k, v in kvs.items() - if k.startswith("permissions_") - } - if len(new_kvs.keys()) == 0: - return None - return super().from_dict(kvs=new_kvs, **kwargs) - return super().from_dict(kvs=kvs, **kwargs) diff --git a/unstructured/ingest/cli/utils.py b/unstructured/ingest/cli/utils.py deleted file mode 100644 index 701355f26..000000000 --- a/unstructured/ingest/cli/utils.py +++ /dev/null @@ -1,205 +0,0 @@ -import typing as t -from dataclasses import fields, is_dataclass -from gettext import gettext as _ - -import click - -from unstructured.ingest.cli.interfaces import ( - CliChunkingConfig, - CliConfig, - CliEmbeddingConfig, - CliPartitionConfig, - CliPermissionsConfig, - CliProcessorConfig, - CliReadConfig, - CliRetryStrategyConfig, -) -from unstructured.ingest.interfaces import BaseConfig -from unstructured.ingest.logger import logger - - -def conform_click_options(options: dict): - # Click sets all multiple fields as tuple, this needs to be updated to list - for k, v in options.items(): - if isinstance(v, tuple): - options[k] = list(v) - - -def extract_config(flat_data: dict, config: t.Type[BaseConfig]) -> BaseConfig: - """ - To be able to extract a nested dataclass from a flat dictionary (as in one coming - from a click-based options input), the config class is dynamically looked through for - nested dataclass fields and new nested dictionaries are created to conform to the - shape the overall class expects whn parsing from a dict. During the process, this will create - copies of the original dictionary to avoid pruning fields but this isn't a - problem since the `from_dict()` method ignores unneeded values. - - Not handling more complex edge cases for now such as nested types i.e Union[List[List[...]]] - """ - - def conform_dict(inner_d: dict, inner_config: t.Type[BaseConfig]): - # Catch edge cases (i.e. 
Dict[str, ...]) where underlying type is not a concrete Class, - # causing 'issubclass() arg 1 must be a class' errors, return False - def is_subclass(instance, class_type) -> bool: - try: - return issubclass(instance, class_type) - except Exception: - return False - - dd = inner_d.copy() - for field in fields(inner_config): - f_type = field.type - # Handle the case where the type of a value if a Union (possibly optional) - if t.get_origin(f_type) is t.Union: - union_values = t.get_args(f_type) - # handle List types - union_values = [ - t.get_args(u)[0] if t.get_origin(u) is list else u for u in union_values - ] - # Ignore injected NoneType when optional - concrete_union_values = [v for v in union_values if not is_subclass(v, type(None))] - dataclass_union_values = [v for v in concrete_union_values if is_dataclass(v)] - non_dataclass_union_values = [ - v for v in concrete_union_values if not is_dataclass(v) - ] - if not dataclass_union_values: - continue - # Check if the key for this field already exists in the dictionary, - # if so it might map to one of these non dataclass fields and this - # can't be enforced - if non_dataclass_union_values and field.name in dd: - continue - if len(dataclass_union_values) > 1: - logger.warning( - "more than one dataclass type possible for field {}, " - "not extracting: {}".format(field.name, ", ".join(dataclass_union_values)) - ) - continue - f_type = dataclass_union_values[0] - origin = t.get_origin(f_type) - if origin: - f_type = origin - if is_subclass(f_type, BaseConfig): - dd[field.name] = conform_dict(inner_d=dd, inner_config=f_type) - return dd - - adjusted_dict = conform_dict(inner_d=flat_data, inner_config=config) - return config.from_dict(adjusted_dict, apply_name_overload=False) - - -def extract_configs( - data: dict, - extras: t.Optional[t.Dict[str, t.Type[BaseConfig]]] = None, - validate: t.Optional[t.List[t.Type[BaseConfig]]] = None, - add_defaults: bool = True, -) -> t.Dict[str, BaseConfig]: - """ - Extract all common configs used across CLI command and validate that any - command-specific configs have all their needed information from the Click - options that are passed in during invocation. 
- """ - validate = validate if validate else [] - res = ( - { - "read_config": extract_config(flat_data=data, config=CliReadConfig), - "partition_config": extract_config(flat_data=data, config=CliPartitionConfig), - "embedding_config": extract_config(flat_data=data, config=CliEmbeddingConfig), - "chunking_config": extract_config(flat_data=data, config=CliChunkingConfig), - "processor_config": extract_config(flat_data=data, config=CliProcessorConfig), - "permissions_config": extract_config(flat_data=data, config=CliPermissionsConfig), - "retry_strategy_config": extract_config(flat_data=data, config=CliRetryStrategyConfig), - } - if add_defaults - else {} - ) - if extras: - for k, conf in extras.items(): - try: - res[k] = extract_config(flat_data=data, config=conf) - except Exception as e: - logger.error(f"failed to extract config from {conf.__name__}") - raise e - for v in validate: - try: - extract_config(flat_data=data, config=v) - except Exception as e: - raise Exception(f"failed to validate config {v.__name__}") from e - - return res - - -def add_options( - cmd: click.Command, extras: t.List[t.Type[CliConfig]], is_src: bool = True -) -> click.Command: - configs: t.List[t.Type[CliConfig]] = ( - [ - CliPartitionConfig, - CliReadConfig, - CliEmbeddingConfig, - CliChunkingConfig, - CliProcessorConfig, - CliPermissionsConfig, - CliRetryStrategyConfig, - ] - if is_src - else [] - ) - # make sure what's unique to this cmd appears first - extras.extend(configs) - for config in extras: - try: - config.add_cli_options(cmd=cmd) - except ValueError as e: - raise ValueError(f"failed to set configs from {config.__name__}: {e}") - return cmd - - -class Group(click.Group): - def parse_args(self, ctx, args): - """ - This allows for subcommands to be called with the --help flag without breaking - if parent command is missing any of its required parameters - """ - - try: - return super().parse_args(ctx, args) - except click.MissingParameter: - if "--help" not in args: - raise - - # remove the required params so that help can display - for param in self.params: - param.required = False - return super().parse_args(ctx, args) - - def format_commands(self, ctx: click.Context, formatter: click.HelpFormatter) -> None: - """ - Copy of the original click.Group format_commands() method but replacing - 'Commands' -> 'Destinations' - """ - commands = [] - for subcommand in self.list_commands(ctx): - cmd = self.get_command(ctx, subcommand) - # What is this, the tool lied about a command. 
Ignore it - if cmd is None: - continue - if cmd.hidden: - continue - - commands.append((subcommand, cmd)) - - # allow for 3 times the default spacing - if len(commands): - if formatter.width: - limit = formatter.width - 6 - max(len(cmd[0]) for cmd in commands) - else: - limit = -6 - max(len(cmd[0]) for cmd in commands) - - rows = [] - for subcommand, cmd in commands: - help = cmd.get_short_help_str(limit) - rows.append((subcommand, help)) - - if rows: - with formatter.section(_("Destinations")): - formatter.write_dl(rows) diff --git a/unstructured/ingest/connector/__init__.py b/unstructured/ingest/connector/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstructured/ingest/connector/airtable.py b/unstructured/ingest/connector/airtable.py deleted file mode 100644 index 27669d4a3..000000000 --- a/unstructured/ingest/connector/airtable.py +++ /dev/null @@ -1,309 +0,0 @@ -import typing as t -from dataclasses import dataclass, field -from datetime import datetime -from pathlib import Path - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from pyairtable import Api - - -@dataclass -class AirtableAccessConfig(AccessConfig): - personal_access_token: str = enhanced_field(sensitive=True) - - -@dataclass -class SimpleAirtableConfig(BaseConnectorConfig): - """Connector config where: - auth_token is the authentication token to authenticate into Airtable. - - Check https://support.airtable.com/docs/airtable-api-key-deprecation-notice - for more info on authentication. - """ - - access_config: AirtableAccessConfig - list_of_paths: t.Optional[str] = None - - -@dataclass -class AirtableTableMeta: - """Metadata specifying a table id, a base id which the table is stored in, - and an optional view id in case particular rows and fields are to be ingested""" - - base_id: str - table_id: str - view_id: t.Optional[str] = None - - -@dataclass -class AirtableIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - """Class encapsulating fetching a doc and writing processed results (but not - doing the processing). - - Current implementation creates an Airtable connection object - to fetch each document, rather than creating it for each thread. 
- """ - - connector_config: SimpleAirtableConfig - table_meta: AirtableTableMeta - registry_name: str = "airtable" - - @property - def filename(self): - return ( - Path(self.read_config.download_dir) - / self.table_meta.base_id - / f"{self.table_meta.table_id}.csv" - ).resolve() - - @property - def _output_filename(self): - """Create output file path based on output directory, base id, and table id""" - output_file = f"{self.table_meta.table_id}.json" - return Path(self.processor_config.output_dir) / self.table_meta.base_id / output_file - - @property - def record_locator(self) -> t.Optional[t.Dict[str, t.Any]]: - return { - "base_id": self.table_meta.base_id, - "table_id": self.table_meta.table_id, - "view_id": self.table_meta.view_id, - } - - @property - def version(self) -> t.Optional[str]: - return None - - @requires_dependencies(["pyairtable"], extras="airtable") - def _query_table(self): - from pyairtable import Api - - api = Api(self.connector_config.access_config.personal_access_token) - table = api.table(self.table_meta.base_id, self.table_meta.table_id) - table_url = table.url - rows = table.all( - view=self.table_meta.view_id, - ) - return rows, table_url - - @SourceConnectionNetworkError.wrap - def _get_table_rows(self): - rows, table_url = self._query_table() - - if len(rows) == 0: - logger.info("Empty document, retrieved table but it has no rows.") - return rows, table_url - - def update_source_metadata(self, **kwargs): - """Gets file metadata from the current table.""" - - rows, table_url = kwargs.get("rows_tuple", self._get_table_rows()) - if rows is None or len(rows) < 1: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - dates = [r.get("createdTime", "") for r in rows] - dates.sort() - - date_created = datetime.strptime( - dates[0], - "%Y-%m-%dT%H:%M:%S.%fZ", - ).isoformat() - - date_modified = datetime.strptime( - dates[-1], - "%Y-%m-%dT%H:%M:%S.%fZ", - ).isoformat() - - self.source_metadata = SourceMetadata( - date_created=date_created, - date_modified=date_modified, - source_url=table_url, - exists=True, - ) - - @SourceConnectionError.wrap - @requires_dependencies(["pandas"]) - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - import pandas as pd - - rows, table_url = self._get_table_rows() - self.update_source_metadata(rows_tuple=(rows, table_url)) - if rows is None: - raise ValueError( - "Failed to retrieve rows from table " - f"{self.table_meta.base_id}/{self.table_meta.table_id}. Check logs", - ) - # NOTE: Might be a good idea to add pagination for large tables - df = pd.DataFrame.from_dict( - [row["fields"] for row in rows], - ).sort_index(axis=1) - - self.document = df.to_csv() - self.filename.parent.mkdir(parents=True, exist_ok=True) - - with open(self.filename, "w", encoding="utf8") as f: - f.write(self.document) - - -airtable_id_prefixes = ["app", "tbl", "viw"] - - -def raise_airtable_path_error(piece): - if any(piece[:3] == prefix for prefix in airtable_id_prefixes): - raise ( - ValueError( - "Path components are not correctly ordered.\ - Valid path structures: \ - - base_id/table_id/view_id , \ - - base_id/table_id, \ - - base_id .\ - It is also possible to leave --airtable-list-of-paths \ - argument empty (this will ingest everything).", - ) - ) - else: - raise ( - ValueError( - """Path components are not valid Airtable ids. 
- base_id should look like: appAbcDeF1ghijKlm, - table_id should look like: tblAbcDeF1ghijKlm, - view_id should look like: viwAbcDeF1ghijKlm""", - ) - ) - - -def check_path_validity(path): - pieces = path.split("/") - assert ( - 1 <= len(pieces) <= 3 - ), "Path should be composed of between 1-3 \ - components (base_id, table_id, view_id)." - - for i, piece in enumerate(pieces): - try: - assert piece[:3] == airtable_id_prefixes[i] - except AssertionError: - raise_airtable_path_error(piece) - - -@dataclass -class AirtableSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - """Fetches tables or views from an Airtable org.""" - - connector_config: SimpleAirtableConfig - _api: t.Optional["Api"] = field(init=False, default=None) - - @property - def api(self): - if self._api is None: - self._api = Api(self.connector_config.access_config.personal_access_token) - return self._api - - @api.setter - def api(self, api: "Api"): - self._api = api - - def check_connection(self): - import requests - - try: - self.api.request(method="HEAD", url=self.api.build_url("meta", "bases")) - except requests.HTTPError as http_error: - logger.error(f"failed to validate connection: {http_error}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {http_error}") - - @requires_dependencies(["pyairtable"], extras="airtable") - def initialize(self): - from pyairtable import Api - - self.base_ids_to_fetch_tables_from = [] - if self.connector_config.list_of_paths: - self.list_of_paths = self.connector_config.list_of_paths.split() - - self.api = Api(self.connector_config.access_config.personal_access_token) - - @requires_dependencies(["pyairtable"], extras="airtable") - def use_all_bases(self): - from pyairtable.metadata import get_api_bases - - self.base_ids_to_fetch_tables_from = [ - base["id"] for base in get_api_bases(self.api)["bases"] - ] - - @requires_dependencies(["pyairtable"], extras="airtable") - def fetch_table_ids(self): - from pyairtable.metadata import get_base_schema - - bases = [ - (base_id, self.api.base(base_id)) for base_id in self.base_ids_to_fetch_tables_from - ] - - metadata_for_each_base = [ - (base_id, get_base_schema(base)["tables"]) for base_id, base in bases - ] - - baseid_tableid_viewid_tuples = [ - (base_id, table["id"], None) - for base_id, base_metadata in metadata_for_each_base - for table in base_metadata - ] - - return baseid_tableid_viewid_tuples - - def get_ingest_docs(self): - """Fetches documents in an Airtable org.""" - - # When no list of paths provided, the connector ingests everything. - if not self.connector_config.list_of_paths: - self.use_all_bases() - baseid_tableid_viewid_tuples = self.fetch_table_ids() - - # When there is a list of paths, the connector checks the validity - # of the paths, and fetches table_ids to be ingested, based on the paths. 
- else: - self.paths = self.connector_config.list_of_paths.split() - self.paths = [path.strip("/") for path in self.paths] - - [check_path_validity(path) for path in self.paths] - - self.base_ids_to_fetch_tables_from = [] - baseid_tableid_viewid_tuples = [] - - for path in self.paths: - components = path.split("/") - if len(components) == 1: # only a base_id is provided - self.base_ids_to_fetch_tables_from.append(components[0]) - elif len(components) == 2: # a base_id and a table_id are provided - baseid_tableid_viewid_tuples.append((components[0], components[1], None)) - elif len(components) == 3: # a base_id, table_id, and a view_id are provided - baseid_tableid_viewid_tuples.append( - (components[0], components[1], components[2]), - ) - - baseid_tableid_viewid_tuples += self.fetch_table_ids() - return [ - AirtableIngestDoc( - processor_config=self.processor_config, - connector_config=self.connector_config, - read_config=self.read_config, - table_meta=AirtableTableMeta(base_id, table_id, view_id), - ) - for base_id, table_id, view_id in baseid_tableid_viewid_tuples - ] diff --git a/unstructured/ingest/connector/astradb.py b/unstructured/ingest/connector/astradb.py deleted file mode 100644 index 2642ea191..000000000 --- a/unstructured/ingest/connector/astradb.py +++ /dev/null @@ -1,238 +0,0 @@ -import copy -import typing as t -from dataclasses import dataclass, field -from pathlib import Path - -from unstructured import __name__ as integration_name -from unstructured.__version__ import __version__ as integration_version -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.enhanced_dataclass.core import _asdict -from unstructured.ingest.error import DestinationConnectionError, SourceConnectionError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseDestinationConnector, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, - WriteConfig, -) -from unstructured.ingest.logger import logger -from unstructured.ingest.utils.data_prep import batch_generator -from unstructured.staging.base import flatten_dict -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from astrapy.db import AstraDB, AstraDBCollection - -NON_INDEXED_FIELDS = ["metadata._node_content", "content"] - - -@dataclass -class AstraDBAccessConfig(AccessConfig): - token: str = enhanced_field(sensitive=True) - api_endpoint: str = enhanced_field(sensitive=True) - - -@dataclass -class SimpleAstraDBConfig(BaseConnectorConfig): - access_config: AstraDBAccessConfig - collection_name: str - namespace: t.Optional[str] = None - - -@dataclass -class AstraDBIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - connector_config: SimpleAstraDBConfig - metadata: t.Dict[str, str] = field(default_factory=dict) - registry_name: str = "astradb" - - @property - def filename(self): - return ( - Path(self.read_config.download_dir) - / self.connector_config.collection_name - / f"{self.metadata['_id']}.txt" - ).resolve() - - @property - def _output_filename(self): - return ( - Path(self.processor_config.output_dir) - / self.connector_config.collection_name - / f"{self.metadata['_id']}.json" - ).resolve() - - def update_source_metadata(self, **kwargs): - if not self.metadata: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - self.source_metadata = SourceMetadata( - exists=True, - ) - - @SourceConnectionError.wrap - @requires_dependencies(["astrapy"], 
extras="astradb") - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - self.filename.parent.mkdir(parents=True, exist_ok=True) - - flattened_dict = flatten_dict(dictionary=self.metadata) - str_values = [str(value) for value in flattened_dict.values()] - concatenated_values = "\n".join(str_values) - - with open(self.filename, "w") as f: - f.write(concatenated_values) - - -@dataclass -class AstraDBSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - connector_config: SimpleAstraDBConfig - _astra_db: t.Optional["AstraDB"] = field(init=False, default=None) - _astra_db_collection: t.Optional["AstraDBCollection"] = field(init=False, default=None) - - @property - @requires_dependencies(["astrapy"], extras="astradb") - def astra_db_collection(self) -> "AstraDBCollection": - if self._astra_db_collection is None: - from astrapy.db import AstraDB - - # Build the Astra DB object. - # caller_name/version for Astra DB tracking - self._astra_db = AstraDB( - api_endpoint=self.connector_config.access_config.api_endpoint, - token=self.connector_config.access_config.token, - namespace=self.connector_config.namespace, - caller_name=integration_name, - caller_version=integration_version, - ) - - # Create and connect to the collection - self._astra_db_collection = self._astra_db.collection( - collection_name=self.connector_config.collection_name, - ) - return self._astra_db_collection # type: ignore - - @requires_dependencies(["astrapy"], extras="astradb") - @SourceConnectionError.wrap # type: ignore - def initialize(self): - _ = self.astra_db_collection - - @requires_dependencies(["astrapy"], extras="astradb") - def check_connection(self): - try: - _ = self.astra_db_collection - except Exception as e: - logger.error(f"Failed to validate connection {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - @requires_dependencies(["astrapy"], extras="astradb") - def get_ingest_docs(self): # type: ignore - # Perform the find operation - astra_docs = list(self.astra_db_collection.paginated_find()) - - doc_list = [] - for record in astra_docs: - doc = AstraDBIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - metadata=record, - ) - - doc.update_source_metadata() - - doc_list.append(doc) - - return doc_list - - -@dataclass -class AstraDBWriteConfig(WriteConfig): - embedding_dimension: int - requested_indexing_policy: t.Optional[t.Dict[str, t.Any]] = None - batch_size: int = 20 - - -@dataclass -class AstraDBDestinationConnector(BaseDestinationConnector): - write_config: AstraDBWriteConfig - connector_config: SimpleAstraDBConfig - _astra_db: t.Optional["AstraDB"] = field(init=False, default=None) - _astra_db_collection: t.Optional["AstraDBCollection"] = field(init=False, default=None) - - def to_dict(self, **kwargs): - """ - The _astra_db_collection variable in this dataclass breaks deepcopy due to: - TypeError: cannot pickle '_thread.lock' object - When serializing, remove it, meaning client data will need to be reinitialized - when deserialized - """ - self_cp = copy.copy(self) - - if hasattr(self_cp, "_astra_db_collection"): - setattr(self_cp, "_astra_db_collection", None) - - return _asdict(self_cp, **kwargs) - - @property - @requires_dependencies(["astrapy"], extras="astradb") - def astra_db_collection(self) -> "AstraDBCollection": - if self._astra_db_collection is None: - from astrapy.db import AstraDB - - collection_name = self.connector_config.collection_name - 
embedding_dimension = self.write_config.embedding_dimension - - # If the user has requested an indexing policy, pass it to the Astra DB - requested_indexing_policy = self.write_config.requested_indexing_policy - options = {"indexing": requested_indexing_policy} if requested_indexing_policy else None - - # caller_name/version for Astra DB tracking - self._astra_db = AstraDB( - api_endpoint=self.connector_config.access_config.api_endpoint, - token=self.connector_config.access_config.token, - namespace=self.connector_config.namespace, - caller_name=integration_name, - caller_version=integration_version, - ) - - # Create and connect to the newly created collection - self._astra_db_collection = self._astra_db.create_collection( - collection_name=collection_name, - dimension=embedding_dimension, - options=options, - ) - return self._astra_db_collection - - @requires_dependencies(["astrapy"], extras="astradb") - @DestinationConnectionError.wrap - def initialize(self): - _ = self.astra_db_collection - - @requires_dependencies(["astrapy"], extras="astradb") - def check_connection(self): - try: - _ = self.astra_db_collection - except Exception as e: - logger.error(f"Failed to validate connection {e}", exc_info=True) - raise DestinationConnectionError(f"failed to validate connection: {e}") - - def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None: - logger.info(f"Inserting / updating {len(elements_dict)} documents to Astra DB.") - - astra_batch_size = self.write_config.batch_size - - for batch in batch_generator(elements_dict, astra_batch_size): - self._astra_db_collection.insert_many(batch) - - def normalize_dict(self, element_dict: dict) -> dict: - return { - "$vector": element_dict.pop("embeddings", None), - "content": element_dict.pop("text", None), - "metadata": element_dict, - } diff --git a/unstructured/ingest/connector/azure_cognitive_search.py b/unstructured/ingest/connector/azure_cognitive_search.py deleted file mode 100644 index fc932eb5e..000000000 --- a/unstructured/ingest/connector/azure_cognitive_search.py +++ /dev/null @@ -1,142 +0,0 @@ -import json -import typing as t -import uuid -from dataclasses import dataclass, field - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import DestinationConnectionError, WriteError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseDestinationConnector, - WriteConfig, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from azure.search.documents import SearchClient - - -@dataclass -class AzureCognitiveSearchAccessConfig(AccessConfig): - key: str = enhanced_field(sensitive=True) - - -@dataclass -class SimpleAzureCognitiveSearchStorageConfig(BaseConnectorConfig): - endpoint: str - access_config: AzureCognitiveSearchAccessConfig - - -@dataclass -class AzureCognitiveSearchWriteConfig(WriteConfig): - index: str - - -@dataclass -class AzureCognitiveSearchDestinationConnector(BaseDestinationConnector): - write_config: AzureCognitiveSearchWriteConfig - connector_config: SimpleAzureCognitiveSearchStorageConfig - _client: t.Optional["SearchClient"] = field(init=False, default=None) - - @requires_dependencies(["azure.search"], extras="azure-cognitive-search") - def generate_client(self) -> "SearchClient": - from azure.core.credentials import AzureKeyCredential - from azure.search.documents import SearchClient - - # Create a client - credential = 
AzureKeyCredential(self.connector_config.access_config.key) - return SearchClient( - endpoint=self.connector_config.endpoint, - index_name=self.write_config.index, - credential=credential, - ) - - @property - def client(self) -> "SearchClient": - if self._client is None: - self._client = self.generate_client() - return self._client - - def check_connection(self): - try: - self.client.get_document_count() - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise DestinationConnectionError(f"failed to validate connection: {e}") - - def initialize(self): - _ = self.client - - def conform_dict(self, data: dict) -> None: - """ - updates the dictionary that is from each Element being converted into a dict/json - into a dictionary that conforms to the schema expected by the - Azure Cognitive Search index - """ - from dateutil import parser # type: ignore - - data["id"] = str(uuid.uuid4()) - - if points := data.get("metadata", {}).get("coordinates", {}).get("points"): - data["metadata"]["coordinates"]["points"] = json.dumps(points) - if version := data.get("metadata", {}).get("data_source", {}).get("version"): - data["metadata"]["data_source"]["version"] = str(version) - if record_locator := data.get("metadata", {}).get("data_source", {}).get("record_locator"): - data["metadata"]["data_source"]["record_locator"] = json.dumps(record_locator) - if permissions_data := ( - data.get("metadata", {}).get("data_source", {}).get("permissions_data") - ): - data["metadata"]["data_source"]["permissions_data"] = json.dumps(permissions_data) - if links := data.get("metadata", {}).get("links"): - data["metadata"]["links"] = [json.dumps(link) for link in links] - if last_modified := data.get("metadata", {}).get("last_modified"): - data["metadata"]["last_modified"] = parser.parse(last_modified).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ", - ) - if date_created := data.get("metadata", {}).get("data_source", {}).get("date_created"): - data["metadata"]["data_source"]["date_created"] = parser.parse(date_created).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ", - ) - if date_modified := data.get("metadata", {}).get("data_source", {}).get("date_modified"): - data["metadata"]["data_source"]["date_modified"] = parser.parse(date_modified).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ", - ) - if date_processed := data.get("metadata", {}).get("data_source", {}).get("date_processed"): - data["metadata"]["data_source"]["date_processed"] = parser.parse( - date_processed, - ).strftime("%Y-%m-%dT%H:%M:%S.%fZ") - if page_number := data.get("metadata", {}).get("page_number"): - data["metadata"]["page_number"] = str(page_number) - - @requires_dependencies(["azure"], extras="azure-cognitive-search") - def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None: - import azure.core.exceptions - - logger.info( - f"writing {len(elements_dict)} documents to destination " - f"index at {self.write_config.index}", - ) - try: - results = self.client.upload_documents(documents=elements_dict) - - except azure.core.exceptions.HttpResponseError as http_error: - raise WriteError(f"http error: {http_error}") from http_error - errors = [] - success = [] - for result in results: - if result.succeeded: - success.append(result) - else: - errors.append(result) - logger.debug(f"results: {len(success)} successes, {len(errors)} failures") - if errors: - raise WriteError( - ", ".join( - [ - f"{error.key}: [{error.status_code}] {error.error_message}" - for error in errors - ], - ), - ) diff --git 
a/unstructured/ingest/connector/biomed.py b/unstructured/ingest/connector/biomed.py deleted file mode 100644 index 7371699e3..000000000 --- a/unstructured/ingest/connector/biomed.py +++ /dev/null @@ -1,313 +0,0 @@ -import os -import typing as t -import urllib.request -from dataclasses import dataclass -from ftplib import FTP, error_perm -from pathlib import Path - -import requests -from requests.adapters import HTTPAdapter - -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import ( - BaseConnectorConfig, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, -) -from unstructured.ingest.logger import logger -from unstructured.utils import ( - validate_date_args, -) - -DOMAIN = "ftp.ncbi.nlm.nih.gov" -FTP_DOMAIN = f"ftp://{DOMAIN}" -PMC_DIR = "pub/pmc" -PDF_DIR = "oa_pdf" - - -@dataclass -class BiomedFileMeta: - ftp_path: str - download_filepath: str - output_filepath: str - - -@dataclass -class SimpleBiomedConfig(BaseConnectorConfig): - """Connector config where path is the FTP directory path and - id_, from_, until, format are API parameters.""" - - path: t.Optional[str] = None - # OA Web Service API Options - api_id: t.Optional[str] = None - api_from: t.Optional[str] = None - api_until: t.Optional[str] = None - max_request_time: int = 45 - - def validate_api_inputs(self): - valid = False - - if self.api_from: - valid = validate_date_args(self.api_from) - - if self.api_until: - valid = validate_date_args(self.api_until) - - return valid - - def __post_init__(self): - self.is_file = False - self.is_dir = False - self.is_api = False - - if not self.path: - is_valid = self.validate_api_inputs() - if not is_valid: - raise ValueError( - "Path argument or at least one of the " - "OA Web Service arguments MUST be provided.", - ) - - self.is_api = True - else: - self.path = self.path.strip("/") - is_valid = self.path.lower().startswith(PDF_DIR) - - if not is_valid: - raise ValueError(f"Path MUST start with {PDF_DIR}") - - ftp = FTP(DOMAIN) - ftp.login() - - path = Path(PMC_DIR) / self.path - response = "" - try: - if path.suffix == ".pdf": - response = ftp.cwd(str(path.parent)) - self.is_file = True - else: - response = ftp.cwd(str(path)) - except error_perm as exc: - if "no such file or directory" in exc.args[0].lower(): - raise ValueError(f"The path: {path} is not valid.") - elif "not a directory" in exc.args[0].lower(): - self.is_file = True - elif "command successful" in response: - self.is_dir = True - else: - raise ValueError( - "Something went wrong when validating the path: {path}.", - ) - - -@dataclass -class BiomedIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - connector_config: SimpleBiomedConfig - file_meta: BiomedFileMeta - registry_name: str = "biomed" - - @property - def filename(self): - return Path(self.file_meta.download_filepath).resolve() # type: ignore - - @property - def _output_filename(self): - return Path(f"{self.file_meta.output_filepath}.json").resolve() - - def cleanup_file(self): - if ( - not self.read_config.preserve_downloads - and self.filename.is_file() - and not self.read_config.download_only - ): - logger.debug(f"Cleaning up {self}") - Path.unlink(self.filename) - - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - download_path = self.file_meta.download_filepath # type: ignore - dir_ = Path(os.path.dirname(download_path)) # type: ignore - if not dir_.is_dir(): - 
logger.debug(f"Creating directory: {dir_}") - - if dir_: - dir_.mkdir(parents=True, exist_ok=True) - self._retrieve() - logger.debug(f"File downloaded: {self.file_meta.download_filepath}") - - @SourceConnectionNetworkError.wrap - def _retrieve(self): - urllib.request.urlretrieve( - self.file_meta.ftp_path, # type: ignore - self.file_meta.download_filepath, - ) - - -class BiomedSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - """Objects of this class support fetching documents from Biomedical literature FTP directory""" - - connector_config: SimpleBiomedConfig - - def get_base_endpoints_url(self) -> str: - endpoint_url = "https://www.ncbi.nlm.nih.gov/pmc/utils/oa/oa.fcgi?format=pdf" - - if self.connector_config.api_id: - endpoint_url += f"&id={self.connector_config.api_id}" - - if self.connector_config.api_from: - endpoint_url += f"&from={self.connector_config.api_from}" - - if self.connector_config.api_until: - endpoint_url += f"&until={self.connector_config.api_until}" - - return endpoint_url - - def _list_objects_api(self) -> t.List[BiomedFileMeta]: - from bs4 import BeautifulSoup - - def urls_to_metadata(urls): - files = [] - for url in urls: - parts = url.split(PDF_DIR) - if len(parts) > 1: - local_path = parts[1].strip("/") - files.append( - BiomedFileMeta( - ftp_path=url, - download_filepath=(Path(self.read_config.download_dir) / local_path) - .resolve() - .as_posix(), - output_filepath=(Path(self.processor_config.output_dir) / local_path) - .resolve() - .as_posix(), - ), - ) - - return files - - files: t.List[BiomedFileMeta] = [] - - endpoint_url = self.get_base_endpoints_url() - - while endpoint_url: - session = requests.Session() - adapter = HTTPAdapter() - session.mount("http://", adapter) - session.mount("https://", adapter) - response = self._get_request(session=session, endpoint_url=endpoint_url) - soup = BeautifulSoup(response.content, features="lxml") - urls = [link["href"] for link in soup.find_all("link")] - - if not urls: - return files - - endpoint_url = urls[-1] if "resumptiontoken" in urls[-1].lower() else None - if endpoint_url: - urls = urls[:-1] - - files.extend(urls_to_metadata(urls)) - - return files - - @SourceConnectionNetworkError.wrap - def _get_request(self, session: requests.Session, endpoint_url: str) -> requests.Response: - return session.get(endpoint_url, timeout=self.connector_config.max_request_time) - - def _list_objects(self) -> t.List[BiomedFileMeta]: - files = [] - - # Conform to mypy, null check performed elsewhere. 
- # Wouldn't be in this method unless self.config.path exists - path: str = self.connector_config.path if self.connector_config.path else "" - - def traverse(path, download_dir, output_dir): - full_path = Path(PMC_DIR) / path - logger.debug(f"Traversing directory: {full_path}") - - ftp = FTP(DOMAIN) - ftp.login() - - try: - response = ftp.cwd(str(full_path)) - except error_perm: - raise ValueError(f"{full_path} is not a valid directory.") - - if "command successful" in response.lower(): - sub_paths = [path / p for p in ftp.nlst()] - - if not sub_paths: - return - - ext = Path(sub_paths[0]).suffix - if ext: - for sub_path in sub_paths: - ftp_path = f"{FTP_DOMAIN}/{PMC_DIR}/{sub_path}" - local_path = "/".join(str(sub_path).split("/")[1:]) - files.append( - BiomedFileMeta( - ftp_path=ftp_path, - download_filepath=(Path(self.read_config.download_dir) / local_path) - .resolve() - .as_posix(), - output_filepath=( - Path(self.processor_config.output_dir) / local_path - ) - .resolve() - .as_posix(), - ), - ) - - else: - for sub_path in sub_paths: - traverse(sub_path, download_dir, output_dir) - - else: - raise ValueError(f"{full_path} is not a valid directory.") - - ftp_path = f"{FTP_DOMAIN}/{PMC_DIR}/{self.connector_config.path}" - if self.connector_config.is_file: - local_path = "/".join(path.split("/")[1:]) - return [ - BiomedFileMeta( - ftp_path=ftp_path, - download_filepath=(Path(self.read_config.download_dir) / local_path) - .resolve() - .as_posix(), - output_filepath=(Path(self.processor_config.output_dir) / local_path) - .resolve() - .as_posix(), - ), - ] - else: - traverse( - Path(path), - Path(self.read_config.download_dir), - Path(self.processor_config.output_dir), - ) - - return files - - def initialize(self): - pass - - def check_connection(self): - resp = requests.head(self.get_base_endpoints_url()) - try: - resp.raise_for_status() - except requests.HTTPError as http_error: - raise SourceConnectionError(f"failed to validate connection: {http_error}") - - def get_ingest_docs(self): - files = self._list_objects_api() if self.connector_config.is_api else self._list_objects() - return [ - BiomedIngestDoc( - processor_config=self.processor_config, - connector_config=self.connector_config, - read_config=self.read_config, - file_meta=file, - ) - for file in files - ] diff --git a/unstructured/ingest/connector/chroma.py b/unstructured/ingest/connector/chroma.py deleted file mode 100644 index 547b988a2..000000000 --- a/unstructured/ingest/connector/chroma.py +++ /dev/null @@ -1,159 +0,0 @@ -import copy -import typing as t -import uuid -from dataclasses import dataclass - -from unstructured.ingest.enhanced_dataclass.core import _asdict -from unstructured.ingest.error import DestinationConnectionError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseDestinationConnector, - WriteConfig, -) -from unstructured.ingest.logger import logger -from unstructured.ingest.utils.data_prep import batch_generator -from unstructured.staging.base import flatten_dict -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from chromadb import Collection as ChromaCollection - - -@dataclass -class ChromaAccessConfig(AccessConfig): - settings: t.Optional[t.Dict[str, str]] = None - headers: t.Optional[t.Dict[str, str]] = None - - -@dataclass -class SimpleChromaConfig(BaseConnectorConfig): - access_config: ChromaAccessConfig - collection_name: str - path: t.Optional[str] = None - tenant: t.Optional[str] = "default_tenant" - database: t.Optional[str] = 
"default_database" - host: t.Optional[str] = None - port: t.Optional[int] = None - ssl: bool = False - - -@dataclass -class ChromaWriteConfig(WriteConfig): - batch_size: int = 100 - - -@dataclass -class ChromaDestinationConnector(BaseDestinationConnector): - write_config: ChromaWriteConfig - connector_config: SimpleChromaConfig - _collection: t.Optional["ChromaCollection"] = None - - @property - def chroma_collection(self): - if self._collection is None: - self._collection = self.create_collection() - return self._collection - - def initialize(self): - pass - - @DestinationConnectionError.wrap - def check_connection(self): - _ = self.chroma_collection - - def to_dict(self, **kwargs): - """ - The _collection variable in this dataclass breaks deepcopy due to: - TypeError: cannot pickle 'module' object - When serializing, remove it, meaning collection data will need to be reinitialized - when deserialized - """ - self_cp = copy.copy(self) - if hasattr(self_cp, "_collection"): - setattr(self_cp, "_collection", None) - return _asdict(self_cp, **kwargs) - - @requires_dependencies(["chromadb"], extras="chroma") - def create_collection(self) -> "ChromaCollection": - import chromadb - - if self.connector_config.path: - chroma_client = chromadb.PersistentClient( - path=self.connector_config.path, - settings=self.connector_config.settings, - tenant=self.connector_config.tenant, - database=self.connector_config.database, - ) - - elif self.connector_config.host and self.connector_config.port: - chroma_client = chromadb.HttpClient( - host=self.connector_config.host, - port=self.connector_config.port, - ssl=self.connector_config.ssl, - headers=self.connector_config.access_config.headers, - settings=self.connector_config.access_config.settings, - tenant=self.connector_config.tenant, - database=self.connector_config.database, - ) - else: - raise ValueError("Chroma connector requires either path or host and port to be set.") - - collection = chroma_client.get_or_create_collection( - name=self.connector_config.collection_name - ) - return collection - - @DestinationConnectionError.wrap - @requires_dependencies(["chromadb"], extras="chroma") - def upsert_batch(self, batch): - collection = self.chroma_collection - - try: - # Chroma wants lists even if there is only one element - # Upserting to prevent duplicates - collection.upsert( - ids=batch["ids"], - documents=batch["documents"], - embeddings=batch["embeddings"], - metadatas=batch["metadatas"], - ) - except Exception as e: - raise ValueError(f"chroma error: {e}") from e - - @staticmethod - def prepare_chroma_list(chunk: t.Tuple[t.Dict[str, t.Any]]) -> t.Dict[str, t.List[t.Any]]: - """Helper function to break a tuple of dicts into list of parallel lists for ChromaDb. 
- ({'id':1}, {'id':2}, {'id':3}) -> {'ids':[1,2,3]}""" - chroma_dict = {} - chroma_dict["ids"] = [x.get("id") for x in chunk] - chroma_dict["documents"] = [x.get("document") for x in chunk] - chroma_dict["embeddings"] = [x.get("embedding") for x in chunk] - chroma_dict["metadatas"] = [x.get("metadata") for x in chunk] - # Make sure all lists are of the same length - assert ( - len(chroma_dict["ids"]) - == len(chroma_dict["documents"]) - == len(chroma_dict["embeddings"]) - == len(chroma_dict["metadatas"]) - ) - return chroma_dict - - def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None: - logger.info(f"Inserting / updating {len(elements_dict)} documents to destination ") - - chroma_batch_size = self.write_config.batch_size - - for chunk in batch_generator(elements_dict, chroma_batch_size): - self.upsert_batch(self.prepare_chroma_list(chunk)) - - def normalize_dict(self, element_dict: dict) -> dict: - element_id = element_dict.get("element_id", str(uuid.uuid4())) - return { - "id": element_id, - "embedding": element_dict.pop("embeddings", None), - "document": element_dict.pop("text", None), - "metadata": flatten_dict( - element_dict, separator="-", flatten_lists=True, remove_none=True - ), - } diff --git a/unstructured/ingest/connector/clarifai.py b/unstructured/ingest/connector/clarifai.py deleted file mode 100644 index 1c1e06412..000000000 --- a/unstructured/ingest/connector/clarifai.py +++ /dev/null @@ -1,122 +0,0 @@ -import typing as t -import uuid -from dataclasses import dataclass, field - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import DestinationConnectionError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseDestinationConnector, - WriteConfig, -) -from unstructured.ingest.logger import logger -from unstructured.staging.base import flatten_dict -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from clarifai.client.input import Inputs - - -@dataclass -class ClarifaiAccessConfig(AccessConfig): - api_key: str = enhanced_field(sensitive=True) - - -@dataclass -class SimpleClarifaiConfig(BaseConnectorConfig): - access_config: ClarifaiAccessConfig - app_id: str - user_id: str - dataset_id: t.Optional[str] = None - - -@dataclass -class ClarifaiWriteConfig(WriteConfig): - batch_size: int = 50 - - -@dataclass -class ClarifaiDestinationConnector(BaseDestinationConnector): - write_config: ClarifaiWriteConfig - connector_config: SimpleClarifaiConfig - _client: t.Optional["Inputs"] = field(init=False, default=None) - - @property - @requires_dependencies(["clarifai"], extras="clarifai") - def client(self) -> "Inputs": - if self._client is None: - from clarifai.client.input import Inputs - - access_conf = self.connector_config.access_config - try: - if access_conf.api_key is not None: - clarifai_pat = access_conf.api_key - except Exception as e: - raise (f"please provide clarifai PAT key : {e}") - - self._client = Inputs( - app_id=self.connector_config.app_id, - user_id=self.connector_config.user_id, - pat=clarifai_pat, - ) - return self._client - - @requires_dependencies(["clarifai"], extras="clarifai") - @DestinationConnectionError.wrap - def initialize(self): - _ = self.client - - def check_connection(self): - try: - _ = [inp for inp in self.client.list_inputs(page_no=1, per_page=1)] # noqa: C416 - except Exception as e: - logger.error(f"Failed to validate connection {e}", exc_info=True) - raise DestinationConnectionError(f"failed to 
validate connection: {e}") - - def normalize_dict(self, element_dict: dict) -> dict: - """Modifying schema of the dict in order to compile with clarifai input formats""" - return { - "input_id": str(uuid.uuid4().hex), - "text": element_dict.pop("text", None), - "metadata": { - **flatten_dict( - element_dict, - separator="_", - flatten_lists=True, - remove_none=True, - ), - }, - } - - def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None: - from google.protobuf.struct_pb2 import Struct - - logger.info( - f"writing {len(elements_dict)} objects to destination " - f"app {self.connector_config.app_id} " - ) - try: - batch_size = self.write_config.batch_size - for idx in range(0, len(elements_dict), batch_size): - batch_dict = elements_dict[idx : batch_size + idx] - input_batch = [] - for elem in batch_dict: - meta_struct = Struct() - meta_struct.update(elem["metadata"]) - input_batch.append( - self._client.get_text_input( - input_id=elem["input_id"], - raw_text=elem["text"], - dataset_id=self.connector_config.dataset_id, - metadata=meta_struct, - ) - ) - result_id = self._client.upload_inputs(inputs=input_batch) - logger.debug( - f"Input posted successfully into {self.connector_config.app_id}. \ - Result id: {result_id}" - ) - - except Exception as e: - raise e diff --git a/unstructured/ingest/connector/confluence.py b/unstructured/ingest/connector/confluence.py deleted file mode 100644 index 4e1369349..000000000 --- a/unstructured/ingest/connector/confluence.py +++ /dev/null @@ -1,285 +0,0 @@ -import math -import typing as t -from dataclasses import dataclass, field -from datetime import datetime -from pathlib import Path - -import requests - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from atlassian import Confluence - - -@dataclass -class ConfluenceAccessConfig(AccessConfig): - api_token: str = enhanced_field(sensitive=True) - - -@dataclass -class SimpleConfluenceConfig(BaseConnectorConfig): - """Connector config where: - user_email is the email to authenticate into Confluence Cloud, - api_token is the api token to authenticate into Confluence Cloud, - and url is the URL pointing to the Confluence Cloud instance. - - Check https://developer.atlassian.com/cloud/confluence/basic-auth-for-rest-apis/ - for more info on the api_token. - """ - - user_email: str - access_config: ConfluenceAccessConfig - url: str - max_num_of_spaces: int = 500 - max_num_of_docs_from_each_space: int = 100 - spaces: t.List[str] = field(default_factory=list) - - -@dataclass -class ConfluenceDocumentMeta: - """Metadata specifying: - id for the confluence space that the document locates in, - and the id of document that is being reached to. 
- """ - - space_id: str - document_id: str - - -def scroll_wrapper(func): - def wrapper(*args, **kwargs): - """Wraps a function to obtain scroll functionality.""" - number_of_items_to_fetch = kwargs["number_of_items_to_fetch"] - del kwargs["number_of_items_to_fetch"] - - kwargs["limit"] = min(100, number_of_items_to_fetch) - kwargs["start"] = kwargs.get("start", 0) - - all_results = [] - num_iterations = math.ceil(number_of_items_to_fetch / kwargs["limit"]) - - for _ in range(num_iterations): - response = func(*args, **kwargs) - if isinstance(response, list): - all_results += func(*args, **kwargs) - elif isinstance(response, dict): - all_results += func(*args, **kwargs)["results"] - - kwargs["start"] += kwargs["limit"] - - return all_results[:number_of_items_to_fetch] - - return wrapper - - -@dataclass -class ConfluenceIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - """Class encapsulating fetching a doc and writing processed results (but not - doing the processing). - - Current implementation creates a Confluence connection object - to fetch each doc, rather than creating a it for each thread. - """ - - connector_config: SimpleConfluenceConfig - document_meta: ConfluenceDocumentMeta - registry_name: str = "confluence" - - # TODO: remove one of filename or _tmp_download_file, using a wrapper - @property - def filename(self): - if not self.read_config.download_dir: - return None - return ( - Path(self.read_config.download_dir) - / self.document_meta.space_id - / f"{self.document_meta.document_id}.html" - ).resolve() - - @property - def _output_filename(self): - """Create output file path based on output directory, space id and document id.""" - output_file = f"{self.document_meta.document_id}.json" - return Path(self.processor_config.output_dir) / self.document_meta.space_id / output_file - - @property - def record_locator(self) -> t.Optional[t.Dict[str, t.Any]]: - return { - "url": self.connector_config.url, - "page_id": self.document_meta.document_id, - } - - @SourceConnectionNetworkError.wrap - @requires_dependencies(["atlassian"], extras="Confluence") - def _get_page(self): - from atlassian import Confluence - from atlassian.errors import ApiError - - try: - confluence = Confluence( - self.connector_config.url, - username=self.connector_config.user_email, - password=self.connector_config.access_config.api_token, - ) - result = confluence.get_page_by_id( - page_id=self.document_meta.document_id, - expand="history.lastUpdated,version,body.view", - ) - except ApiError as e: - logger.error(e) - return None - return result - - def update_source_metadata(self, **kwargs): - """Fetches file metadata from the current page.""" - page = kwargs.get("page", self._get_page()) - if page is None: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - document_history = page["history"] - date_created = datetime.strptime( - document_history["createdDate"], - "%Y-%m-%dT%H:%M:%S.%fZ", - ).isoformat() - if last_updated := document_history.get("lastUpdated", {}).get("when", ""): - date_modified = datetime.strptime( - last_updated, - "%Y-%m-%dT%H:%M:%S.%fZ", - ).isoformat() - else: - date_modified = date_created - version = page["version"]["number"] - self.source_metadata = SourceMetadata( - date_created=date_created, - date_modified=date_modified, - version=version, - source_url=page["_links"].get("self", None), - exists=True, - ) - - @SourceConnectionError.wrap - @requires_dependencies(["atlassian"], extras="confluence") - @BaseSingleIngestDoc.skip_if_file_exists - def 
get_file(self): - # TODO: instead of having a separate connection object for each doc, - # have a separate connection object for each process - - result = self._get_page() - self.update_source_metadata(page=result) - if result is None: - raise ValueError(f"Failed to retrieve page with ID {self.document_meta.document_id}") - self.document = result["body"]["view"]["value"] - self.filename.parent.mkdir(parents=True, exist_ok=True) - with open(self.filename, "w", encoding="utf8") as f: - f.write(self.document) - - -@dataclass -class ConfluenceSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - """Fetches body fields from all documents within all spaces in a Confluence Cloud instance.""" - - connector_config: SimpleConfluenceConfig - _confluence: t.Optional["Confluence"] = field(init=False, default=None) - - @property - def confluence(self) -> "Confluence": - from atlassian import Confluence - - if self._confluence is None: - self._confluence = Confluence( - url=self.connector_config.url, - username=self.connector_config.user_email, - password=self.connector_config.access_config.api_token, - ) - return self._confluence - - @requires_dependencies(["atlassian"], extras="Confluence") - def check_connection(self): - url = "rest/api/space" - try: - self.confluence.request(method="HEAD", path=url) - except requests.HTTPError as http_error: - logger.error(f"failed to validate connection: {http_error}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {http_error}") - - @requires_dependencies(["atlassian"], extras="Confluence") - def initialize(self): - self.list_of_spaces = None - if self.connector_config.spaces: - self.list_of_spaces = self.connector_config.spaces - if self.connector_config.max_num_of_spaces: - logger.warning( - """--confluence-list-of-spaces and --confluence-num-of-spaces cannot - be used at the same time. 
Connector will only fetch the - --confluence-list-of-spaces that you've provided.""", - ) - - @requires_dependencies(["atlassian"], extras="Confluence") - def _get_space_ids(self): - """Fetches spaces in a confluence domain.""" - - get_spaces_with_scroll = scroll_wrapper(self.confluence.get_all_spaces) - - all_results = get_spaces_with_scroll( - number_of_items_to_fetch=self.connector_config.max_num_of_spaces, - ) - - space_ids = [space["key"] for space in all_results] - return space_ids - - @requires_dependencies(["atlassian"], extras="Confluence") - def _get_docs_ids_within_one_space( - self, - space_id: str, - content_type: str = "page", - ): - get_pages_with_scroll = scroll_wrapper(self.confluence.get_all_pages_from_space) - results = get_pages_with_scroll( - space=space_id, - number_of_items_to_fetch=self.connector_config.max_num_of_docs_from_each_space, - content_type=content_type, - ) - - doc_ids = [(space_id, doc["id"]) for doc in results] - return doc_ids - - @requires_dependencies(["atlassian"], extras="Confluence") - def _get_doc_ids_within_spaces(self): - space_ids = self._get_space_ids() if not self.list_of_spaces else self.list_of_spaces - - doc_ids_all = [self._get_docs_ids_within_one_space(space_id=id) for id in space_ids] - - doc_ids_flattened = [ - (space_id, doc_id) - for doc_ids_space in doc_ids_all - for space_id, doc_id in doc_ids_space - ] - return doc_ids_flattened - - def get_ingest_docs(self): - """Fetches all documents in a confluence space.""" - doc_ids = self._get_doc_ids_within_spaces() - return [ - ConfluenceIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - document_meta=ConfluenceDocumentMeta(space_id, doc_id), - ) - for space_id, doc_id in doc_ids - ] diff --git a/unstructured/ingest/connector/databricks_volumes.py b/unstructured/ingest/connector/databricks_volumes.py deleted file mode 100644 index 5662d65cd..000000000 --- a/unstructured/ingest/connector/databricks_volumes.py +++ /dev/null @@ -1,137 +0,0 @@ -import copy -import json -import os -import typing as t -from dataclasses import dataclass, field -from io import BytesIO -from pathlib import PurePath - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.enhanced_dataclass.core import _asdict -from unstructured.ingest.error import DestinationConnectionError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseDestinationConnector, - BaseSingleIngestDoc, - WriteConfig, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from databricks.sdk import WorkspaceClient - - -@dataclass -class DatabricksVolumesAccessConfig(AccessConfig): - account_id: t.Optional[str] = None - username: t.Optional[str] = None - password: t.Optional[str] = enhanced_field(default=None, sensitive=True) - client_id: t.Optional[str] = None - client_secret: t.Optional[str] = enhanced_field(default=None, sensitive=True) - token: t.Optional[str] = enhanced_field(default=None, sensitive=True) - profile: t.Optional[str] = None - azure_workspace_resource_id: t.Optional[str] = None - azure_client_secret: t.Optional[str] = enhanced_field(default=None, sensitive=True) - azure_client_id: t.Optional[str] = None - azure_tenant_id: t.Optional[str] = None - azure_environment: t.Optional[str] = None - auth_type: t.Optional[str] = None - cluster_id: t.Optional[str] = None - google_credentials: t.Optional[str] = None - 
google_service_account: t.Optional[str] = None - - -@dataclass -class SimpleDatabricksVolumesConfig(BaseConnectorConfig): - access_config: DatabricksVolumesAccessConfig - host: t.Optional[str] = None - - -@dataclass -class DatabricksVolumesWriteConfig(WriteConfig): - volume: str - catalog: str - volume_path: t.Optional[str] = None - overwrite: bool = False - encoding: str = "utf-8" - schema: str = "default" - - @property - def path(self) -> str: - path = f"/Volumes/{self.catalog}/{self.schema}/{self.volume}" - if self.volume_path: - path = f"{path}/{self.volume_path}" - return path - - -@dataclass -class DatabricksVolumesDestinationConnector(BaseDestinationConnector): - write_config: DatabricksVolumesWriteConfig - connector_config: SimpleDatabricksVolumesConfig - _client: t.Optional["WorkspaceClient"] = field(init=False, default=None) - - def to_dict(self, **kwargs): - self_cp = copy.copy(self) - if hasattr(self_cp, "_client"): - setattr(self_cp, "_client", None) - return _asdict(self_cp, **kwargs) - - @requires_dependencies(dependencies=["databricks.sdk"], extras="databricks-volumes") - def generate_client(self) -> "WorkspaceClient": - from databricks.sdk import WorkspaceClient - - return WorkspaceClient( - host=self.connector_config.host, **self.connector_config.access_config.to_dict() - ) - - @property - def client(self) -> "WorkspaceClient": - if self._client is None: - self._client = self.generate_client() - return self._client - - def check_connection(self): - try: - assert self.client.current_user.me().active - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise DestinationConnectionError(f"failed to validate connection: {e}") - - def initialize(self): - _ = self.client - - def write_dict( - self, - *args, - elements_dict: t.List[t.Dict[str, t.Any]], - filename: t.Optional[str] = None, - indent: int = 4, - encoding: str = "utf-8", - **kwargs, - ) -> None: - output_folder = self.write_config.path - output_folder = os.path.join(output_folder) # Make sure folder ends with file seperator - filename = ( - filename.strip(os.sep) if filename else filename - ) # Make sure filename doesn't begin with file seperator - output_path = str(PurePath(output_folder, filename)) if filename else output_folder - logger.debug(f"uploading content to {output_path}") - self.client.files.upload( - file_path=output_path, - contents=BytesIO(json.dumps(elements_dict).encode(encoding=self.write_config.encoding)), - overwrite=self.write_config.overwrite, - ) - - def get_elements_dict(self, docs: t.List[BaseSingleIngestDoc]) -> t.List[t.Dict[str, t.Any]]: - pass - - def write(self, docs: t.List[BaseSingleIngestDoc]) -> None: - for doc in docs: - file_path = doc.base_output_filename - filename = file_path if file_path else None - with open(doc._output_filename) as json_file: - logger.debug(f"uploading content from {doc._output_filename}") - json_list = json.load(json_file) - self.write_dict(elements_dict=json_list, filename=filename) diff --git a/unstructured/ingest/connector/delta_table.py b/unstructured/ingest/connector/delta_table.py deleted file mode 100644 index 1382ed05d..000000000 --- a/unstructured/ingest/connector/delta_table.py +++ /dev/null @@ -1,203 +0,0 @@ -import os -import typing as t -from dataclasses import dataclass -from datetime import datetime as dt -from multiprocessing import Process -from pathlib import Path - -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import 
( - BaseConnectorConfig, - BaseDestinationConnector, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, - WriteConfig, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from deltalake import DeltaTable - - -@dataclass -class SimpleDeltaTableConfig(BaseConnectorConfig): - table_uri: t.Union[str, Path] - version: t.Optional[int] = None - storage_options: t.Optional[t.Dict[str, str]] = None - without_files: bool = False - - -@dataclass -class DeltaTableIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - connector_config: SimpleDeltaTableConfig - uri: str - modified_date: str - created_at: str - registry_name: str = "delta-table" - - def uri_filename(self) -> str: - basename = os.path.basename(self.uri) - return os.path.splitext(basename)[0] - - @property - def filename(self): - return (Path(self.read_config.download_dir) / f"{self.uri_filename()}.csv").resolve() - - @property - def _output_filename(self): - """Create filename document id combined with a hash of the query to uniquely identify - the output file.""" - return Path(self.processor_config.output_dir) / f"{self.uri_filename()}.json" - - def _create_full_tmp_dir_path(self): - self.filename.parent.mkdir(parents=True, exist_ok=True) - self._output_filename.parent.mkdir(parents=True, exist_ok=True) - - @requires_dependencies(["fsspec"], extras="delta-table") - def _get_fs_from_uri(self): - from fsspec.core import url_to_fs - - try: - fs, _ = url_to_fs(self.uri) - except ImportError as error: - raise ImportError( - f"uri {self.uri} may be associated with a filesystem that " - f"requires additional dependencies: {error}", - ) - return fs - - def update_source_metadata(self, **kwargs): - fs = kwargs.get("fs", self._get_fs_from_uri()) - version = ( - fs.checksum(self.uri) if fs.protocol != "gs" else fs.info(self.uri).get("etag", "") - ) - file_exists = fs.exists(self.uri) - self.source_metadata = SourceMetadata( - date_created=self.created_at, - date_modified=self.modified_date, - version=version, - source_url=self.uri, - exists=file_exists, - ) - - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - fs = self._get_fs_from_uri() - self.update_source_metadata(fs=fs) - logger.info(f"using a {fs} filesystem to collect table data") - self._create_full_tmp_dir_path() - - df = self._get_df(filesystem=fs) - - logger.info(f"writing {len(df)} rows to {self.filename}") - df.to_csv(self.filename) - - @SourceConnectionNetworkError.wrap - def _get_df(self, filesystem): - import pyarrow.parquet as pq - - return pq.ParquetDataset(self.uri, filesystem=filesystem).read_pandas().to_pandas() - - -@dataclass -class DeltaTableSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - connector_config: SimpleDeltaTableConfig - delta_table: t.Optional["DeltaTable"] = None - - def check_connection(self): - pass - - @requires_dependencies(["deltalake"], extras="delta-table") - def initialize(self): - from deltalake import DeltaTable - - self.delta_table = DeltaTable( - table_uri=self.connector_config.table_uri, - version=self.connector_config.version, - storage_options=self.connector_config.storage_options, - without_files=self.connector_config.without_files, - ) - rows = self.delta_table.to_pyarrow_dataset().count_rows() - if not rows > 0: - raise ValueError(f"no data found at {self.connector_config.table_uri}") - logger.info(f"processing {rows} rows of data") - 
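
Editor's note: the deleted Delta Table source connector above loads the table once, refuses to proceed when it contains no rows, and then, in get_ingest_docs just below, pairs every data file URI with the modification time recorded in the table's add actions. A minimal standalone sketch of that pairing, assuming a hypothetical local table at ./my_table and using only the deltalake calls that appear in this connector, might look roughly like:

import os
from datetime import datetime

from deltalake import DeltaTable

# Hypothetical table location; any URI accepted by DeltaTable would do.
table = DeltaTable(table_uri="./my_table")

# One add action per data file; "path" and "modification_time" are columns of
# the resulting dataframe, exactly as consumed by get_ingest_docs() below.
actions = table.get_add_actions().to_pandas()
mod_times = {row["path"]: str(row["modification_time"]) for _, row in actions.iterrows()}

# Table creation time is stored as milliseconds since the epoch.
created_at = datetime.fromtimestamp(table.metadata().created_time / 1000)

# Each file URI becomes one ingest doc, carrying its own modification date
# plus the shared table creation date.
docs = [
    (uri, mod_times[os.path.basename(uri)], str(created_at))
    for uri in table.file_uris()
]
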
- def get_ingest_docs(self): - """Batches the results into distinct docs""" - if not self.delta_table: - raise ValueError("delta table was never initialized") - actions = self.delta_table.get_add_actions().to_pandas() - mod_date_dict = { - row["path"]: str(row["modification_time"]) for _, row in actions.iterrows() - } - created_at = dt.fromtimestamp(self.delta_table.metadata().created_time / 1000) - return [ - DeltaTableIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - uri=uri, - modified_date=mod_date_dict[os.path.basename(uri)], - created_at=str(created_at), - ) - for uri in self.delta_table.file_uris() - ] - - -@dataclass -class DeltaTableWriteConfig(WriteConfig): - drop_empty_cols: bool = False - mode: t.Literal["error", "append", "overwrite", "ignore"] = "error" - schema_mode: t.Optional[t.Literal["merge", "overwrite"]] = None - engine: t.Literal["pyarrow", "rust"] = "pyarrow" - - -@dataclass -class DeltaTableDestinationConnector(BaseDestinationConnector): - write_config: DeltaTableWriteConfig - connector_config: SimpleDeltaTableConfig - - @requires_dependencies(["deltalake"], extras="delta-table") - def initialize(self): - pass - - def check_connection(self): - pass - - @requires_dependencies(["deltalake"], extras="delta-table") - def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None: - from deltalake.writer import write_deltalake - - from unstructured.ingest.utils.table import convert_to_pandas_dataframe - - df = convert_to_pandas_dataframe( - elements_dict=elements_dict, - drop_empty_cols=self.write_config.drop_empty_cols, - ) - logger.info( - f"writing {len(df)} rows to destination table " - f"at {self.connector_config.table_uri}\ndtypes: {df.dtypes}", - ) - writer_kwargs = { - "table_or_uri": self.connector_config.table_uri, - "data": df, - "mode": self.write_config.mode, - "engine": self.write_config.engine, - } - if self.write_config.schema_mode is not None: - writer_kwargs["schema_mode"] = self.write_config.schema_mode - # NOTE: deltalake writer on Linux sometimes can finish but still trigger a SIGABRT and cause - # ingest to fail, even though all tasks are completed normally. 
Putting the writer into a - # process mitigates this issue by ensuring python interpreter waits properly for deltalake's - # rust backend to finish - writer = Process( - target=write_deltalake, - kwargs=writer_kwargs, - ) - writer.start() - writer.join() diff --git a/unstructured/ingest/connector/discord.py b/unstructured/ingest/connector/discord.py deleted file mode 100644 index bfbfc8fbd..000000000 --- a/unstructured/ingest/connector/discord.py +++ /dev/null @@ -1,180 +0,0 @@ -import datetime as dt -import typing as t -from dataclasses import dataclass -from pathlib import Path - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, -) -from unstructured.ingest.logger import logger -from unstructured.utils import ( - requires_dependencies, -) - - -@dataclass -class DiscordAccessConfig(AccessConfig): - token: str = enhanced_field(sensitive=True) - - -@dataclass -class SimpleDiscordConfig(BaseConnectorConfig): - """Connector config where channels is a comma separated list of - Discord channels to pull messages from. - """ - - # Discord Specific Options - access_config: DiscordAccessConfig - channels: t.List[str] - period: t.Optional[int] = None - - -@dataclass -class DiscordIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - """Class encapsulating fetching a doc and writing processed results (but not - doing the processing!). - Also includes a cleanup method. When things go wrong and the cleanup - method is not called, the file is left behind on the filesystem to assist debugging. 
- """ - - connector_config: SimpleDiscordConfig - channel: str - days: t.Optional[int] = None - registry_name: str = "discord" - - # NOTE(crag): probably doesn't matter, but intentionally not defining tmp_download_file - # __post_init__ for multiprocessing simplicity (no Path objects in initially - # instantiated object) - def _tmp_download_file(self): - channel_file = self.channel + ".txt" - return Path(self.read_config.download_dir) / channel_file - - @property - def _output_filename(self): - output_file = self.channel + ".json" - return Path(self.processor_config.output_dir) / output_file - - def _create_full_tmp_dir_path(self): - self._tmp_download_file().parent.mkdir(parents=True, exist_ok=True) - - @SourceConnectionNetworkError.wrap - @requires_dependencies(dependencies=["discord"], extras="discord") - def _get_messages(self): - """Actually fetches the data from discord.""" - import discord - from discord.ext import commands - - messages: t.List[discord.Message] = [] - jumpurl: t.List[str] = [] - intents = discord.Intents.default() - intents.message_content = True - bot = commands.Bot(command_prefix=">", intents=intents) - - @bot.event - async def on_ready(): - try: - after_date = None - if self.days: - after_date = dt.datetime.utcnow() - dt.timedelta(days=self.days) - channel = bot.get_channel(int(self.channel)) - jumpurl.append(channel.jump_url) # type: ignore - async for msg in channel.history(after=after_date): # type: ignore - messages.append(msg) - await bot.close() - except Exception: - logger.error("Error fetching messages") - await bot.close() - raise - - bot.run(self.connector_config.access_config.token) - jump_url = None if len(jumpurl) < 1 else jumpurl[0] - return messages, jump_url - - def update_source_metadata(self, **kwargs): - messages, jump_url = kwargs.get("messages_tuple", self._get_messages()) - if messages == []: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - dates = [m.created_at for m in messages if m.created_at] - dates.sort() - self.source_metadata = SourceMetadata( - date_created=dates[0].isoformat(), - date_modified=dates[-1].isoformat(), - source_url=jump_url, - exists=True, - ) - - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - self._create_full_tmp_dir_path() - - messages, jump_url = self._get_messages() - self.update_source_metadata(messages_tuple=(messages, jump_url)) - if messages == []: - raise ValueError(f"Failed to retrieve messages from Discord channel {self.channel}") - self._tmp_download_file().parent.mkdir(parents=True, exist_ok=True) - with open(self._tmp_download_file(), "w") as f: - for m in messages: - f.write(m.content + "\n") - - @property - def filename(self): - """The filename of the file created from a discord channel""" - return self._tmp_download_file() - - @property - def version(self) -> t.Optional[str]: - return None - - @property - def record_locator(self) -> t.Optional[t.Dict[str, t.Any]]: - return { - "channel": self.channel, - } - - -class DiscordSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - """Objects of this class support fetching document(s) from""" - - connector_config: SimpleDiscordConfig - - def initialize(self): - pass - - @requires_dependencies(dependencies=["discord"], extras="discord") - def check_connection(self): - import asyncio - - import discord - from discord.client import Client - - intents = discord.Intents.default() - try: - client = Client(intents=intents) - 
asyncio.run(client.start(token=self.connector_config.access_config.token)) - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - def get_ingest_docs(self): - return [ - DiscordIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - channel=channel, - days=self.connector_config.period, - ) - for channel in self.connector_config.channels - ] diff --git a/unstructured/ingest/connector/elasticsearch.py b/unstructured/ingest/connector/elasticsearch.py deleted file mode 100644 index aa8ff1d9e..000000000 --- a/unstructured/ingest/connector/elasticsearch.py +++ /dev/null @@ -1,397 +0,0 @@ -import copy -import hashlib -import typing as t -import uuid -from dataclasses import dataclass, field -from pathlib import Path - -from dataclasses_json.core import Json - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.enhanced_dataclass.core import _asdict -from unstructured.ingest.error import DestinationConnectionError, SourceConnectionError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseDestinationConnector, - BaseIngestDocBatch, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, - WriteConfig, -) -from unstructured.ingest.logger import logger -from unstructured.ingest.utils.data_prep import generator_batching_wbytes -from unstructured.staging.base import flatten_dict -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from elasticsearch import Elasticsearch - - -@dataclass -class ElasticsearchAccessConfig(AccessConfig): - hosts: t.Optional[t.List[str]] = None - username: t.Optional[str] = None - password: t.Optional[str] = enhanced_field(default=None, sensitive=True) - cloud_id: t.Optional[str] = None - api_key: t.Optional[str] = enhanced_field( - default=None, sensitive=True, overload_name="es_api_key" - ) - api_key_id: t.Optional[str] = None - bearer_auth: t.Optional[str] = enhanced_field(default=None, sensitive=True) - ca_certs: t.Optional[str] = None - ssl_assert_fingerprint: t.Optional[str] = enhanced_field(default=None, sensitive=True) - - def to_dict(self, **kwargs) -> t.Dict[str, Json]: - d = super().to_dict(**kwargs) - # Update auth related fields to conform to what the SDK expects based on the - # supported methods: - # https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/connecting.html - if not self.ca_certs: - # ES library already sets a default for this, don't want to - # introduce data by setting it to None - d.pop("ca_certs") - if self.password and (self.cloud_id or self.ca_certs or self.ssl_assert_fingerprint): - d.pop("password") - d["basic_auth"] = ("elastic", self.password) - elif not self.cloud_id and self.username and self.password: - d.pop("username", None) - d.pop("password", None) - d["basic_auth"] = (self.username, self.password) - elif self.api_key and self.api_key_id: - d.pop("api_key_id", None) - d.pop("api_key", None) - d["api_key"] = (self.api_key_id, self.api_key) - # This doesn't exist on the client init, remove: - d.pop("api_key_id", None) - return d - - -@dataclass -class SimpleElasticsearchConfig(BaseConnectorConfig): - """Connector config where: - url is the url to access the elasticsearch server, - index_name is the name of the index to reach to, - """ - - index_name: str - batch_size: 
int = 100 - fields: t.List[str] = field(default_factory=list) - access_config: ElasticsearchAccessConfig = None - - -@dataclass -class ElasticsearchDocumentMeta: - """Metadata specifying: - name of the elasticsearch index that is being reached to, - and the id of document that is being reached to, - """ - - index_name: str - document_id: str - - -@dataclass -class ElasticsearchIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - """Class encapsulating fetching a doc and writing processed results (but not - doing the processing!). - - Current implementation creates a python Elasticsearch client to fetch each doc, - rather than creating a client for each thread. - """ - - connector_config: SimpleElasticsearchConfig - document_meta: ElasticsearchDocumentMeta - document: dict = field(default_factory=dict) - registry_name: str = "elasticsearch" - - # TODO: remove one of filename or _tmp_download_file, using a wrapper - @property - def filename(self): - f = self.document_meta.document_id - if self.connector_config.fields: - f = "{}-{}".format( - f, - hashlib.sha256(",".join(self.connector_config.fields).encode()).hexdigest()[:8], - ) - return ( - Path(self.read_config.download_dir) / self.document_meta.index_name / f"{f}.txt" - ).resolve() - - @property - def _output_filename(self): - """Create filename document id combined with a hash of the query to uniquely identify - the output file.""" - # Generate SHA256 hash and take the first 8 characters - filename = self.document_meta.document_id - if self.connector_config.fields: - filename = "{}-{}".format( - filename, - hashlib.sha256(",".join(self.connector_config.fields).encode()).hexdigest()[:8], - ) - output_file = f"{filename}.json" - return ( - Path(self.processor_config.output_dir) / self.connector_config.index_name / output_file - ) - - def update_source_metadata(self, **kwargs): - if self.document is None: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - self.source_metadata = SourceMetadata( - version=self.document["_version"], - exists=True, - ) - - @SourceConnectionError.wrap - @requires_dependencies(["elasticsearch"], extras="elasticsearch") - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - pass - - @property - def date_created(self) -> t.Optional[str]: - return None - - @property - def date_modified(self) -> t.Optional[str]: - return None - - @property - def source_url(self) -> t.Optional[str]: - return None - - @property - def record_locator(self) -> t.Optional[t.Dict[str, t.Any]]: - return { - "hosts": self.connector_config.access_config.hosts, - "index_name": self.connector_config.index_name, - "document_id": self.document_meta.document_id, - } - - -@dataclass -class ElasticsearchIngestDocBatch(BaseIngestDocBatch): - connector_config: SimpleElasticsearchConfig - ingest_docs: t.List[ElasticsearchIngestDoc] = field(default_factory=list) - list_of_ids: t.List[str] = field(default_factory=list) - registry_name: str = "elasticsearch_batch" - - def __post_init__(self): - # Until python3.8 is deprecated, this is a limitation of dataclass inheritance - # to make it a required field - if len(self.list_of_ids) == 0: - raise ValueError("list_of_ids is required") - - @property - def unique_id(self) -> str: - return ",".join(sorted(self.list_of_ids)) - - @requires_dependencies(["elasticsearch"], extras="elasticsearch") - def _get_docs(self): - from elasticsearch import Elasticsearch - from elasticsearch.helpers import scan - - es = 
Elasticsearch(**self.connector_config.access_config.to_dict(apply_name_overload=False)) - scan_query = { - "_source": self.connector_config.fields, - "version": True, - "query": {"ids": {"values": self.list_of_ids}}, - } - - result = scan( - es, - query=scan_query, - scroll="1m", - index=self.connector_config.index_name, - ) - return list(result) - - @SourceConnectionError.wrap - @requires_dependencies(["elasticsearch"], extras="elasticsearch") - def get_files(self): - documents = self._get_docs() - for doc in documents: - ingest_doc = ElasticsearchIngestDoc( - processor_config=self.processor_config, - read_config=self.read_config, - connector_config=self.connector_config, - document=doc, - document_meta=ElasticsearchDocumentMeta( - self.connector_config.index_name, doc["_id"] - ), - ) - ingest_doc.update_source_metadata() - doc_body = doc["_source"] - filename = ingest_doc.filename - flattened_dict = flatten_dict(dictionary=doc_body) - str_values = [str(value) for value in flattened_dict.values()] - concatenated_values = "\n".join(str_values) - - filename.parent.mkdir(parents=True, exist_ok=True) - with open(filename, "w", encoding="utf8") as f: - f.write(concatenated_values) - self.ingest_docs.append(ingest_doc) - - -@dataclass -class ElasticsearchSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - """Fetches particular fields from all documents in a given elasticsearch cluster and index""" - - connector_config: SimpleElasticsearchConfig - _es: t.Optional["Elasticsearch"] = field(init=False, default=None) - - @property - def es(self): - from elasticsearch import Elasticsearch - - if self._es is None: - self._es = Elasticsearch( - **self.connector_config.access_config.to_dict(apply_name_overload=False) - ) - return self._es - - def check_connection(self): - try: - self.es.perform_request("HEAD", "/", headers={"accept": "application/json"}) - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - def __post_init__(self): - self.scan_query: dict = {"stored_fields": [], "query": {"match_all": {}}} - - def initialize(self): - pass - - @requires_dependencies(["elasticsearch"], extras="elasticsearch") - def _get_doc_ids(self): - """Fetches all document ids in an index""" - from elasticsearch.helpers import scan - - hits = scan( - self.es, - query=self.scan_query, - scroll="1m", - index=self.connector_config.index_name, - ) - - return [hit["_id"] for hit in hits] - - def get_ingest_docs(self): - """Fetches all documents in an index, using ids that are fetched with _get_doc_ids""" - ids = self._get_doc_ids() - id_batches = [ - ids[ - i - * self.connector_config.batch_size : (i + 1) # noqa - * self.connector_config.batch_size - ] - for i in range( - (len(ids) + self.connector_config.batch_size - 1) - // self.connector_config.batch_size - ) - ] - return [ - ElasticsearchIngestDocBatch( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - list_of_ids=batched_ids, - ) - for batched_ids in id_batches - ] - - -@dataclass -class ElasticsearchWriteConfig(WriteConfig): - batch_size_bytes: int = 15_000_000 - num_processes: int = 1 - - -@dataclass -class ElasticsearchDestinationConnector(BaseDestinationConnector): - write_config: ElasticsearchWriteConfig - connector_config: SimpleElasticsearchConfig - _client: t.Optional["Elasticsearch"] = field(init=False, default=None) - - def to_dict(self, **kwargs): - """ - The 
_client variable in this dataclass breaks deepcopy due to: - TypeError: cannot pickle '_thread.lock' object - When serializing, remove it, meaning client data will need to be reinitialized - when deserialized - """ - self_cp = copy.copy(self) - if hasattr(self_cp, "_client"): - setattr(self_cp, "_client", None) - return _asdict(self_cp, **kwargs) - - @DestinationConnectionError.wrap - @requires_dependencies(["elasticsearch"], extras="elasticsearch") - def generate_client(self) -> "Elasticsearch": - from elasticsearch import Elasticsearch - - return Elasticsearch( - **self.connector_config.access_config.to_dict(apply_name_overload=False) - ) - - @property - def client(self): - if self._client is None: - self._client = self.generate_client() - return self._client - - def initialize(self): - _ = self.client - - @DestinationConnectionError.wrap - def check_connection(self): - try: - assert self.client.ping() - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise DestinationConnectionError(f"failed to validate connection: {e}") - - @requires_dependencies(["elasticsearch"], extras="elasticsearch") - def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None: - logger.info( - f"writing document batches to destination" - f" index named {self.connector_config.index_name}" - f" at {self.connector_config.access_config.hosts}" - f" with batch size (in bytes) {self.write_config.batch_size_bytes}" - f" with {self.write_config.num_processes} (number of) processes" - ) - from elasticsearch.helpers import parallel_bulk - - for batch in generator_batching_wbytes( - elements_dict, batch_size_limit_bytes=self.write_config.batch_size_bytes - ): - for success, info in parallel_bulk( - self.client, batch, thread_count=self.write_config.num_processes - ): - if not success: - logger.error( - "upload failed for a batch in elasticsearch destination connector:", info - ) - - def normalize_dict(self, element_dict: dict) -> dict: - return { - "_index": self.connector_config.index_name, - "_id": str(uuid.uuid4()), - "_source": { - "element_id": element_dict.pop("element_id", None), - "embeddings": element_dict.pop("embeddings", None), - "text": element_dict.pop("text", None), - "type": element_dict.pop("type", None), - "metadata": flatten_dict( - element_dict.pop("metadata", None), - separator="-", - ), - }, - } diff --git a/unstructured/ingest/connector/fsspec/__init__.py b/unstructured/ingest/connector/fsspec/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstructured/ingest/connector/fsspec/azure.py b/unstructured/ingest/connector/fsspec/azure.py deleted file mode 100644 index 169cda6a0..000000000 --- a/unstructured/ingest/connector/fsspec/azure.py +++ /dev/null @@ -1,78 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.connector.fsspec.fsspec import ( - FsspecDestinationConnector, - FsspecIngestDoc, - FsspecSourceConnector, - FsspecWriteConfig, - SimpleFsspecConfig, - WriteTextConfig, -) -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import DestinationConnectionError, SourceConnectionError -from unstructured.ingest.interfaces import AccessConfig -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - - -@dataclass -class AzureWriteTextConfig(WriteTextConfig): - overwrite: bool = False - - -@dataclass -class AzureWriteConfig(FsspecWriteConfig): - write_text_config: 
t.Optional[AzureWriteTextConfig] = None - - -@dataclass -class AzureAccessConfig(AccessConfig): - account_name: t.Optional[str] = enhanced_field(default=None, sensitive=True) - account_key: t.Optional[str] = enhanced_field(default=None, sensitive=True) - connection_string: t.Optional[str] = enhanced_field(default=None, sensitive=True) - sas_token: t.Optional[str] = enhanced_field(default=None, sensitive=True) - - -@dataclass -class SimpleAzureBlobStorageConfig(SimpleFsspecConfig): - access_config: AzureAccessConfig = None - - -@dataclass -class AzureBlobStorageIngestDoc(FsspecIngestDoc): - connector_config: SimpleAzureBlobStorageConfig - registry_name: str = "azure" - - @SourceConnectionError.wrap - @requires_dependencies(["adlfs", "fsspec"], extras="azure") - def get_file(self): - super().get_file() - - -@dataclass -class AzureBlobStorageSourceConnector(FsspecSourceConnector): - connector_config: SimpleAzureBlobStorageConfig - - def __post_init__(self): - self.ingest_doc_cls: t.Type[AzureBlobStorageIngestDoc] = AzureBlobStorageIngestDoc - - -@dataclass -class AzureBlobStorageDestinationConnector(FsspecDestinationConnector): - connector_config: SimpleAzureBlobStorageConfig - write_config: AzureWriteConfig - - @requires_dependencies(["adlfs", "fsspec"], extras="azure") - def initialize(self): - super().initialize() - - @requires_dependencies(["adlfs"], extras="azure") - def check_connection(self): - from adlfs import AzureBlobFileSystem - - try: - AzureBlobFileSystem(**self.connector_config.get_access_config()) - except ValueError as connection_error: - logger.error(f"failed to validate connection: {connection_error}", exc_info=True) - raise DestinationConnectionError(f"failed to validate connection: {connection_error}") diff --git a/unstructured/ingest/connector/fsspec/box.py b/unstructured/ingest/connector/fsspec/box.py deleted file mode 100644 index 67a56fa69..000000000 --- a/unstructured/ingest/connector/fsspec/box.py +++ /dev/null @@ -1,109 +0,0 @@ -""" -Box Connector -Box does not make it simple to download files with an App. -First of all, this does not work with a free Box account. -Make sure the App service email is a collaborator for your folder (co-owner or editor) -Make sure you have the 'write all files' application scope -Maybe check 'Make api calls as the as-user header' -REAUTHORIZE app after making any of the above changes -""" - -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.connector.fsspec.fsspec import ( - FsspecDestinationConnector, - FsspecIngestDoc, - FsspecSourceConnector, - FsspecWriteConfig, - SimpleFsspecConfig, -) -from unstructured.ingest.error import DestinationConnectionError, SourceConnectionError -from unstructured.ingest.interfaces import AccessConfig -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - - -class AccessTokenError(Exception): - """There is a problem with the Access Token.""" - - -@dataclass -class BoxWriteConfig(FsspecWriteConfig): - pass - - -@dataclass -class BoxAccessConfig(AccessConfig): - box_app_config: t.Optional[str] = None - - -@dataclass -class SimpleBoxConfig(SimpleFsspecConfig): - access_config: BoxAccessConfig = None - - @requires_dependencies(["boxfs"], extras="box") - def get_access_config(self) -> dict: - # Return access_kwargs with oauth. The oauth object can not be stored directly in the config - # because it is not serializable. 
- from boxsdk import JWTAuth - - access_kwargs_with_oauth: dict[str, t.Any] = { - "oauth": JWTAuth.from_settings_file( - self.access_config.box_app_config, - ), - } - access_config: dict[str, t.Any] = self.access_config.to_dict() - access_config.pop("box_app_config", None) - access_kwargs_with_oauth.update(access_config) - - return access_kwargs_with_oauth - - -@dataclass -class BoxIngestDoc(FsspecIngestDoc): - connector_config: SimpleBoxConfig - registry_name: str = "box" - - @SourceConnectionError.wrap - @requires_dependencies(["boxfs", "fsspec"], extras="box") - def get_file(self): - super().get_file() - - -@dataclass -class BoxSourceConnector(FsspecSourceConnector): - connector_config: SimpleBoxConfig - - @requires_dependencies(["boxfs"], extras="box") - def check_connection(self): - from boxfs import BoxFileSystem - - try: - BoxFileSystem(**self.connector_config.get_access_config()) - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - def __post_init__(self): - self.ingest_doc_cls: t.Type[BoxIngestDoc] = BoxIngestDoc - - -@dataclass -class BoxDestinationConnector(FsspecDestinationConnector): - connector_config: SimpleBoxConfig - write_config: BoxWriteConfig - - @requires_dependencies(["boxfs", "fsspec"], extras="box") - def initialize(self): - super().initialize() - - @requires_dependencies(["boxfs"], extras="box") - def check_connection(self): - from boxfs import BoxFileSystem - - try: - BoxFileSystem(**self.connector_config.get_access_config()) - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise DestinationConnectionError(f"failed to validate connection: {e}") diff --git a/unstructured/ingest/connector/fsspec/dropbox.py b/unstructured/ingest/connector/fsspec/dropbox.py deleted file mode 100644 index 23647bb6d..000000000 --- a/unstructured/ingest/connector/fsspec/dropbox.py +++ /dev/null @@ -1,160 +0,0 @@ -""" -Dropbox Connector -The Dropbox Connector presents a couple abnormal situations. -1) They don't have an unexpiring token -2) They require a forward slash `/` in front of the remote_file_path. This presents -some real problems creating paths. When appending a path that begins with a -forward slash to any path, whether using the / shorthand or joinpath, causes the -starting path to disappear. So the `/` needs to be stripped off. -3) To list and get files from the root directory Dropbox you need a ""," ", or " /" -""" - -import re -from dataclasses import dataclass -from pathlib import Path -from typing import Type - -from unstructured.ingest.connector.fsspec.fsspec import ( - FsspecDestinationConnector, - FsspecIngestDoc, - FsspecSourceConnector, - FsspecWriteConfig, - SimpleFsspecConfig, -) -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError -from unstructured.ingest.interfaces import AccessConfig -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - - -class MissingFolderError(Exception): - """There is no folder by that name. 
For root try `dropbox:// /`""" - - -@dataclass -class DropboxAccessConfig(AccessConfig): - token: str = enhanced_field(sensitive=True) - - -@dataclass -class DropboxWriteConfig(FsspecWriteConfig): - pass - - -@dataclass -class SimpleDropboxConfig(SimpleFsspecConfig): - access_config: DropboxAccessConfig = None - - -@dataclass -class DropboxIngestDoc(FsspecIngestDoc): - connector_config: SimpleDropboxConfig - registry_name: str = "dropbox" - - @SourceConnectionError.wrap - @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox") - def get_file(self): - super().get_file() - - @property - def _output_filename(self): - # Dropbox requires a forward slash at the front of the folder path. This - # creates some complications in path joining so a custom path is created here. - # Dropbox uses an empty string `""`, or a space `" "`` or a `" /"` to list root - if self.connector_config.dir_path == " ": - return Path(self.processor_config.output_dir) / re.sub( - "^/", - "", - f"{self.remote_file_path}.json", - ) - else: - return ( - Path(self.processor_config.output_dir) - / f"{self.remote_file_path.replace(f'/{self.connector_config.dir_path}/', '')}.json" - ) - - def _tmp_download_file(self): - # Dropbox requires a forward slash at the front of the folder path. This - # creates some complications in path joining so a custom path is created here. - # Dropbox uses an empty string `""`, or a space `" "`` or a `" /"` to list root - download_dir: str = self.read_config.download_dir if self.read_config.download_dir else "" - if not download_dir: - return "" - if self.connector_config.dir_path == " ": - return Path(download_dir) / re.sub( - "^/", - "", - self.remote_file_path, - ) - else: - return Path(download_dir) / self.remote_file_path.replace( - f"/{self.connector_config.dir_path}/", - "", - ) - - -@dataclass -class DropboxSourceConnector(FsspecSourceConnector): - connector_config: SimpleDropboxConfig - - def __post_init__(self): - self.ingest_doc_cls: Type[DropboxIngestDoc] = DropboxIngestDoc - - @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox") - def initialize(self): - from fsspec import AbstractFileSystem, get_filesystem_class - - try: - self.fs: AbstractFileSystem = get_filesystem_class(self.connector_config.protocol)( - **self.connector_config.get_access_config(), - ) - # Dropbox requires a forward slash at the front of the folder path. This - # creates some complications in path joining so a custom path is created here. - ls_output = self.fs.ls(f"/{self.connector_config.path_without_protocol}") - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - if ls_output and len(ls_output) >= 1: - return - elif ls_output: - raise ValueError( - f"No objects found in {self.connector_config.remote_url}.", - ) - else: - raise MissingFolderError( - "There is no folder by that name. For root try `dropbox:// /`", - ) - - def _list_files(self): - # Dropbox requires a forward slash at the front of the folder path. This - # creates some complications in path joining so a custom path is created here. 
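For reference, a small standalone pathlib example of the leading-slash behavior described in the comments above (the paths are hypothetical):

    from pathlib import PurePosixPath

    base = PurePosixPath("/tmp/output")
    # Joining a segment that starts with "/" discards the base path entirely...
    print(base / "/folder/file.json")   # PurePosixPath('/folder/file.json')
    # ...which is why the connector strips the leading slash before joining.
    print(base / "folder/file.json")    # PurePosixPath('/tmp/output/folder/file.json')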
- if not self.connector_config.recursive: - # fs.ls does not walk directories - # directories that are listed in cloud storage can cause problems because they are seen - # as 0byte files - return [ - x.get("name") - for x in self.fs.ls( - f"/{self.connector_config.path_without_protocol}", - detail=True, - ) - if x.get("size") - ] - else: - # fs.find will recursively walk directories - # "size" is a common key for all the cloud protocols with fs - return [ - k - for k, v in self.fs.find( - f"/{self.connector_config.path_without_protocol}", - detail=True, - ).items() - if v.get("size") - ] - - -@dataclass -class DropboxDestinationConnector(FsspecDestinationConnector): - connector_config: SimpleFsspecConfig - write_config: DropboxWriteConfig diff --git a/unstructured/ingest/connector/fsspec/fsspec.py b/unstructured/ingest/connector/fsspec/fsspec.py deleted file mode 100644 index 1b60a1d87..000000000 --- a/unstructured/ingest/connector/fsspec/fsspec.py +++ /dev/null @@ -1,359 +0,0 @@ -import fnmatch -import json -import os -import typing as t -from abc import ABC -from contextlib import suppress -from dataclasses import dataclass -from pathlib import Path, PurePath - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin -from unstructured.ingest.error import ( - DestinationConnectionError, - SourceConnectionError, - SourceConnectionNetworkError, -) -from unstructured.ingest.interfaces import ( - BaseConnectorConfig, - BaseDestinationConnector, - BaseSingleIngestDoc, - BaseSourceConnector, - FsspecConfig, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, - WriteConfig, -) -from unstructured.ingest.logger import logger -from unstructured.ingest.utils.compression import ( - TAR_FILE_EXT, - ZIP_FILE_EXT, - CompressionSourceConnectorMixin, -) -from unstructured.utils import ( - requires_dependencies, -) - -SUPPORTED_REMOTE_FSSPEC_PROTOCOLS = [ - "s3", - "s3a", - "abfs", - "az", - "gs", - "gcs", - "box", - "dropbox", - "sftp", -] - - -@dataclass -class SimpleFsspecConfig(FsspecConfig, BaseConnectorConfig): - pass - - -@dataclass -class FsspecIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - """Class encapsulating fetching a doc and writing processed results (but not - doing the processing!). - - Also includes a cleanup method. When things go wrong and the cleanup - method is not called, the file is left behind on the filesystem to assist debugging. 
- """ - - connector_config: SimpleFsspecConfig - remote_file_path: str - - def _tmp_download_file(self): - download_dir = self.read_config.download_dir if self.read_config.download_dir else "" - return Path(download_dir) / self.remote_file_path.replace( - f"{self.connector_config.dir_path}/", - "", - ) - - @property - def _output_filename(self): - # Dynamically parse filename , can change if remote path was pointing to the single - # file, a directory, or nested directory - if self.remote_file_path == self.connector_config.path_without_protocol: - file = self.remote_file_path.split("/")[-1] - filename = f"{file}.json" - else: - path_without_protocol = ( - self.connector_config.path_without_protocol - if self.connector_config.path_without_protocol.endswith("/") - else f"{self.connector_config.path_without_protocol}/" - ) - filename = f"{self.remote_file_path.replace(path_without_protocol, '')}.json" - return Path(self.processor_config.output_dir) / filename - - def _create_full_tmp_dir_path(self): - """Includes "directories" in the object path""" - self._tmp_download_file().parent.mkdir(parents=True, exist_ok=True) - - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - """Fetches the file from the current filesystem and stores it locally.""" - from fsspec import AbstractFileSystem, get_filesystem_class - - self._create_full_tmp_dir_path() - fs: AbstractFileSystem = get_filesystem_class(self.connector_config.protocol)( - **self.connector_config.get_access_config(), - ) - self._get_file(fs=fs) - fs.get(rpath=self.remote_file_path, lpath=self._tmp_download_file().as_posix()) - self.update_source_metadata() - - @SourceConnectionNetworkError.wrap - def _get_file(self, fs): - fs.get(rpath=self.remote_file_path, lpath=self._tmp_download_file().as_posix()) - - @requires_dependencies(["fsspec"]) - def update_source_metadata(self): - from fsspec import AbstractFileSystem, get_filesystem_class - - fs: AbstractFileSystem = get_filesystem_class(self.connector_config.protocol)( - **self.connector_config.get_access_config(), - ) - - date_created = None - with suppress(NotImplementedError): - date_created = fs.created(self.remote_file_path).isoformat() - - date_modified = None - with suppress(NotImplementedError): - date_modified = fs.modified(self.remote_file_path).isoformat() - - version = ( - fs.checksum(self.remote_file_path) - if self.connector_config.protocol != "gs" - else fs.info(self.remote_file_path).get("etag", "") - ) - file_exists = fs.exists(self.remote_file_path) - self.source_metadata = SourceMetadata( - date_created=date_created, - date_modified=date_modified, - version=str(version), - source_url=f"{self.connector_config.protocol}://{self.remote_file_path}", - exists=file_exists, - ) - - @property - def filename(self): - """The filename of the file after downloading from cloud""" - return self._tmp_download_file() - - @property - def record_locator(self) -> t.Optional[t.Dict[str, t.Any]]: - """Returns the equivalent of ls in dict""" - return { - "protocol": self.connector_config.protocol, - "remote_file_path": self.remote_file_path, - } - - -@dataclass -class FsspecSourceConnector( - SourceConnectorCleanupMixin, - CompressionSourceConnectorMixin, - BaseSourceConnector, -): - """Objects of this class support fetching document(s) from""" - - connector_config: SimpleFsspecConfig - - def check_connection(self): - from fsspec import get_filesystem_class - - try: - fs = get_filesystem_class(self.connector_config.protocol)( - 
**self.connector_config.get_access_config(), - ) - fs.ls(path=self.connector_config.path_without_protocol, detail=False) - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - def __post_init__(self): - self.ingest_doc_cls: t.Type[FsspecIngestDoc] = FsspecIngestDoc - - def initialize(self): - from fsspec import AbstractFileSystem, get_filesystem_class - - self.fs: AbstractFileSystem = get_filesystem_class(self.connector_config.protocol)( - **self.connector_config.get_access_config(), - ) - - """Verify that we can get metadata for an object; this validates the connection info.""" - ls_output = self.fs.ls(self.connector_config.path_without_protocol, detail=False) - if len(ls_output) < 1: - raise ValueError( - f"No objects found in {self.connector_config.remote_url}.", - ) - - def _list_files(self): - if not self.connector_config.recursive: - # fs.ls does not walk directories - # directories that are listed in cloud storage can cause problems - # because they are seen as 0 byte files - return [ - x.get("name") - for x in self.fs.ls(self.connector_config.path_without_protocol, detail=True) - if x.get("size") > 0 - ] - else: - # fs.find will recursively walk directories - # "size" is a common key for all the cloud protocols with fs - return [ - k - for k, v in self.fs.find( - self.connector_config.path_without_protocol, - detail=True, - ).items() - if v.get("size") > 0 - ] - - def does_path_match_glob(self, path: str) -> bool: - if self.connector_config.file_glob is None: - return True - patterns = self.connector_config.file_glob - for pattern in patterns: - if fnmatch.filter([path], pattern): - return True - logger.debug(f"The file {path!r} is discarded as it does not match any given glob.") - return False - - def get_ingest_docs(self): - raw_files = self._list_files() - # If glob filters are provided, use them to filter filepaths - files = [f for f in raw_files if self.does_path_match_glob(f)] - # remove compressed files - compressed_file_ext = TAR_FILE_EXT + ZIP_FILE_EXT - compressed_files = [] - uncompressed_files = [] - docs: t.List[BaseSingleIngestDoc] = [] - for file in files: - if any(file.endswith(ext) for ext in compressed_file_ext): - compressed_files.append(file) - else: - uncompressed_files.append(file) - docs.extend( - [ - self.ingest_doc_cls( - read_config=self.read_config, - connector_config=self.connector_config, - processor_config=self.processor_config, - remote_file_path=file, - ) - for file in uncompressed_files - ], - ) - if not self.connector_config.uncompress: - return docs - for compressed_file in compressed_files: - compressed_doc = self.ingest_doc_cls( - read_config=self.read_config, - processor_config=self.processor_config, - connector_config=self.connector_config, - remote_file_path=compressed_file, - ) - try: - local_ingest_docs = self.process_compressed_doc(doc=compressed_doc) - logger.info(f"adding {len(local_ingest_docs)} from {compressed_file}") - docs.extend(local_ingest_docs) - finally: - compressed_doc.cleanup_file() - return docs - - -@dataclass -class WriteTextConfig(EnhancedDataClassJsonMixin, ABC): - pass - - -@dataclass -class FsspecWriteConfig(WriteConfig): - write_text_config: t.Optional[WriteTextConfig] = None - - def get_write_text_config(self) -> t.Dict[str, t.Any]: - if write_text_kwargs := self.write_text_config: - return write_text_kwargs.to_dict() - return {} - - -@dataclass -class FsspecDestinationConnector(BaseDestinationConnector): -
connector_config: SimpleFsspecConfig - write_config: FsspecWriteConfig - - def initialize(self): - from fsspec import AbstractFileSystem, get_filesystem_class - - self.fs: AbstractFileSystem = get_filesystem_class(self.connector_config.protocol)( - **self.connector_config.get_access_config(), - ) - self.check_connection() - - def check_connection(self): - from fsspec import AbstractFileSystem, get_filesystem_class - - try: - fs: AbstractFileSystem = get_filesystem_class(self.connector_config.protocol)( - **self.connector_config.get_access_config(), - ) - - # e.g. Dropbox path starts with / - bucket_name = "/" if self.connector_config.path_without_protocol.startswith("/") else "" - bucket_name += self.connector_config.dir_path.split("/")[0] - - logger.info(f"checking connection for destination {bucket_name}") - fs.ls(path=bucket_name, detail=False) - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise DestinationConnectionError(f"failed to validate connection: {e}") - - def write_dict( - self, - *args, - elements_dict: t.List[t.Dict[str, t.Any]], - filename: t.Optional[str] = None, - indent: int = 4, - encoding: str = "utf-8", - **kwargs, - ) -> None: - from fsspec import AbstractFileSystem, get_filesystem_class - - fs: AbstractFileSystem = get_filesystem_class(self.connector_config.protocol)( - **self.connector_config.get_access_config(), - ) - - logger.info(f"Writing content using filesystem: {type(fs).__name__}") - - output_folder = self.connector_config.path_without_protocol - output_folder = os.path.join(output_folder) # Make sure folder ends with a file separator - filename = ( - filename.strip(os.sep) if filename else filename - ) # Make sure filename doesn't begin with a file separator - output_path = str(PurePath(output_folder, filename)) if filename else output_folder - full_output_path = f"{self.connector_config.protocol}://{output_path}" - logger.debug(f"uploading content to {full_output_path}") - write_text_configs = self.write_config.get_write_text_config() if self.write_config else {} - fs.write_text( - full_output_path, - json.dumps(elements_dict, indent=indent), - encoding=encoding, - **write_text_configs, - ) - - def get_elements_dict(self, docs: t.List[BaseSingleIngestDoc]) -> t.List[t.Dict[str, t.Any]]: - pass - - def write(self, docs: t.List[BaseSingleIngestDoc]) -> None: - for doc in docs: - file_path = doc.base_output_filename - filename = file_path if file_path else None - with open(doc._output_filename) as json_file: - logger.debug(f"uploading content from {doc._output_filename}") - json_list = json.load(json_file) - self.write_dict(elements_dict=json_list, filename=filename) diff --git a/unstructured/ingest/connector/fsspec/gcs.py b/unstructured/ingest/connector/fsspec/gcs.py deleted file mode 100644 index db5b0de44..000000000 --- a/unstructured/ingest/connector/fsspec/gcs.py +++ /dev/null @@ -1,82 +0,0 @@ -import typing as t -from dataclasses import dataclass -from pathlib import Path -from typing import Type - -from unstructured.ingest.connector.fsspec.fsspec import ( - FsspecDestinationConnector, - FsspecIngestDoc, - FsspecSourceConnector, - FsspecWriteConfig, - SimpleFsspecConfig, -) -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError -from unstructured.ingest.interfaces import AccessConfig -from unstructured.ingest.utils.string_and_date_utils import json_to_dict -from unstructured.utils import requires_dependencies - - -@dataclass -class
GcsAccessConfig(AccessConfig): - token: t.Optional[str] = enhanced_field( - default=None, sensitive=True, overload_name="service_account_key" - ) - - def __post_init__(self): - ALLOWED_AUTH_VALUES = "google_default", "cache", "anon", "browser", "cloud" - - # Case: null value - if not self.token: - return - # Case: one of auth constants - if self.token in ALLOWED_AUTH_VALUES: - return - # Case: token as json - if isinstance(json_to_dict(self.token), dict): - self.token = json_to_dict(self.token) - return - # Case: path to token - if Path(self.token).is_file(): - return - - raise ValueError("Invalid auth token value") - - -@dataclass -class GcsWriteConfig(FsspecWriteConfig): - pass - - -@dataclass -class SimpleGcsConfig(SimpleFsspecConfig): - access_config: GcsAccessConfig = None - - -@dataclass -class GcsIngestDoc(FsspecIngestDoc): - connector_config: SimpleGcsConfig - registry_name: str = "gcs" - - @SourceConnectionError.wrap - @requires_dependencies(["gcsfs", "fsspec"], extras="gcs") - def get_file(self): - super().get_file() - - -@dataclass -class GcsSourceConnector(FsspecSourceConnector): - connector_config: SimpleGcsConfig - - @requires_dependencies(["gcsfs", "fsspec"], extras="gcs") - def initialize(self): - super().initialize() - - def __post_init__(self): - self.ingest_doc_cls: Type[GcsIngestDoc] = GcsIngestDoc - - -@dataclass -class GcsDestinationConnector(FsspecDestinationConnector): - connector_config: SimpleGcsConfig - write_config: GcsWriteConfig diff --git a/unstructured/ingest/connector/fsspec/s3.py b/unstructured/ingest/connector/fsspec/s3.py deleted file mode 100644 index 799276a27..000000000 --- a/unstructured/ingest/connector/fsspec/s3.py +++ /dev/null @@ -1,62 +0,0 @@ -import typing as t -from dataclasses import dataclass -from typing import Type - -from unstructured.ingest.connector.fsspec.fsspec import ( - FsspecDestinationConnector, - FsspecIngestDoc, - FsspecSourceConnector, - FsspecWriteConfig, - SimpleFsspecConfig, -) -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.interfaces import AccessConfig -from unstructured.utils import requires_dependencies - - -@dataclass -class S3AccessConfig(AccessConfig): - anon: bool = enhanced_field(default=False, overload_name="anonymous") - endpoint_url: t.Optional[str] = None - key: t.Optional[str] = enhanced_field(default=None, sensitive=True) - secret: t.Optional[str] = enhanced_field(default=None, sensitive=True) - token: t.Optional[str] = enhanced_field(default=None, sensitive=True) - - -@dataclass -class S3WriteConfig(FsspecWriteConfig): - pass - - -@dataclass -class SimpleS3Config(SimpleFsspecConfig): - access_config: S3AccessConfig = enhanced_field(default=None) - - -@dataclass -class S3IngestDoc(FsspecIngestDoc): - connector_config: SimpleS3Config - remote_file_path: str - registry_name: str = "s3" - - @requires_dependencies(["s3fs", "fsspec"], extras="s3") - def get_file(self): - super().get_file() - - -@dataclass -class S3SourceConnector(FsspecSourceConnector): - connector_config: SimpleS3Config - - def __post_init__(self): - self.ingest_doc_cls: Type[S3IngestDoc] = S3IngestDoc - - -@dataclass -class S3DestinationConnector(FsspecDestinationConnector): - connector_config: SimpleS3Config - write_config: S3WriteConfig - - @requires_dependencies(["s3fs", "fsspec"], extras="s3") - def initialize(self): - super().initialize() diff --git a/unstructured/ingest/connector/fsspec/sftp.py b/unstructured/ingest/connector/fsspec/sftp.py deleted file mode 100644 index f179fc233..000000000 --- 
a/unstructured/ingest/connector/fsspec/sftp.py +++ /dev/null @@ -1,81 +0,0 @@ -import os -from dataclasses import dataclass -from pathlib import Path -from typing import Type -from urllib.parse import urlparse - -from unstructured.ingest.connector.fsspec.fsspec import ( - FsspecIngestDoc, - FsspecSourceConnector, - SimpleFsspecConfig, -) -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError -from unstructured.ingest.interfaces import AccessConfig -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - - -@dataclass -class SftpAccessConfig(AccessConfig): - username: str - password: str = enhanced_field(sensitive=True) - host: str = "" - port: int = 22 - look_for_keys: bool = False - allow_agent: bool = False - - -@dataclass -class SimpleSftpConfig(SimpleFsspecConfig): - access_config: SftpAccessConfig = None - - def __post_init__(self): - super().__post_init__() - - _, ext = os.path.splitext(self.remote_url) - parsed_url = urlparse(self.remote_url) - if ext: - # We only want the file_path if it has an extension - self.file_path = Path(self.remote_url).name - self.dir_path = Path(parsed_url.path).parent.as_posix().lstrip("/") - self.path_without_protocol = self.dir_path - else: - self.file_path = "" - self.dir_path = parsed_url.path.lstrip("/") - self.path_without_protocol = self.dir_path - self.access_config.host = parsed_url.hostname or self.access_config.host - self.access_config.port = parsed_url.port or self.access_config.port - - -@dataclass -class SftpIngestDoc(FsspecIngestDoc): - connector_config: SimpleSftpConfig - registry_name: str = "sftp" - - @SourceConnectionError.wrap - @requires_dependencies(["paramiko", "fsspec"], extras="sftp") - def get_file(self): - super().get_file() - - -@dataclass -class SftpSourceConnector(FsspecSourceConnector): - connector_config: SimpleSftpConfig - - @requires_dependencies(["paramiko", "fsspec"], extras="sftp") - def initialize(self): - super().initialize() - - @requires_dependencies(["paramiko", "fsspec"], extras="sftp") - def check_connection(self): - from fsspec.implementations.sftp import SFTPFileSystem - - try: - SFTPFileSystem(**self.connector_config.get_access_config()) - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - def __post_init__(self): - self.ingest_doc_cls: Type[SftpIngestDoc] = SftpIngestDoc diff --git a/unstructured/ingest/connector/git.py b/unstructured/ingest/connector/git.py deleted file mode 100644 index e03b6f4e7..000000000 --- a/unstructured/ingest/connector/git.py +++ /dev/null @@ -1,124 +0,0 @@ -import fnmatch -import typing as t -from dataclasses import dataclass, field -from pathlib import Path - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, -) -from unstructured.ingest.logger import logger - - -@dataclass -class GitAccessConfig(AccessConfig): - access_token: t.Optional[str] = enhanced_field( - default=None, sensitive=True, overload_name="git_access_token" - ) - - -@dataclass -class SimpleGitConfig(BaseConnectorConfig): - url: str - access_config: GitAccessConfig - branch: t.Optional[str] = enhanced_field(default=None, 
overload_name="git_branch") - file_glob: t.Optional[t.List[str]] = enhanced_field(default=None, overload_name="git_file_glob") - repo_path: str = field(init=False, repr=False) - - -@dataclass -class GitIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - connector_config: SimpleGitConfig = field(repr=False) - path: str - - @property - def filename(self): - return (Path(self.read_config.download_dir) / self.path).resolve() - - @property - def _output_filename(self): - return Path(self.processor_config.output_dir) / f"{self.path}.json" - - @property - def record_locator(self) -> t.Dict[str, t.Any]: - record_locator = { - "repo_path": self.connector_config.repo_path, - "file_path": self.path, - } - if self.connector_config.branch is not None: - record_locator["branch"] = self.connector_config.branch - return record_locator - - def _create_full_tmp_dir_path(self): - """includes directories in the gitlab repository""" - self.filename.parent.mkdir(parents=True, exist_ok=True) - - def update_source_metadata(self, **kwargs): - raise NotImplementedError() - - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - """Fetches the "remote" doc and stores it locally on the filesystem.""" - self._create_full_tmp_dir_path() - self._fetch_and_write() - - def _fetch_content(self) -> None: - raise NotImplementedError() - - def _fetch_and_write(self) -> None: - raise NotImplementedError() - - -@dataclass -class GitSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - connector_config: SimpleGitConfig - - def initialize(self): - pass - - def check_connection(self): - pass - - @staticmethod - def is_file_type_supported(path: str) -> bool: - # Workaround to ensure that auto.partition isn't fed with .yaml, .py, etc. files - # TODO: What to do with no filenames? e.g. LICENSE, Makefile, etc.
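For reference, a standalone sketch of the fnmatch-based glob filtering used by does_path_match_glob below (and by the fsspec connector above); the patterns and paths are hypothetical:

    import fnmatch

    patterns = ["*.pdf", "docs/*.md"]  # hypothetical file_glob values
    paths = ["docs/readme.md", "src/app.py", "report.pdf"]
    # fnmatch.filter returns the subset of paths matching a pattern;
    # a non-empty result means the path passes the glob filter.
    matched = [p for p in paths if any(fnmatch.filter([p], pat) for pat in patterns)]
    print(matched)  # ['docs/readme.md', 'report.pdf']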
- supported = path.endswith( - ( - ".md", - ".txt", - ".pdf", - ".doc", - ".docx", - ".eml", - ".heic", - ".html", - ".png", - ".jpg", - ".ppt", - ".pptx", - ".xml", - ), - ) - if not supported: - logger.debug( - f"The file {path!r} is discarded as it does not contain a supported filetype.", - ) - return supported - - def does_path_match_glob(self, path: str) -> bool: - if not self.connector_config.file_glob: - return True - patterns = self.connector_config.file_glob - for pattern in patterns: - if fnmatch.filter([path], pattern): - return True - logger.debug(f"The file {path!r} is discarded as it does not match any given glob.") - return False diff --git a/unstructured/ingest/connector/github.py b/unstructured/ingest/connector/github.py deleted file mode 100644 index 2a63b8f32..000000000 --- a/unstructured/ingest/connector/github.py +++ /dev/null @@ -1,173 +0,0 @@ -import typing as t -from dataclasses import dataclass -from datetime import datetime -from urllib.parse import urlparse - -import requests - -from unstructured.ingest.connector.git import ( - GitIngestDoc, - GitSourceConnector, - SimpleGitConfig, -) -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import SourceMetadata -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from github.Repository import Repository - - -@dataclass -class SimpleGitHubConfig(SimpleGitConfig): - def __post_init__(self): - parsed_gh_url = urlparse(self.url) - path_fragments = [fragment for fragment in parsed_gh_url.path.split("/") if fragment] - - # If a scheme and netloc are provided, ensure they are correct - # Additionally, ensure that the path contains two fragments - if ( - (parsed_gh_url.scheme and parsed_gh_url.scheme != "https") - or (parsed_gh_url.netloc and parsed_gh_url.netloc != "github.com") - or len(path_fragments) != 2 - ): - raise ValueError( - 'Please provide a valid URL, e.g. "https://github.com/Unstructured-IO/unstructured"' - ' or a repository owner/name pair, e.g. "Unstructured-IO/unstructured".', - ) - - # If there's no issues, store the core repository info - self.repo_path = parsed_gh_url.path - - @SourceConnectionError.wrap - @requires_dependencies(["github"], extras="github") - def get_repo(self) -> "Repository": - from github import Github - - github = Github(self.access_config.access_token) - return github.get_repo(self.repo_path) - - -@dataclass -class GitHubIngestDoc(GitIngestDoc): - connector_config: SimpleGitHubConfig - registry_name: str = "github" - - @property - def date_created(self) -> t.Optional[str]: - return None - - @requires_dependencies(["github"], extras="github") - def _fetch_file(self): - from github.GithubException import UnknownObjectException - - try: - content_file = self.connector_config.get_repo().get_contents(self.path) - except UnknownObjectException: - logger.error(f"File doesn't exists {self.connector_config.url}/{self.path}") - return None - - return content_file - - @SourceConnectionNetworkError.wrap - def _fetch_content(self, content_file): - contents = b"" - if ( - not content_file.content # type: ignore - and content_file.encoding == "none" # type: ignore - and content_file.size # type: ignore - ): - logger.info("File too large for the GitHub API, using direct download link instead.") - # NOTE: Maybe add a raise_for_status to catch connection timeout or HTTP Errors? 
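A sketch of what the NOTE above could look like if implemented; this is a standalone example, not the shipped connector code, and the 30-second timeout is an arbitrary choice:

    import logging
    import typing as t

    import requests

    logger = logging.getLogger(__name__)

    def download_via_link(download_url: str) -> t.Optional[bytes]:
        try:
            # timeout guards against hung connections; raise_for_status turns
            # 4xx/5xx responses into an HTTPError instead of returning them silently
            response = requests.get(download_url, timeout=30)
            response.raise_for_status()
        except requests.RequestException as e:  # HTTPError, ConnectionError, Timeout, ...
            logger.info("Direct download link has failed (%s)... Skipping this file.", e)
            return None
        return response.content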
- response = requests.get(content_file.download_url) # type: ignore - if response.status_code != 200: - logger.info("Direct download link has failed... Skipping this file.") - return None - else: - contents = response.content - else: - contents = content_file.decoded_content # type: ignore - return contents - - def update_source_metadata(self, **kwargs): - content_file = kwargs.get("content_file", self._fetch_file()) - if content_file is None: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - - date_modified = datetime.strptime( - content_file.last_modified, - "%a, %d %b %Y %H:%M:%S %Z", - ).isoformat() - self.source_metadata = SourceMetadata( - date_modified=date_modified, - version=content_file.etag, - source_url=content_file.download_url, - exists=True, - ) - - def _fetch_and_write(self) -> None: - content_file = self._fetch_file() - self.update_source_metadata(content_file=content_file) - contents = self._fetch_content(content_file) - if contents is None: - raise ValueError( - f"Failed to retrieve file from repo " - f"{self.connector_config.url}/{self.path}. Check logs", - ) - with open(self.filename, "wb") as f: - f.write(contents) - - -@dataclass -class GitHubSourceConnector(GitSourceConnector): - connector_config: SimpleGitHubConfig - - @requires_dependencies(["github"], extras="github") - def check_connection(self): - from github import Consts - from github.GithubRetry import GithubRetry - from github.Requester import Requester - - try: - requester = Requester( - auth=self.connector_config.access_config.access_token, - base_url=Consts.DEFAULT_BASE_URL, - timeout=Consts.DEFAULT_TIMEOUT, - user_agent=Consts.DEFAULT_USER_AGENT, - per_page=Consts.DEFAULT_PER_PAGE, - verify=True, - retry=GithubRetry(), - pool_size=None, - ) - url_base = ( - "/repositories/" if isinstance(self.connector_config.repo_path, int) else "/repos/" - ) - url = f"{url_base}{self.connector_config.repo_path}" - headers, _ = requester.requestJsonAndCheck("HEAD", url) - logger.debug(f"headers from HEAD request: {headers}") - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - def get_ingest_docs(self): - repo = self.connector_config.get_repo() - # Load the Git tree with all files, and then create Ingest docs - # for all blobs, i.e. 
all files, ignoring directories - sha = self.connector_config.branch or repo.default_branch - git_tree = repo.get_git_tree(sha, recursive=True) - return [ - GitHubIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - path=element.path, - ) - for element in git_tree.tree - if element.type == "blob" - and self.is_file_type_supported(element.path) - and (not self.connector_config.file_glob or self.does_path_match_glob(element.path)) - ] diff --git a/unstructured/ingest/connector/gitlab.py b/unstructured/ingest/connector/gitlab.py deleted file mode 100644 index 1d1e6c5f8..000000000 --- a/unstructured/ingest/connector/gitlab.py +++ /dev/null @@ -1,142 +0,0 @@ -import typing as t -from dataclasses import dataclass -from urllib.parse import urlparse - -from unstructured.ingest.connector.git import ( - GitIngestDoc, - GitSourceConnector, - SimpleGitConfig, -) -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import SourceMetadata -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from gitlab.v4.objects.projects import Project - - -@dataclass -class SimpleGitlabConfig(SimpleGitConfig): - base_url: str = "https://gitlab.com" - - def __post_init__(self): - parsed_gh_url = urlparse(self.url) - # If a scheme or netloc are provided, use the parsed base url - if parsed_gh_url.scheme or parsed_gh_url.netloc: - self.base_url = f"{parsed_gh_url.scheme}://{parsed_gh_url.netloc}" - self.repo_path = parsed_gh_url.path - while self.repo_path.startswith("/"): - self.repo_path = self.repo_path[1:] - - @SourceConnectionError.wrap - @requires_dependencies(["gitlab"], extras="gitlab") - def get_project(self) -> "Project": - from gitlab import Gitlab - - gitlab = Gitlab(self.base_url, private_token=self.access_config.access_token) - return gitlab.projects.get(self.repo_path) - - -@dataclass -class GitLabIngestDoc(GitIngestDoc): - connector_config: SimpleGitlabConfig - registry_name: str = "gitlab" - - @property - def date_created(self) -> t.Optional[str]: - return None - - @property - def date_modified(self) -> t.Optional[str]: - return None - - @property - def source_url(self) -> t.Optional[str]: - return None - - @SourceConnectionNetworkError.wrap - @requires_dependencies(["gitlab"], extras="gitlab") - def _fetch_content(self): - from gitlab.exceptions import GitlabHttpError - - try: - project = self.connector_config.get_project() - content_file = project.files.get( - self.path, - ref=self.connector_config.branch or project.default_branch, - ) - except GitlabHttpError as e: - if e.response_code == 404: - logger.error(f"File doesn't exists {self.connector_config.url}/{self.path}") - return None - raise - return content_file - - def update_source_metadata(self, **kwargs): - content_file = kwargs.get("content_file", self._fetch_content()) - if content_file is None: - self.source_metadata = SourceMetadata( - exists=None, - ) - return - self.source_metadata = SourceMetadata( - version=content_file.attributes.get("last_commit_id", ""), - exists=True, - ) - - def _fetch_and_write(self) -> None: - content_file = self._fetch_content() - self.update_source_metadata(content_file=content_file) - if content_file is None: - raise ValueError( - f"Failed to retrieve file from repo " - f"{self.connector_config.url}/{self.path}. 
Check logs.", - ) - contents = content_file.decode() - with open(self.filename, "wb") as f: - f.write(contents) - - -@dataclass -class GitLabSourceConnector(GitSourceConnector): - connector_config: SimpleGitlabConfig - - @requires_dependencies(["gitlab"], extras="gitlab") - def check_connection(self): - from gitlab import Gitlab - from gitlab.exceptions import GitlabError - - try: - gitlab = Gitlab( - self.connector_config.base_url, - private_token=self.connector_config.access_config.access_token, - ) - gitlab.auth() - except GitlabError as gitlab_error: - logger.error(f"failed to validate connection: {gitlab_error}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {gitlab_error}") - - def get_ingest_docs(self): - # Load the Git tree with all files, and then create Ingest docs - # for all blobs, i.e. all files, ignoring directories - project = self.connector_config.get_project() - ref = self.connector_config.branch or project.default_branch - git_tree = project.repository_tree( - ref=ref, - recursive=True, - iterator=True, - all=True, - ) - return [ - GitLabIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - path=element["path"], - ) - for element in git_tree - if element["type"] == "blob" - and self.is_file_type_supported(element["path"]) - and (not self.connector_config.file_glob or self.does_path_match_glob(element["path"])) - ] diff --git a/unstructured/ingest/connector/google_drive.py b/unstructured/ingest/connector/google_drive.py deleted file mode 100644 index e3b0f931c..000000000 --- a/unstructured/ingest/connector/google_drive.py +++ /dev/null @@ -1,348 +0,0 @@ -import io -import json -import os -import typing as t -from dataclasses import dataclass, field -from datetime import datetime -from mimetypes import guess_extension -from pathlib import Path - -from unstructured.file_utils.google_filetype import GOOGLE_DRIVE_EXPORT_TYPES -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseSessionHandle, - BaseSingleIngestDoc, - BaseSourceConnector, - ConfigSessionHandleMixin, - IngestDocCleanupMixin, - IngestDocSessionHandleMixin, - SourceConnectorCleanupMixin, - SourceMetadata, -) -from unstructured.ingest.logger import logger -from unstructured.ingest.utils.string_and_date_utils import json_to_dict -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from googleapiclient.discovery import Resource as GoogleAPIResource - from googleapiclient.http import MediaIoBaseDownload - -FILE_FORMAT = "{id}-{name}{ext}" -DIRECTORY_FORMAT = "{id}-{name}" - - -@dataclass -class GoogleDriveSessionHandle(BaseSessionHandle): - service: "GoogleAPIResource" - - -@requires_dependencies(["googleapiclient"], extras="google-drive") -def create_service_account_object(key_path: t.Union[str, dict], id=None): - """ - Creates a service object for interacting with Google Drive. - - Providing a drive id enforces a key validation process. - - Args: - key_path: Path to Google Drive service account json file. (or the actual json) - id: ID of a file on Google Drive. File has to be either publicly accessible or accessible - to the service account. 
- - Returns: - Service account object - """ - from google.auth import default, exceptions - from google.oauth2 import service_account - from googleapiclient.discovery import build - from googleapiclient.errors import HttpError - - # Service account key can be a dict or a file path(str) - # But the dict may come in as a string - key_path = json_to_dict(key_path) - - try: - if isinstance(key_path, dict): - creds = service_account.Credentials.from_service_account_info(key_path) - elif isinstance(key_path, str): - os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path - creds, _ = default() - else: - raise ValueError( - f"key path not recognized as a dictionary or a file path: " - f"[{type(key_path)}] {key_path}", - ) - service = build("drive", "v3", credentials=creds) - - if id: - service.files().list( - spaces="drive", - fields="files(id)", - pageToken=None, - corpora="user", - q=f"'{id}' in parents", - ).execute() - - except HttpError as exc: - raise ValueError(f"{exc.reason}") - except exceptions.DefaultCredentialsError: - raise ValueError("The provided API key is invalid.") - - return service - - -@dataclass -class GoogleDriveAccessConfig(AccessConfig): - service_account_key: t.Union[str, dict] = enhanced_field(sensitive=True) - - -@dataclass -class SimpleGoogleDriveConfig(ConfigSessionHandleMixin, BaseConnectorConfig): - """Connector config where drive_id is the id of the document to process or - the folder to process all documents from.""" - - # Google Drive Specific Options - drive_id: str - access_config: GoogleDriveAccessConfig - extension: t.Optional[str] = None - recursive: bool = False - - def create_session_handle( - self, - ) -> GoogleDriveSessionHandle: - service = create_service_account_object(self.access_config.service_account_key) - return GoogleDriveSessionHandle(service=service) - - -@dataclass -class GoogleDriveIngestDoc(IngestDocSessionHandleMixin, IngestDocCleanupMixin, BaseSingleIngestDoc): - connector_config: SimpleGoogleDriveConfig - meta: t.Dict[str, str] = field(default_factory=dict) - registry_name: str = "google_drive" - - @property - def filename(self): - return Path(self.meta.get("download_filepath")).resolve() # type: ignore - - @property - def _output_filename(self): - return Path(f"{self.meta.get('output_filepath')}.json").resolve() - - @property - def record_locator(self) -> t.Optional[t.Dict[str, t.Any]]: - return { - "drive_id": self.connector_config.drive_id, - "file_id": self.meta["id"], - } - - @requires_dependencies(["googleapiclient"], extras="google-drive") - def update_source_metadata(self): - from googleapiclient.errors import HttpError - - try: - file_obj = ( - self.session_handle.service.files() - .get( - fileId=self.meta["id"], - fields="id, createdTime, modifiedTime, version, webContentLink", - ) - .execute() - ) - except HttpError as e: - if e.status_code == 404: - logger.error(f"File {self.meta['name']} not found") - self.source_metadata = SourceMetadata( - exists=True, - ) - return - raise - - date_created = None - if dc := file_obj.get("createdTime", ""): - date_created = datetime.strptime( - dc, - "%Y-%m-%dT%H:%M:%S.%fZ", - ).isoformat() - - date_modified = None - if dm := file_obj.get("modifiedTime", ""): - date_modified = datetime.strptime( - dm, - "%Y-%m-%dT%H:%M:%S.%fZ", - ).isoformat() - - self.source_metadata = SourceMetadata( - date_created=date_created, - date_modified=date_modified, - version=file_obj.get("version", ""), - source_url=file_obj.get("webContentLink", ""), - exists=True, - ) - - @SourceConnectionNetworkError.wrap - 
def _run_downloader(self, downloader: "MediaIoBaseDownload") -> bool: - downloaded = False - while downloaded is False: - _, downloaded = downloader.next_chunk() - return downloaded - - @requires_dependencies(["googleapiclient"], extras="google-drive") - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - from googleapiclient.http import MediaIoBaseDownload - - if self.meta.get("mimeType", "").startswith("application/vnd.google-apps"): - export_mime = GOOGLE_DRIVE_EXPORT_TYPES.get( - self.meta.get("mimeType"), # type: ignore - ) - if not export_mime: - logger.info( - f"File not supported. Name: {self.meta.get('name')} " - f"ID: {self.meta.get('id')} " - f"MimeType: {self.meta.get('mimeType')}", - ) - return - - request = self.session_handle.service.files().export_media( - fileId=self.meta.get("id"), - mimeType=export_mime, - ) - else: - request = self.session_handle.service.files().get_media(fileId=self.meta.get("id")) - file = io.BytesIO() - downloader = MediaIoBaseDownload(file, request) - self.update_source_metadata() - downloaded = self._run_downloader(downloader=downloader) - - saved = False - if downloaded and file: - dir_ = Path(self.meta["download_dir"]) - if dir_: - if not dir_.is_dir(): - logger.debug(f"Creating directory: {self.meta.get('download_dir')}") - - if dir_: - dir_.mkdir(parents=True, exist_ok=True) - - with open(self.filename, "wb") as handler: - handler.write(file.getbuffer()) - saved = True - logger.debug(f"File downloaded: {self.filename}.") - if not saved: - logger.error(f"Error while downloading and saving file: {self.filename}.") - - def write_result(self): - """Write the structured json result for this doc. result must be json serializable.""" - if self.read_config.download_only: - return - self._output_filename.parent.mkdir(parents=True, exist_ok=True) - with open(self._output_filename, "w") as output_f: - output_f.write(json.dumps(self.isd_elems_no_filename, ensure_ascii=False, indent=2)) - logger.info(f"Wrote {self._output_filename}") - - -@dataclass -class GoogleDriveSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - """Objects of this class support fetching documents from Google Drive""" - - connector_config: SimpleGoogleDriveConfig - - def _list_objects(self, drive_id, recursive=False): - files = [] - service = self.connector_config.create_session_handle().service - - def traverse(drive_id, download_dir, output_dir, recursive=False): - page_token = None - while True: - response = ( - service.files() - .list( - spaces="drive", - fields="nextPageToken, files(id, name, mimeType)", - pageToken=page_token, - corpora="user", - q=f"'{drive_id}' in parents", - ) - .execute() - ) - - for meta in response.get("files", []): - if meta.get("mimeType") == "application/vnd.google-apps.folder": - dir_ = DIRECTORY_FORMAT.format(name=meta.get("name"), id=meta.get("id")) - if recursive: - download_sub_dir = (download_dir / dir_).resolve() - output_sub_dir = (output_dir / dir_).resolve() - traverse(meta.get("id"), download_sub_dir, output_sub_dir, True) - else: - ext = "" - if not Path(meta.get("name")).suffixes: - guess = guess_extension(meta.get("mimeType")) - ext = guess if guess else ext - - if meta.get("mimeType", "").startswith("application/vnd.google-apps"): - export_mime = GOOGLE_DRIVE_EXPORT_TYPES.get(meta.get("mimeType")) - if not export_mime: - logger.info( - f"File {meta.get('name')} has an " - f"unsupported MimeType {meta.get('mimeType')}", - ) - continue - - if not ext: - guess = 
guess_extension(export_mime) - ext = guess if guess else ext - - # TODO (Habeeb): Consider filtering at the query level. - if ( - self.connector_config.extension - and self.connector_config.extension != ext - ): # noqa: SIM102 - logger.debug( - f"File {meta.get('name')} does not match " - f"the file type {self.connector_config.extension}", - ) - continue - - name = FILE_FORMAT.format(name=meta.get("name"), id=meta.get("id"), ext=ext) - meta["download_dir"] = str(download_dir) - meta["download_filepath"] = (download_dir / name).resolve().as_posix() - meta["output_dir"] = str(output_dir) - meta["output_filepath"] = (output_dir / name).resolve().as_posix() - files.append(meta) - - page_token = response.get("nextPageToken", None) - if page_token is None: - break - - traverse( - drive_id, - Path(self.read_config.download_dir), - Path(self.processor_config.output_dir), - recursive, - ) - return files - - def initialize(self): - pass - - def check_connection(self): - try: - self.connector_config.create_session_handle().service - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - def get_ingest_docs(self): - files = self._list_objects(self.connector_config.drive_id, self.connector_config.recursive) - return [ - GoogleDriveIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - meta=file, - ) - for file in files - ] diff --git a/unstructured/ingest/connector/hubspot.py b/unstructured/ingest/connector/hubspot.py deleted file mode 100644 index 3f01f4e81..000000000 --- a/unstructured/ingest/connector/hubspot.py +++ /dev/null @@ -1,278 +0,0 @@ -import typing as t -from dataclasses import dataclass -from enum import Enum -from functools import reduce -from pathlib import Path - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseSessionHandle, - BaseSingleIngestDoc, - BaseSourceConnector, - ConfigSessionHandleMixin, - IngestDocCleanupMixin, - IngestDocSessionHandleMixin, - SourceConnectorCleanupMixin, - SourceMetadata, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from hubspot import HubSpot - -CONTENT_TAG = "content" - - -class HubSpotObjectTypes(Enum): - CALLS = "calls" - COMMUNICATIONS = "communications" - EMAILS = "emails" - NOTES = "notes" - PRODUCTS = "products" - TICKETS = "tickets" - - -@dataclass -class HubSpotSessionHandle(BaseSessionHandle): - service: "HubSpot" - - -@dataclass -class HubSpotAccessConfig(AccessConfig): - api_token: str = enhanced_field(repr=False, sensitive=True) - - -@dataclass -class SimpleHubSpotConfig(ConfigSessionHandleMixin, BaseConnectorConfig): - access_config: HubSpotAccessConfig - params: t.Optional[str] = None - properties: t.Optional[dict] = None - object_types: t.Optional[t.List[str]] = None - custom_properties: t.Optional[t.Dict[str, t.List[str]]] = None - - @requires_dependencies(["hubspot"], extras="hubspot") - def create_session_handle(self) -> HubSpotSessionHandle: - from hubspot import HubSpot - - service = HubSpot(access_token=self.access_config.api_token) - return HubSpotSessionHandle(service=service) - - -@dataclass -class HubSpotIngestDoc(IngestDocSessionHandleMixin, IngestDocCleanupMixin, BaseSingleIngestDoc): - 
connector_config: SimpleHubSpotConfig - object_id: str - object_type: str - content_properties: t.List[str] - registry_name: str = "hubspot" - - def __post_init__(self): - self._add_custom_properties() - - @property - def filename(self): - return ( - Path(self.read_config.download_dir) - / f"{self.object_type}/{self.object_id}.txt" # type: ignore - ).resolve() - - @property - def _output_filename(self): - return ( - Path(self.processor_config.output_dir) - / f"{self.object_type}/{self.object_id}.json" # type: ignore - ).resolve() - - @property - def record_locator(self) -> t.Optional[t.Dict[str, t.Any]]: - return { - f"{self.registry_name}_id": self.object_id, - } - - @property - def version(self) -> t.Optional[str]: - return None - - @property - def source_url(self) -> t.Optional[str]: - return None - - def _add_custom_properties(self): - if (self.connector_config.custom_properties is not None) and ( - (cprops := self.connector_config.custom_properties.get(self.object_type)) is not None - ): - self.content_properties += cprops - - def _join_object_properties(self, obj) -> str: - return "\n".join( - [ - obj.properties[cprop] - for cprop in self.content_properties - if (obj.properties.get(cprop) is not None) - ], - ) - - def _resolve_getter(self): - method_path = "" - if self.object_type in [ - HubSpotObjectTypes.CALLS.value, - HubSpotObjectTypes.COMMUNICATIONS.value, - HubSpotObjectTypes.EMAILS.value, - HubSpotObjectTypes.NOTES.value, - ]: - method_path = f"crm.objects.{self.object_type}.basic_api.get_by_id" - if self.object_type in [ - HubSpotObjectTypes.PRODUCTS.value, - HubSpotObjectTypes.TICKETS.value, - ]: - method_path = f"crm.{self.object_type}.basic_api.get_by_id" - - method = reduce(getattr, method_path.split("."), self.session_handle.service) - return method - - @requires_dependencies(["hubspot"], extras="hubspot") - def _fetch_obj(self, check_only=False): - from hubspot.crm.objects.exceptions import NotFoundException - - get_by_id_method = self._resolve_getter() - try: - response = get_by_id_method( - self.object_id, - properties=([] if check_only else self.content_properties), - ) - except NotFoundException as e: - logger.error(e) - return None - return response - - def update_source_metadata(self, **kwargs) -> None: - obj = kwargs.get("object", self._fetch_obj(check_only=True)) # type: ignore - if obj is None: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - self.source_metadata = SourceMetadata( - date_created=obj.created_at.isoformat(), - date_modified=obj.updated_at.isoformat(), - exists=True, - ) - - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - obj = self._fetch_obj() - if obj is None: - raise ValueError( - f"Failed to retrieve object {self.registry_name}", - f"with ID {self.object_id}", - ) - self.update_source_metadata(object=obj) - output = self._join_object_properties(obj) - self.filename.parent.mkdir(parents=True, exist_ok=True) - with open(self.filename, "w", encoding="utf8") as f: - f.write(output) - return - - -@dataclass -class HubSpotSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - connector_config: SimpleHubSpotConfig - - def initialize(self): - self.hubspot = self.connector_config.create_session_handle().service - - def check_connection(self): - return self.connector_config.create_session_handle().service - - @requires_dependencies(["hubspot"], extras="hubspot") - def _list_objects(self, get_page_method, object_type: str, content_properties: t.List[str]): - try: - 
objects = get_page_method() - except Exception as e: - logger.error(e) - logger.error( - f"Failed to retrieve {object_type}, omitting processing...", - ) - return [] - return [ - HubSpotIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - object_id=obj.id, - object_type=object_type, - content_properties=content_properties, - ) - for obj in objects.results - ] - - def _get_calls(self) -> t.List[HubSpotIngestDoc]: - return self._list_objects( - self.hubspot.crm.objects.calls.basic_api.get_page, - HubSpotObjectTypes.CALLS.value, - ["hs_call_title", "hs_call_body"], - ) - - def _get_communications(self) -> t.List[HubSpotIngestDoc]: - return self._list_objects( - self.hubspot.crm.objects.communications.basic_api.get_page, - HubSpotObjectTypes.COMMUNICATIONS.value, - ["hs_communication_body"], - ) - - def _get_emails(self) -> t.List[HubSpotIngestDoc]: - return self._list_objects( - self.hubspot.crm.objects.emails.basic_api.get_page, - HubSpotObjectTypes.EMAILS.value, - ["hs_email_subject", "hs_email_text"], - ) - - def _get_notes(self) -> t.List[HubSpotIngestDoc]: - return self._list_objects( - self.hubspot.crm.objects.notes.basic_api.get_page, - HubSpotObjectTypes.NOTES.value, - ["hs_note_body"], - ) - - def _get_products(self) -> t.List[HubSpotIngestDoc]: - return self._list_objects( - self.hubspot.crm.products.basic_api.get_page, - HubSpotObjectTypes.PRODUCTS.value, - ["description"], - ) - - def _get_tickets(self) -> t.List[HubSpotIngestDoc]: - return self._list_objects( - self.hubspot.crm.tickets.basic_api.get_page, - HubSpotObjectTypes.TICKETS.value, - ["subject", "content"], - ) - - def get_ingest_docs(self): - obj_method_resolver = { - HubSpotObjectTypes.CALLS.value: self._get_calls, - HubSpotObjectTypes.COMMUNICATIONS.value: self._get_communications, - HubSpotObjectTypes.EMAILS.value: self._get_emails, - HubSpotObjectTypes.NOTES.value: self._get_notes, - HubSpotObjectTypes.PRODUCTS.value: self._get_products, - HubSpotObjectTypes.TICKETS.value: self._get_tickets, - } - - if self.connector_config.object_types is not None: - obj_method_resolver = { - obj_name: obj_method_resolver.get(obj_name) # type: ignore - for obj_name in self.connector_config.object_types - } - - ingest_docs: t.List[HubSpotIngestDoc] = [] - for obj_name, obj_method in obj_method_resolver.items(): - logger.info(f"Retrieving - {obj_name}") - results: t.List[HubSpotIngestDoc] = obj_method() # type: ignore - ingest_docs += results # type: ignore - - return ingest_docs diff --git a/unstructured/ingest/connector/jira.py b/unstructured/ingest/connector/jira.py deleted file mode 100644 index d29e1f2dc..000000000 --- a/unstructured/ingest/connector/jira.py +++ /dev/null @@ -1,469 +0,0 @@ -import math -import typing as t -from collections import abc -from dataclasses import dataclass, field -from datetime import datetime -from functools import cached_property -from pathlib import Path - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseSessionHandle, - BaseSingleIngestDoc, - BaseSourceConnector, - ConfigSessionHandleMixin, - IngestDocCleanupMixin, - IngestDocSessionHandleMixin, - SourceConnectorCleanupMixin, - SourceMetadata, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from 
atlassian import Jira - - -@dataclass -class JiraSessionHandle(BaseSessionHandle): - service: "Jira" - - -@requires_dependencies(["atlassian"], extras="jira") -def create_jira_object(url, user_email, api_token): - """ - Creates a jira object for interacting with Jira Cloud. - Args: - url: URL to Jira Cloud organization - user_email: Email for the user with the permissions - api_token: API Token, generated for the user - - Returns: - Jira object - """ - from atlassian import Jira - - jira = Jira( - url, - username=user_email, - password=api_token, - ) - - response = jira.get_permissions("BROWSE_PROJECTS") - permitted = response["permissions"]["BROWSE_PROJECTS"]["havePermission"] - - if permitted: - return jira - - else: - raise ValueError( - """The user with the provided *user_email* and the *api_token* - is not permitted to browse projects for the jira organization - for the provided *url*. Try checking user_email, api_token, - and the url arguments.""", - ) - - -@dataclass -class JiraAccessConfig(AccessConfig): - api_token: str = enhanced_field(sensitive=True) - - -@dataclass -class SimpleJiraConfig(ConfigSessionHandleMixin, BaseConnectorConfig): - """Connector config where: - user_email is the email to authenticate into Atlassian (Jira) Cloud, - api_token is the api token to authenticate into Atlassian (Jira) Cloud, - url is the URL pointing to the Atlassian (Jira) Cloud instance, - list_of_projects is a list of project that is aimed to be ingested. - - Check ... - for more info on the api_token. - """ - - user_email: str - access_config: JiraAccessConfig - url: str - projects: t.Optional[t.List[str]] = None - boards: t.Optional[t.List[str]] = None - issues: t.Optional[t.List[str]] = None - - def create_session_handle( - self, - ) -> JiraSessionHandle: - service = create_jira_object( - url=self.url, user_email=self.user_email, api_token=self.access_config.api_token - ) - return JiraSessionHandle(service=service) - - -@dataclass -class JiraFileMeta: - """Metadata specifying: - project_id: id for the jira project that the issue locates in, and - issue_key: key for the issue that is being reached to. - """ - - project_id: str - board_id: t.Optional[str] - issue_key: str - issue_id: str - - -# An implementation to obtain nested-defaultdict functionality. -# Keys have default values in a recursive manner, allowing -# limitless templates to parse an api response object. 
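For readers skimming this removed connector, the nested-defaultdict idea described in the comment above (and implemented just below by nested_object_to_field_getter and FieldGetter) boils down to: looking up a missing key yields an empty getter instead of raising KeyError, so templated field access over a Jira issue payload never fails on absent fields. A minimal standalone sketch of that behavior, where SafeFields and wrap are illustrative names rather than the connector's own:

from collections import abc

class SafeFields(dict):
    # Illustrative stand-in for the FieldGetter idea: an absent key returns an
    # empty SafeFields rather than raising KeyError.
    def __getitem__(self, key):
        value = super().__getitem__(key) if key in self else None
        return SafeFields({}) if value is None else value

def wrap(obj):
    # Recursively wrap nested mappings so every level tolerates missing keys.
    if isinstance(obj, abc.Mapping):
        return SafeFields({k: wrap(v) for k, v in obj.items()})
    return obj

fields = wrap({"assignee": {"displayName": "Ada"}})
print(fields["assignee"]["displayName"])  # prints: Ada
print(fields["reporter"]["displayName"])  # prints: {}  (empty getter, no KeyError)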
-def nested_object_to_field_getter(object): - if isinstance(object, abc.Mapping): - new_object = {} - for k, v in object.items(): - if isinstance(v, abc.Mapping): - new_object[k] = FieldGetter(nested_object_to_field_getter(v)) - else: - new_object[k] = v - return FieldGetter(new_object) - else: - return object - - -class FieldGetter(dict): - def __getitem__(self, key): - value = super().__getitem__(key) if key in self else None - if value is None: - value = FieldGetter({}) - return value - - -def form_templated_string(issue, parsed_fields, c_sep="|||", r_sep="\n\n\n"): - """Forms a template string via parsing the fields from the API response object on the issue - The template string will be saved to the disk, and then will be processed by partition.""" - return r_sep.join( - [ - _get_id_fields_for_issue(issue), - _get_project_fields_for_issue(parsed_fields), - _get_dropdown_fields_for_issue(parsed_fields), - _get_subtasks_for_issue(parsed_fields), - _get_comments_for_issue(parsed_fields), - _get_text_fields_for_issue(parsed_fields), - ], - ) - - -DEFAULT_C_SEP = " " * 5 -DEFAULT_R_SEP = "\n" - - -def _get_id_fields_for_issue(issue, c_sep=DEFAULT_C_SEP, r_sep=DEFAULT_R_SEP): - id, key = issue["id"], issue["key"] - return f"IssueID_IssueKey:{id}{c_sep}{key}{r_sep}" - - -def _get_project_fields_for_issue(issue, c_sep=DEFAULT_C_SEP, r_sep=DEFAULT_R_SEP): - if "project" in issue: - return ( - f"""ProjectID_Key:{issue["project"]["key"]}{c_sep}{issue["project"]["name"]}{r_sep}""" - ) - else: - return "" - - -def _get_dropdown_fields_for_issue(issue, c_sep=DEFAULT_C_SEP, r_sep=DEFAULT_R_SEP): - return f""" - IssueType:{issue["issuetype"]["name"]} - {r_sep} - Status:{issue["status"]["name"]} - {r_sep} - Priority:{issue["priority"]} - {r_sep} - AssigneeID_Name:{issue["assignee"]["accountId"]}{c_sep}{issue["assignee"]["displayName"]} - {r_sep} - ReporterAdr_Name:{issue["reporter"]["emailAddress"]}{c_sep}{issue["reporter"]["displayName"]} - {r_sep} - Labels:{c_sep.join(issue["labels"])} - {r_sep} - Components:{c_sep.join([component["name"] for component in issue["components"]])} - {r_sep} - """ - - -def _get_subtasks_for_issue(issue): - return "" - - -def _get_text_fields_for_issue(issue, c_sep=DEFAULT_C_SEP, r_sep=DEFAULT_R_SEP): - return f""" - {issue["summary"]} - {r_sep} - {issue["description"]} - {r_sep} - {c_sep.join([atch["self"] for atch in issue["attachment"]])} - {r_sep} - """ - - -def _get_comments_for_issue(issue, c_sep=DEFAULT_C_SEP, r_sep=DEFAULT_R_SEP): - return c_sep.join( - [_get_fields_for_comment(comment) for comment in issue["comment"]["comments"]], - ) - - -def _get_fields_for_comment(comment, c_sep=DEFAULT_C_SEP, r_sep=DEFAULT_R_SEP): - return f"{comment['author']['displayName']}{c_sep}{comment['body']}{r_sep}" - - -def scroll_wrapper(func, results_key="results"): - def wrapper(*args, **kwargs): - """Wraps a function to obtain scroll functionality. 
- Function needs to be able to accept 'start' and 'limit' arguments.""" - if "number_of_items_to_fetch" in kwargs: - number_of_items_to_fetch = kwargs["number_of_items_to_fetch"] - del kwargs["number_of_items_to_fetch"] - else: - number_of_items_to_fetch = 100 - - kwargs["limit"] = min(100, number_of_items_to_fetch) - kwargs["start"] = kwargs.get("start", 0) - - all_results = [] - num_iterations = math.ceil(number_of_items_to_fetch / kwargs["limit"]) - - for _ in range(num_iterations): - response = func(*args, **kwargs) - if isinstance(response, list): - all_results += func(*args, **kwargs) - elif isinstance(response, dict): - if results_key not in response: - raise KeyError( - "Response object has no known keys to \ - access the results, such as 'results' or 'values'.", - ) - all_results += func(*args, **kwargs)[results_key] - kwargs["start"] += kwargs["limit"] - - return all_results[:number_of_items_to_fetch] - - return wrapper - - -@dataclass -class JiraIngestDoc(IngestDocSessionHandleMixin, IngestDocCleanupMixin, BaseSingleIngestDoc): - """Class encapsulating fetching a doc and writing processed results (but not - doing the processing). - - Current implementation creates a Jira connection object - to fetch each doc, rather than creating a it for each thread. - """ - - connector_config: SimpleJiraConfig - file_meta: t.Optional[JiraFileMeta] = None - registry_name: str = "jira" - - @cached_property - def record_locator(self): # Values must be JSON-serializable - """A dictionary with any data necessary to uniquely identify the document on - the source system.""" - return { - "base_url": self.connector_config.url, - "issue_key": self.file_meta.issue_key, - } - - @cached_property - @SourceConnectionNetworkError.wrap - def issue(self): - """Gets issue data""" - jira = self.session_handle.service - return jira.issue(self.file_meta.issue_key) - - @cached_property - def parsed_fields(self): - return nested_object_to_field_getter(self.issue["fields"]) - - @property - def grouping_folder_name(self): - if self.file_meta.board_id: - return self.file_meta.board_id - else: - return self.file_meta.project_id - - @property - def filename(self): - download_file = f"{self.file_meta.issue_id}.txt" - - return ( - Path(self.read_config.download_dir) / self.grouping_folder_name / download_file - ).resolve() - - @property - def _output_filename(self): - """Create output file path.""" - output_file = f"{self.file_meta.issue_id}.json" - - return ( - Path(self.processor_config.output_dir) / self.grouping_folder_name / output_file - ).resolve() - - @property - def version(self) -> t.Optional[str]: - return None - - def update_source_metadata(self, **kwargs) -> None: - exists = bool(self.issue) - if not exists: - self.source_metadata = SourceMetadata( - exists=exists, - ) - return - - self.source_metadata = SourceMetadata( - date_created=datetime.strptime( - self.parsed_fields["created"], - "%Y-%m-%dT%H:%M:%S.%f%z", - ).isoformat(), - date_modified=datetime.strptime( - self.parsed_fields["updated"], - "%Y-%m-%dT%H:%M:%S.%f%z", - ).isoformat(), - source_url=f"{self.connector_config.url}/browse/{self.file_meta.issue_key}", - exists=exists, - ) - - @SourceConnectionError.wrap - @requires_dependencies(["atlassian"], extras="jira") - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - document = form_templated_string(self.issue, self.parsed_fields) - self.update_source_metadata() - self.filename.parent.mkdir(parents=True, exist_ok=True) - - with open(self.filename, "w", encoding="utf8") as f: - 
f.write(document) - - -@dataclass -class JiraSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - """Fetches issues from projects in an Atlassian (Jira) Cloud instance.""" - - connector_config: SimpleJiraConfig - _jira: t.Optional["Jira"] = field(init=False, default=None) - - @property - def jira(self) -> "Jira": - if self._jira is None: - try: - self._jira = self.connector_config.create_session_handle().service - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - return self._jira - - @requires_dependencies(["atlassian"], extras="jira") - def initialize(self): - _ = self.jira - - def check_connection(self): - _ = self.jira - - @requires_dependencies(["atlassian"], extras="jira") - def _get_all_project_ids(self): - """Fetches ids for all projects in a Jira domain.""" - project_ids = [project["key"] for project in self.jira.projects()] - return project_ids - - @requires_dependencies(["atlassian"], extras="jira") - def _get_issues_within_one_project( - self, - project_id: str, - ): - get_issues_with_scroll = scroll_wrapper(self.jira.get_all_project_issues) - results = get_issues_with_scroll(project=project_id, fields=["key"]) - - return [(issue["key"], issue["id"], None) for issue in results] - - @requires_dependencies(["atlassian"], extras="jira") - def _get_issue_keys_within_projects(self, project_ids: t.Optional[t.List[str]] = None): - if project_ids is None: - # for when a component list is provided, without any projects - if bool(self.connector_config.boards or self.connector_config.issues): - return [] - # for when no components are provided. all projects will be ingested - else: - return self._get_all_project_ids() - - # for when a component list is provided, including some projects - issue_keys_all = [self._get_issues_within_one_project(project_id=id) for id in project_ids] - - issue_keys_flattened = [ - (issue_key, issue_id, None) - for issue_keys_project in issue_keys_all - for issue_key, issue_id, board_id in issue_keys_project - ] - - return issue_keys_flattened - - def _get_issues_within_one_board(self, board_id: str): - get_issues_with_scroll = scroll_wrapper( - self.jira.get_issues_for_board, - results_key="issues", - ) - results = get_issues_with_scroll(board_id=board_id, fields=["key"], jql=None) - - return [(issue["key"], issue["id"], board_id) for issue in results] - - def _get_issue_keys_within_boards(self, board_ids): - if board_ids is None: - return [] - - issue_keys_all = [self._get_issues_within_one_board(board_id=id) for id in board_ids] - - issue_keys_flattened = [ - (issue_key, issue_id, board_id) - for issue_keys_board in issue_keys_all - for issue_key, issue_id, board_id in issue_keys_board - ] - return issue_keys_flattened - - def get_issues_info(self, issues): - issues_info = [self.jira.get_issue(issue, ["key", "id"]) for issue in issues] - return [(info["key"], info["id"], None) for info in issues_info] - - def get_issue_keys_for_given_components(self): - issues = [] - - if self.connector_config.projects: - issues += self._get_issue_keys_within_projects(self.connector_config.projects) - if self.connector_config.boards: - issues += self._get_issue_keys_within_boards(self.connector_config.boards) - if self.connector_config.issues: - issues += self.get_issues_info(self.connector_config.issues) - - return issues - - def get_ingest_docs(self): - """Fetches all issues in a project.""" - if bool( - self.connector_config.projects - or 
self.connector_config.boards - or self.connector_config.issues, - ): - issue_keys_and_ids = self.get_issue_keys_for_given_components() - else: - # gets all issue ids from all projects - issue_keys_and_ids = self._get_issue_keys_within_projects() - - return [ - JiraIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - file_meta=JiraFileMeta( - issue_id=issue_id, - issue_key=issue_key, - project_id=issue_key.split("-")[0], - board_id=board_id, - ), - ) - for issue_key, issue_id, board_id in issue_keys_and_ids - ] diff --git a/unstructured/ingest/connector/kafka.py b/unstructured/ingest/connector/kafka.py deleted file mode 100644 index 4510cf3d7..000000000 --- a/unstructured/ingest/connector/kafka.py +++ /dev/null @@ -1,294 +0,0 @@ -import base64 -import json -import socket -import typing as t -from dataclasses import dataclass -from pathlib import Path - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import DestinationConnectionError, SourceConnectionError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseDestinationConnector, - BaseIngestDoc, - BaseSingleIngestDoc, - BaseSourceConnector, - ConfigSessionHandleMixin, - IngestDocCleanupMixin, - IngestDocSessionHandleMixin, - SourceConnectorCleanupMixin, - WriteConfig, -) -from unstructured.ingest.logger import logger -from unstructured.ingest.utils.data_prep import batch_generator -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from confluent_kafka import Consumer, Producer - - -@dataclass -class KafkaAccessConfig(AccessConfig): - kafka_api_key: t.Optional[str] = enhanced_field(sensitive=True) - secret: t.Optional[str] = enhanced_field(sensitive=True) - - -@dataclass -class SimpleKafkaConfig(ConfigSessionHandleMixin, BaseConnectorConfig): - bootstrap_server: str - port: str - topic: str - access_config: KafkaAccessConfig - confluent: t.Optional[bool] = True - num_messages_to_consume: t.Optional[int] = 1 - timeout: t.Optional[float] = 1.0 - - -@dataclass -class KafkaIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - """Class encapsulating fetching a message and writing processed results.""" - - connector_config: SimpleKafkaConfig - raw_content: str - raw_filename: str - registry_name: str = "kafka" - - def _tmp_download_file(self): - topic_file = self.connector_config.topic + "-" + self.raw_filename - return Path(self.read_config.download_dir) / topic_file - - @property - def version(self) -> t.Optional[str]: - return None - - @property - def source_url(self) -> t.Optional[str]: - return None - - @property - def filename(self): - """The filename of the file created""" - return self._tmp_download_file() - - def _create_full_tmp_dir_path(self): - self._tmp_download_file().parent.mkdir(parents=True, exist_ok=True) - - @property - def _output_filename(self): - """Create filename document id combined with a hash of the query to uniquely identify - the output file.""" - output_file = self.connector_config.topic + ".json" - return Path(self.processor_config.output_dir) / output_file - - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - self._create_full_tmp_dir_path() - - pdf_data = base64.b64decode(self.raw_content) - - with open(self.filename, "wb") as file: - file.write(pdf_data) - - -@dataclass -class KafkaSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - """Source connector for 
Kafka. - Main job is to consume from a Kafka topic and create instances of - KakfaIngestDoc. - Note that messages have the format of: - : the name of the file (with correct file extension) - : base64 encoded (whether was binary or not) - """ - - connector_config: SimpleKafkaConfig - _consumer: t.Optional["Consumer"] = None - - def check_connection(self): - try: - self.kafka_consumer - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - def initialize(self): - topic = self.connector_config.topic - logger.info(f"Subscribing to topic: {topic}") - self.kafka_consumer.subscribe([topic]) - - @property - def kafka_consumer(self): - if self._consumer is None: - self._consumer = self.create_consumer() - return self._consumer - - @requires_dependencies(["confluent_kafka"], extras="kafka") - def create_consumer(self) -> "Consumer": - from confluent_kafka import Consumer - - is_confluent = self.connector_config.confluent - bootstrap = self.connector_config.bootstrap_server - port = self.connector_config.port - - conf = { - "bootstrap.servers": f"{bootstrap}:{port}", - "client.id": socket.gethostname(), - "group.id": "your_group_id", - "enable.auto.commit": "false", - "auto.offset.reset": "earliest", - "message.max.bytes": 10485760, - } - - if is_confluent: - kafka_api_key = self.connector_config.access_config.kafka_api_key - secret = self.connector_config.access_config.secret - conf["sasl.mechanism"] = "PLAIN" - conf["security.protocol"] = "SASL_SSL" - conf["sasl.username"] = kafka_api_key - conf["sasl.password"] = secret - - consumer = Consumer(conf) - logger.debug(f"Kafka Consumer connected to bootstrap: {bootstrap}") - return consumer - - @SourceConnectionError.wrap - def get_ingest_docs(self): - from confluent_kafka import KafkaError - - consumer = self.kafka_consumer - running = True - - collected = [] - num_messages_to_consume = self.connector_config.num_messages_to_consume - logger.info(f"Config set for blocking on {num_messages_to_consume} messages") - # Consume specified number of messages - while running: - msg = consumer.poll(timeout=self.connector_config.timeout) - if msg is None: - logger.debug("No Kafka messages found") - continue - if msg.error(): - if msg.error().code() == KafkaError._PARTITION_EOF: - # End of partition event - logger.error( - "%% %s [%d] reached end at offset %d\n" - % (msg.topic(), msg.partition(), msg.offset()) - ) - else: - collected.append(json.loads(msg.value().decode("utf8"))) - if len(collected) >= num_messages_to_consume: - logger.debug(f"Found {len(collected)} messages, stopping") - consumer.commit(asynchronous=False) - break - - return [ - KafkaIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - raw_filename=msg["filename"], - raw_content=msg["content"], - ) - for msg in collected - ] - - -@dataclass -class KafkaWriteConfig(WriteConfig): - batch_size: int = 4 - - -@dataclass -class KafkaDestinationConnector(IngestDocSessionHandleMixin, BaseDestinationConnector): - """Connector to write BaseIngestDoc types to Kafka - Writes messages to Kafka in the format: - "type" - "text": - "filename": - """ - - write_config: KafkaWriteConfig - connector_config: SimpleKafkaConfig - _producer: t.Optional["Producer"] = None - - @property - def kafka_producer(self): - if self._producer is None: - self._producer = self.create_producer() - return self._producer - - def initialize(self): - 
pass - - @requires_dependencies(["confluent_kafka"], extras="kafka") - def create_producer(self) -> "Producer": - from confluent_kafka import Producer - - is_confluent = self.connector_config.confluent - bootstrap = self.connector_config.bootstrap_server - port = self.connector_config.port - - conf = { - "bootstrap.servers": f"{bootstrap}:{port}", - "client.id": socket.gethostname(), - } - - if is_confluent: - api_key = self.connector_config.access_config.kafka_api_key - secret = self.connector_config.access_config.secret - conf["sasl.mechanism"] = "PLAIN" - conf["security.protocol"] = "SASL_SSL" - conf["sasl.username"] = api_key - conf["sasl.password"] = secret - - producer = Producer(conf) - logger.debug(f"Connected to bootstrap: {bootstrap}") - return producer - - def check_connection(self): - try: - self.kafka_producer - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise DestinationConnectionError(f"failed to validate connection: {e}") - - @DestinationConnectionError.wrap - def upload_msg(self, batch) -> int: - logger.debug(f"Uploading batch: {batch}") - topic = self.connector_config.topic - producer = self.kafka_producer - uploaded = 0 - for i in range(len(batch)): - filename = f'{batch[i].pop("filename")}' - producer.produce(topic, key=filename, value=str(batch[i])) - uploaded += 1 - return uploaded - - @DestinationConnectionError.wrap - def write_dict(self, *args, dict_list: t.List[t.Dict[str, t.Any]], **kwargs) -> None: - logger.info(f"Writing {len(dict_list)} documents to Kafka") - num_uploaded = 0 - - for chunk in batch_generator(dict_list, self.write_config.batch_size): - num_uploaded += self.upload_msg(chunk) # noqa: E203 - - producer = self.kafka_producer - producer.flush() - logger.info(f"Uploaded {num_uploaded} documents to Kafka") - - def write(self, docs: t.List[BaseIngestDoc]) -> None: - content_list: t.List[t.Dict[str, t.Any]] = [] - for doc in docs: - local_path = doc._output_filename - with open(local_path) as json_file: - dict_content = json.load(json_file) - for content in dict_content: - content_list.append( - { - "type": content["type"], - "text": content["text"], - "filename": content["metadata"]["filename"], - } - ) - self.write_dict(dict_list=content_list) diff --git a/unstructured/ingest/connector/local.py b/unstructured/ingest/connector/local.py deleted file mode 100644 index 417828606..000000000 --- a/unstructured/ingest/connector/local.py +++ /dev/null @@ -1,139 +0,0 @@ -import fnmatch -import glob -import os -import typing as t -from dataclasses import dataclass -from datetime import datetime -from pathlib import Path - -from unstructured.ingest.interfaces import ( - BaseConnectorConfig, - BaseSingleIngestDoc, - BaseSourceConnector, - SourceMetadata, -) -from unstructured.ingest.logger import logger - - -@dataclass -class SimpleLocalConfig(BaseConnectorConfig): - # Local specific options - input_path: str - recursive: bool = False - file_glob: t.Optional[t.List[str]] = None - - def __post_init__(self): - if os.path.isfile(self.input_path): - self.input_path_is_file = True - else: - self.input_path_is_file = False - - -@dataclass -class LocalIngestDoc(BaseSingleIngestDoc): - """Class encapsulating fetching a doc and writing processed results (but not - doing the processing!). 
- """ - - connector_config: SimpleLocalConfig - path: str - registry_name: str = "local" - - @property - def base_filename(self) -> t.Optional[str]: - download_path = Path(self.connector_config.input_path).resolve() - full_path = Path(self.filename).resolve() - if download_path.is_file(): - download_path = download_path.parent - relative_path = full_path.relative_to(download_path) - return str(relative_path) - - @property - def filename(self): - """The filename of the local file to be processed""" - return Path(self.path) - - def cleanup_file(self): - """Not applicable to local file system""" - - def get_file(self): - """Not applicable to local file system""" - - def update_source_metadata(self, **kwargs) -> None: - try: - out = os.lstat(self.path) - self._source_metadata = SourceMetadata( - exists=True, - date_created=str(datetime.fromtimestamp(out.st_ctime)), - date_modified=str(datetime.fromtimestamp(out.st_mtime)), - permissions_data=[{"mode": out.st_mode}], - source_url=self.path, - ) - except FileNotFoundError: - self._source_metadata = SourceMetadata(exists=False) - - @property - def _output_filename(self) -> Path: - """Returns output filename for the doc - If input path argument is a file itself, it returns the filename of the doc. - If input path argument is a folder, it returns the relative path of the doc. - """ - input_path = Path(self.connector_config.input_path) - basename = ( - f"{self.base_filename}.json" - if input_path.is_file() - else f"{Path(self.path).relative_to(input_path)}.json" - ) - return Path(self.processor_config.output_dir) / basename - - -@dataclass -class LocalSourceConnector(BaseSourceConnector): - """Objects of this class support fetching document(s) from local file system""" - - def check_connection(self): - pass - - connector_config: SimpleLocalConfig - - def __post_init__(self): - self.ingest_doc_cls: t.Type[LocalIngestDoc] = LocalIngestDoc - - def cleanup(self, cur_dir=None): - """Not applicable to local file system""" - - def initialize(self): - """Not applicable to local file system""" - - def _list_files(self): - if self.connector_config.input_path_is_file: - return glob.glob(f"{self.connector_config.input_path}") - elif self.connector_config.recursive: - return glob.glob( - f"{self.connector_config.input_path}/**", - recursive=self.connector_config.recursive, - ) - else: - return glob.glob(f"{self.connector_config.input_path}/*") - - def does_path_match_glob(self, path: str) -> bool: - if self.connector_config.file_glob is None: - return True - patterns = self.connector_config.file_glob - for pattern in patterns: - if fnmatch.filter([path], pattern): - return True - logger.debug(f"The file {path!r} is discarded as it does not match any given glob.") - return False - - def get_ingest_docs(self): - return [ - self.ingest_doc_cls( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - path=file, - ) - for file in self._list_files() - if os.path.isfile(file) and self.does_path_match_glob(file) - ] diff --git a/unstructured/ingest/connector/mongodb.py b/unstructured/ingest/connector/mongodb.py deleted file mode 100644 index ae73ecbec..000000000 --- a/unstructured/ingest/connector/mongodb.py +++ /dev/null @@ -1,284 +0,0 @@ -import copy -import typing as t -from dataclasses import dataclass, field -from pathlib import Path - -from unstructured.__version__ import __version__ as unstructured_version -from unstructured.ingest.enhanced_dataclass import enhanced_field -from 
unstructured.ingest.enhanced_dataclass.core import _asdict -from unstructured.ingest.error import DestinationConnectionError, SourceConnectionError, WriteError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseDestinationConnector, - BaseIngestDocBatch, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, -) -from unstructured.ingest.logger import logger -from unstructured.staging.base import flatten_dict -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from pymongo import MongoClient - - -SERVER_API_VERSION = "1" - - -def parse_userinfo(userinfo: str) -> t.Tuple[str, str]: - user, _, passwd = userinfo.partition(":") - return user, passwd - - -@dataclass -class MongoDBAccessConfig(AccessConfig): - uri: t.Optional[str] = enhanced_field(sensitive=True, default=None) - - -@dataclass -class SimpleMongoDBConfig(BaseConnectorConfig): - access_config: MongoDBAccessConfig - host: t.Optional[str] = None - database: t.Optional[str] = None - collection: t.Optional[str] = None - port: int = 27017 - batch_size: int = 100 - - @requires_dependencies(["pymongo"], extras="mongodb") - def generate_client(self) -> "MongoClient": - from pymongo import MongoClient - from pymongo.driver_info import DriverInfo - from pymongo.server_api import ServerApi - - if self.access_config.uri: - return MongoClient( - self.access_config.uri, - server_api=ServerApi(version=SERVER_API_VERSION), - driver=DriverInfo(name="unstructured", version=unstructured_version), - ) - else: - return MongoClient( - host=self.host, - port=self.port, - server_api=ServerApi(version=SERVER_API_VERSION), - ) - - def get_collection(self, client): - database = client[self.database] - return database.get_collection(name=self.collection) - - -@dataclass -class MongoDBDocumentMeta: - collection: str - document_id: str - date_created: str - - -@dataclass -class MongoDBIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - connector_config: SimpleMongoDBConfig - document_meta: MongoDBDocumentMeta - document: dict = field(default_factory=dict) - registry_name: str = "mongodb" - - @property - def filename(self): - return ( - Path(self.read_config.download_dir) - / self.connector_config.collection - / f"{self.document_meta.document_id}.txt" - ).resolve() - - @property - def _output_filename(self): - return ( - Path(self.processor_config.output_dir) - / self.connector_config.collection - / f"{self.document_meta.document_id}.json" - ) - - def update_source_metadata(self, **kwargs): - if self.document is None: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - self.source_metadata = SourceMetadata( - date_created=self.document_meta.date_created, - exists=True, - ) - - @SourceConnectionError.wrap - @requires_dependencies(["pymongo"], extras="mongodb") - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - pass - - @property - def record_locator(self) -> t.Optional[t.Dict[str, t.Any]]: - return { - "host": self.connector_config.host, - "collection": self.connector_config.collection, - "document_id": self.document_meta.document_id, - } - - -@dataclass -class MongoDBIngestDocBatch(BaseIngestDocBatch): - connector_config: SimpleMongoDBConfig - ingest_docs: t.List[MongoDBIngestDoc] = field(default_factory=list) - list_of_ids: t.List[str] = field(default_factory=list) - registry_name: str = "mongodb_batch" - - @property - def unique_id(self) -> str: - return ",".join(sorted(self.list_of_ids)) - 
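One detail worth calling out in this removed MongoDB connector: the source connector further down splits the collection's distinct _id values into batches using plain ceil division, (len(ids) + batch_size - 1) // batch_size slices, and each MongoDBIngestDocBatch then refetches its slice by ObjectId. A minimal standalone sketch of that chunking arithmetic, with chunk_ids as an illustrative helper name, not part of the connector:

import typing as t

def chunk_ids(ids: t.List[str], batch_size: int) -> t.List[t.List[str]]:
    # Ceil division: (len(ids) + batch_size - 1) // batch_size slices, each
    # holding at most batch_size ids, with the remainder in the last slice.
    num_batches = (len(ids) + batch_size - 1) // batch_size
    return [ids[i * batch_size : (i + 1) * batch_size] for i in range(num_batches)]

assert chunk_ids(["a", "b", "c", "d", "e"], 2) == [["a", "b"], ["c", "d"], ["e"]]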
- @requires_dependencies(["pymongo"], extras="mongodb") - def _get_docs(self) -> t.List[dict]: - """Fetches all documents in a collection.""" - from bson.objectid import ObjectId - - # Note for future. Maybe this could use other client - client = self.connector_config.generate_client() - collection = self.connector_config.get_collection(client) - # MondoDB expects a list of ObjectIds - list_of_object_ids = [] - for x in self.list_of_ids: - list_of_object_ids.append(ObjectId(x)) - return list(collection.find({"_id": {"$in": list_of_object_ids}})) - - def get_files(self): - documents = self._get_docs() - for doc in documents: - ingest_doc = MongoDBIngestDoc( - processor_config=self.processor_config, - read_config=self.read_config, - connector_config=self.connector_config, - document_meta=MongoDBDocumentMeta( - collection=self.connector_config.collection, - document_id=str(doc.get("_id")), - date_created=doc.get("_id").generation_time.isoformat(), - ), - document=doc, - ) - ingest_doc.update_source_metadata() - del doc["_id"] - filename = ingest_doc.filename - flattened_dict = flatten_dict(dictionary=doc) - str_values = [str(value) for value in flattened_dict.values()] - concatenated_values = "\n".join(str_values) - - filename.parent.mkdir(parents=True, exist_ok=True) - with open(filename, "w", encoding="utf8") as f: - f.write(concatenated_values) - - self.ingest_docs.append(ingest_doc) - - -@dataclass -class MongoDBSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - connector_config: SimpleMongoDBConfig - _client: t.Optional["MongoClient"] = field(init=False, default=None) - - @property - def client(self) -> "MongoClient": - if self._client is None: - self._client = self.connector_config.generate_client() - return self._client - - def check_connection(self): - try: - self.client.admin.command("ping") - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise DestinationConnectionError(f"failed to validate connection: {e}") - - def initialize(self): - _ = self.client - - @requires_dependencies(["pymongo"], extras="mongodb") - def _get_doc_ids(self) -> t.List[str]: - """Fetches all document ids in a collection.""" - collection = self.connector_config.get_collection(self.client) - return [str(x) for x in collection.distinct("_id")] - - def get_ingest_docs(self): - """Fetches all documents in an index, using ids that are fetched with _get_doc_ids""" - ids = self._get_doc_ids() - id_batches = [ - ids[ - i - * self.connector_config.batch_size : (i + 1) # noqa - * self.connector_config.batch_size - ] - for i in range( - (len(ids) + self.connector_config.batch_size - 1) - // self.connector_config.batch_size - ) - ] - - return [ - MongoDBIngestDocBatch( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - list_of_ids=batched_ids, - ) - for batched_ids in id_batches - ] - - -@dataclass -class MongoDBDestinationConnector(BaseDestinationConnector): - connector_config: SimpleMongoDBConfig - _client: t.Optional["MongoClient"] = field(init=False, default=None) - - def to_dict(self, **kwargs): - """ - The _client variable in this dataclass breaks deepcopy due to: - TypeError: cannot pickle '_thread.lock' object - When serializing, remove it, meaning client data will need to be reinitialized - when deserialized - """ - self_cp = copy.copy(self) - if hasattr(self_cp, "_client"): - setattr(self_cp, "_client", None) - return _asdict(self_cp, **kwargs) - - @property - def client(self) -> 
"MongoClient": - if self._client is None: - self._client = self.connector_config.generate_client() - return self._client - - @requires_dependencies(["pymongo"], extras="mongodb") - def check_connection(self): - try: - self.client.admin.command("ping") - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise DestinationConnectionError(f"failed to validate connection: {e}") - - def initialize(self): - _ = self.client - - @requires_dependencies(["pymongo"], extras="mongodb") - def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None: - logger.info( - f"writing {len(elements_dict)} documents to destination " - f"database {self.connector_config.database}, " - f"at collection {self.connector_config.collection}", - ) - - collection = self.connector_config.get_collection(self.client) - try: - collection.insert_many(elements_dict) - except Exception as e: - logger.error(f"failed to write records: {e}", exc_info=True) - raise WriteError(f"failed to write records: {e}") diff --git a/unstructured/ingest/connector/notion/__init__.py b/unstructured/ingest/connector/notion/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstructured/ingest/connector/notion/client.py b/unstructured/ingest/connector/notion/client.py deleted file mode 100644 index dfb9e8e48..000000000 --- a/unstructured/ingest/connector/notion/client.py +++ /dev/null @@ -1,233 +0,0 @@ -from typing import Any, Generator, List, Optional, Tuple - -import backoff -import httpx -import notion_client.errors -from notion_client import Client as NotionClient -from notion_client.api_endpoints import BlocksChildrenEndpoint as NotionBlocksChildrenEndpoint -from notion_client.api_endpoints import BlocksEndpoint as NotionBlocksEndpoint -from notion_client.api_endpoints import DatabasesEndpoint as NotionDatabasesEndpoint -from notion_client.api_endpoints import Endpoint -from notion_client.api_endpoints import PagesEndpoint as NotionPagesEndpoint -from notion_client.errors import RequestTimeoutError - -from unstructured.ingest.connector.notion.types.block import Block -from unstructured.ingest.connector.notion.types.database import Database -from unstructured.ingest.connector.notion.types.database_properties import ( - map_cells, -) -from unstructured.ingest.connector.notion.types.page import Page -from unstructured.ingest.ingest_backoff import RetryHandler -from unstructured.ingest.interfaces import RetryStrategyConfig - -retryable_exceptions = ( - httpx.TimeoutException, - httpx.HTTPStatusError, - notion_client.errors.HTTPResponseError, -) - - -def get_retry_handler(endpoint: Endpoint) -> Optional[RetryHandler]: - if retry_strategy_config := getattr(endpoint, "retry_strategy_config"): - return RetryHandler( - backoff.expo, - retryable_exceptions, - max_time=retry_strategy_config.max_retry_time, - max_tries=retry_strategy_config.max_retries, - logger=endpoint.parent.logger, - start_log_level=endpoint.parent.logger.level, - backoff_log_level=endpoint.parent.logger.level, - ) - return None - - -class BlocksChildrenEndpoint(NotionBlocksChildrenEndpoint): - def __init__( - self, - *args, - retry_strategy_config: Optional[RetryStrategyConfig] = None, - **kwargs, - ): - super().__init__(*args, **kwargs) - self.retry_strategy_config = retry_strategy_config - - @property - def retry_handler(self) -> Optional[RetryHandler]: - return get_retry_handler(self) - - def list(self, block_id: str, **kwargs: Any) -> Tuple[List[Block], dict]: - resp: dict = ( - 
self.retry_handler(super().list, block_id=block_id, **kwargs) - if self.retry_handler - else super().list(block_id=block_id, **kwargs) - ) # type: ignore - child_blocks = [Block.from_dict(data=b) for b in resp.pop("results", [])] - return child_blocks, resp - - def iterate_list( - self, - block_id: str, - **kwargs: Any, - ) -> Generator[List[Block], None, None]: - while True: - response: dict = ( - self.retry_handler(super().list, block_id=block_id, **kwargs) - if self.retry_handler - else super().list(block_id=block_id, **kwargs) - ) # type: ignore - child_blocks = [Block.from_dict(data=b) for b in response.pop("results", [])] - yield child_blocks - - next_cursor = response.get("next_cursor") - if not response.get("has_more") or not next_cursor: - return - - -class DatabasesEndpoint(NotionDatabasesEndpoint): - def __init__( - self, - *args, - retry_strategy_config: Optional[RetryStrategyConfig] = None, - **kwargs, - ): - super().__init__(*args, **kwargs) - self.retry_strategy_config = retry_strategy_config - - @property - def retry_handler(self) -> Optional[RetryHandler]: - return get_retry_handler(self) - - def retrieve(self, database_id: str, **kwargs: Any) -> Database: - resp: dict = ( - self.retry_handler(super().retrieve, database_id=database_id, **kwargs) - if (self.retry_handler) - else (super().retrieve(database_id=database_id, **kwargs)) - ) # type: ignore - return Database.from_dict(data=resp) - - def retrieve_status(self, database_id: str, **kwargs) -> int: - request = self.parent._build_request( - method="HEAD", - path=f"databases/{database_id}", - auth=kwargs.get("auth"), - ) - try: - response: httpx.Response = ( - self.retry_handler(self.parent.client.send, request) - if (self.retry_handler) - else (self.parent.client.send(request)) - ) # type: ignore - return response.status_code - except httpx.TimeoutException: - raise RequestTimeoutError() - - def query(self, database_id: str, **kwargs: Any) -> Tuple[List[Page], dict]: - """Get a list of [Pages](https://developers.notion.com/reference/page) contained in the database. 
- - *[🔗 Endpoint documentation](https://developers.notion.com/reference/post-database-query)* - """ # noqa: E501 - resp: dict = ( - self.retry_handler(super().query, database_id=database_id, **kwargs) - if (self.retry_handler) - else (super().query(database_id=database_id, **kwargs)) - ) # type: ignore - pages = [Page.from_dict(data=p) for p in resp.pop("results")] - for p in pages: - p.properties = map_cells(p.properties) - return pages, resp - - def iterate_query(self, database_id: str, **kwargs: Any) -> Generator[List[Page], None, None]: - while True: - response: dict = ( - self.retry_handler(super().query, database_id=database_id, **kwargs) - if (self.retry_handler) - else (super().query(database_id=database_id, **kwargs)) - ) # type: ignore - pages = [Page.from_dict(data=p) for p in response.pop("results", [])] - for p in pages: - p.properties = map_cells(p.properties) - yield pages - - next_cursor = response.get("next_cursor") - if not response.get("has_more") or not next_cursor: - return - - -class BlocksEndpoint(NotionBlocksEndpoint): - def __init__( - self, - *args: Any, - retry_strategy_config: Optional[RetryStrategyConfig] = None, - **kwargs: Any, - ) -> None: - super().__init__(*args, **kwargs) - self.retry_strategy_config = retry_strategy_config - self.children = BlocksChildrenEndpoint( - retry_strategy_config=retry_strategy_config, - *args, - **kwargs, - ) - - @property - def retry_handler(self) -> Optional[RetryHandler]: - return get_retry_handler(self) - - def retrieve(self, block_id: str, **kwargs: Any) -> Block: - resp: dict = ( - self.retry_handler(super().retrieve, block_id=block_id, **kwargs) - if (self.retry_handler) - else (super().retrieve(block_id=block_id, **kwargs)) - ) # type: ignore - return Block.from_dict(data=resp) - - -class PagesEndpoint(NotionPagesEndpoint): - def __init__( - self, - *args, - retry_strategy_config: Optional[RetryStrategyConfig] = None, - **kwargs, - ): - super().__init__(*args, **kwargs) - self.retry_strategy_config = retry_strategy_config - - @property - def retry_handler(self) -> Optional[RetryHandler]: - return get_retry_handler(self) - - def retrieve(self, page_id: str, **kwargs: Any) -> Page: - resp: dict = ( - self.retry_handler(super().retrieve, page_id=page_id, **kwargs) - if (self.retry_handler) - else (super().retrieve(page_id=page_id, **kwargs)) - ) # type: ignore - return Page.from_dict(data=resp) - - def retrieve_status(self, page_id: str, **kwargs) -> int: - request = self.parent._build_request( - method="HEAD", - path=f"pages/{page_id}", - auth=kwargs.get("auth"), - ) - try: - response: httpx.Response = ( - self.retry_handler(self.parent.client.send, request) - if (self.retry_handler) - else (self.parent.client.send(request)) - ) # type: ignore - return response.status_code - except httpx.TimeoutException: - raise RequestTimeoutError() - - -class Client(NotionClient): - def __init__( - self, - *args: Any, - retry_strategy_config: Optional[RetryStrategyConfig] = None, - **kwargs: Any, - ) -> None: - super().__init__(*args, **kwargs) - self.blocks = BlocksEndpoint(retry_strategy_config=retry_strategy_config, parent=self) - self.pages = PagesEndpoint(retry_strategy_config=retry_strategy_config, parent=self) - self.databases = DatabasesEndpoint(retry_strategy_config=retry_strategy_config, parent=self) diff --git a/unstructured/ingest/connector/notion/connector.py b/unstructured/ingest/connector/notion/connector.py deleted file mode 100644 index c9588cc47..000000000 --- a/unstructured/ingest/connector/notion/connector.py +++ 
/dev/null @@ -1,468 +0,0 @@ -import typing as t -from dataclasses import dataclass, field -from pathlib import Path -from uuid import UUID - -import httpx - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - RetryStrategyConfig, - SourceConnectorCleanupMixin, -) -from unstructured.ingest.logger import logger -from unstructured.utils import ( - requires_dependencies, -) - -NOTION_API_VERSION = "2022-06-28" -if t.TYPE_CHECKING: - from unstructured.ingest.connector.notion.client import Client as NotionClient - - -@dataclass -class NotionAccessConfig(AccessConfig): - notion_api_key: str = enhanced_field(sensitive=True) - - -@dataclass -class SimpleNotionConfig(BaseConnectorConfig): - """Connector config to process all messages by channel id's.""" - - access_config: NotionAccessConfig - page_ids: t.Optional[t.List[str]] = None - database_ids: t.Optional[t.List[str]] = None - recursive: bool = False - - def __post_init__(self): - if self.page_ids: - self.page_ids = [str(UUID(p.strip())) for p in self.page_ids] - - if self.database_ids: - self.database_ids = [str(UUID(d.strip())) for d in self.database_ids] - - -@dataclass -class NotionPageIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - """Class encapsulating fetching a doc and writing processed results (but not - doing the processing!). - - Also includes a cleanup method. When things go wrong and the cleanup - method is not called, the file is left behind on the filesystem to assist debugging. - """ - - page_id: str - connector_config: SimpleNotionConfig - registry_name: str = "notion_page" - retry_strategy_config: t.Optional[RetryStrategyConfig] = None - - def _tmp_download_file(self): - page_file = self.page_id + ".html" - return Path(self.read_config.download_dir) / page_file - - @property - def _output_filename(self): - page_file = self.page_id + ".json" - return Path(self.processor_config.output_dir) / page_file - - def _create_full_tmp_dir_path(self): - self._tmp_download_file().parent.mkdir(parents=True, exist_ok=True) - - @requires_dependencies(dependencies=["notion_client"], extras="notion") - def get_client(self): - from unstructured.ingest.connector.notion.client import Client as NotionClient - - # Pin the version of the api to avoid schema changes - return NotionClient( - notion_version=NOTION_API_VERSION, - auth=self.connector_config.access_config.notion_api_key, - logger=logger, - log_level=logger.level, - retry_strategy_config=self.retry_strategy_config, - ) - - @BaseSingleIngestDoc.skip_if_file_exists - @requires_dependencies(dependencies=["notion_client"], extras="notion") - def get_file(self): - from notion_client import APIErrorCode, APIResponseError - - from unstructured.ingest.connector.notion.helpers import extract_page_html - - self._create_full_tmp_dir_path() - - client = self.get_client() - - try: - text_extraction = extract_page_html( - client=client, - page_id=self.page_id, - logger=logger, - ) - self.check_exists = True - self.file_exists = True - if html := text_extraction.html: - with open(self._tmp_download_file(), "w") as page_file: - page_file.write(html.render(pretty=True)) - - except APIResponseError as error: - if error.code == APIErrorCode.ObjectNotFound: - self.check_exists = True - self.file_exists = False - else: - logger.error(f"Error: {error}") - - 
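get_file above delegates to extract_page_html (shown later in this diff), which walks a page's child blocks through the client's iterate_list generator; like the databases endpoint's iterate_query, it relies on Notion's standard has_more / next_cursor pagination. A minimal standalone sketch of that convention, where paginate and fetch_page are illustrative names and fetch_page stands in for any Notion list/query call returning results, has_more, and next_cursor:

import typing as t

def paginate(fetch_page: t.Callable[..., dict], **kwargs) -> t.Iterator[t.List[dict]]:
    # Notion-style cursor pagination: request a page, yield its results, then
    # continue from next_cursor until has_more is false or no cursor is returned.
    while True:
        response = fetch_page(**kwargs)
        yield response.get("results", [])
        next_cursor = response.get("next_cursor")
        if not response.get("has_more") or not next_cursor:
            return
        kwargs["start_cursor"] = next_cursor

# Usage sketch (assuming a stock notion_client.Client instance named client):
#   for blocks in paginate(client.blocks.children.list, block_id=page_id):
#       ...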
@requires_dependencies(dependencies=["notion_client"], extras="notion") - def get_file_metadata(self): - from notion_client import APIErrorCode, APIResponseError - - client = self.get_client() - - # The Notion block endpoint gives more hierarchical information (parent,child relationships) - # than the pages endpoint so choosing to use that one to get metadata about the page - try: - self.file_metadata = client.pages.retrieve(page_id=self.page_id) # type: ignore - self.check_exists = True - self.file_exists = True - except APIResponseError as error: - if error.code == APIErrorCode.ObjectNotFound: - self.check_exists = True - self.file_exists = False - else: - logger.error(f"Error: {error}") - - @property - def date_created(self) -> t.Optional[str]: - """The date the document was created on the source system.""" - if not hasattr(self, "file_metadata") or not self.file_metadata: - self.get_file_metadata() - - return self.file_metadata.created_time if self.file_metadata else None - - @property - def date_modified(self) -> t.Optional[str]: - """The date the document was last modified on the source system.""" - if not hasattr(self, "file_metadata") or not self.file_metadata: - self.get_file_metadata() - - return self.file_metadata.last_edited_time if self.file_metadata else None - - @property - def exists(self) -> t.Optional[bool]: - """Whether the document exists on the remote source.""" - if self.check_exists: - return self.file_exists - - self.get_file_metadata() - - return self.file_exists - - @property - def filename(self): - """The filename of the file created from a notion page""" - return self._tmp_download_file() - - -@dataclass -class NotionDatabaseIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - """Class encapsulating fetching a doc and writing processed results (but not - doing the processing!). - - Also includes a cleanup method. When things go wrong and the cleanup - method is not called, the file is left behind on the filesystem to assist debugging. 
- """ - - database_id: str - connector_config: SimpleNotionConfig - retry_strategy_config: t.Optional[RetryStrategyConfig] = None - registry_name: str = "notion_database" - - def _tmp_download_file(self): - page_file = self.database_id + ".html" - return Path(self.read_config.download_dir) / page_file - - @property - def _output_filename(self): - page_file = self.database_id + ".json" - return Path(self.processor_config.output_dir) / page_file - - def _create_full_tmp_dir_path(self): - self._tmp_download_file().parent.mkdir(parents=True, exist_ok=True) - - @requires_dependencies(dependencies=["notion_client"], extras="notion") - def get_client(self): - from unstructured.ingest.connector.notion.client import Client as NotionClient - - # Pin the version of the api to avoid schema changes - return NotionClient( - notion_version=NOTION_API_VERSION, - auth=self.connector_config.access_config.notion_api_key, - logger=logger, - log_level=logger.level, - retry_strategy_config=self.retry_strategy_config, - ) - - @BaseSingleIngestDoc.skip_if_file_exists - @requires_dependencies(dependencies=["notion_client"], extras="notion") - def get_file(self): - from notion_client import APIErrorCode, APIResponseError - - from unstructured.ingest.connector.notion.helpers import extract_database_html - - self._create_full_tmp_dir_path() - - client = self.get_client() - - try: - text_extraction = extract_database_html( - client=client, - database_id=self.database_id, - logger=logger, - ) - self.check_exists = True - self.file_exists = True - if html := text_extraction.html: - with open(self._tmp_download_file(), "w") as page_file: - page_file.write(html.render(pretty=True)) - - except APIResponseError as error: - if error.code == APIErrorCode.ObjectNotFound: - self.check_exists = True - self.file_exists = False - else: - logger.error(f"Error: {error}") - - @requires_dependencies(dependencies=["notion_client"], extras="notion") - def get_file_metadata(self): - from notion_client import APIErrorCode, APIResponseError - - client = self.get_client() - - # The Notion block endpoint gives more hierarchical information (parent,child relationships) - # than the pages endpoint so choosing to use that one to get metadata about the page - try: - self.file_metadata = client.databases.retrieve( - database_id=self.database_id, - ) # type: ignore - self.check_exists = True - self.file_exists = True - except APIResponseError as error: - if error.code == APIErrorCode.ObjectNotFound: - self.check_exists = True - self.file_exists = False - else: - logger.error(f"Error: {error}") - - @property - def date_created(self) -> t.Optional[str]: - """The date the document was created on the source system.""" - if not hasattr(self, "file_metadata") or not self.file_metadata: - self.get_file_metadata() - - return self.file_metadata.created_time if self.file_metadata else None - - @property - def date_modified(self) -> t.Optional[str]: - """The date the document was last modified on the source system.""" - if not hasattr(self, "file_metadata") or not self.file_metadata: - self.get_file_metadata() - - return self.file_metadata.last_edited_time if self.file_metadata else None - - @property - def exists(self) -> t.Optional[bool]: - """Whether the document exists on the remote source.""" - if self.check_exists: - return self.file_exists - - self.get_file_metadata() - - return self.file_exists - - @property - def filename(self): - """The filename of the file created from a notion page""" - return self._tmp_download_file() - - -@dataclass -class 
NotionSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - """Objects of this class support fetching document(s) from""" - - connector_config: SimpleNotionConfig - retry_strategy_config: t.Optional[RetryStrategyConfig] = None - _client: t.Optional["NotionClient"] = field(init=False, default=None) - - @property - def client(self) -> "NotionClient": - if self._client is None: - self._client = self.create_client() - return self._client - - @requires_dependencies(dependencies=["notion_client"], extras="notion") - def create_client(self) -> "NotionClient": - from unstructured.ingest.connector.notion.client import Client as NotionClient - - return NotionClient( - notion_version=NOTION_API_VERSION, - auth=self.connector_config.access_config.notion_api_key, - logger=logger, - log_level=logger.level, - retry_strategy_config=self.retry_strategy_config, - ) - - def check_connection(self): - try: - request = self.client._build_request("HEAD", "users") - response = self.client.client.send(request) - response.raise_for_status() - except httpx.HTTPStatusError as http_error: - logger.error(f"failed to validate connection: {http_error}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {http_error}") - - @requires_dependencies(dependencies=["notion_client"], extras="notion") - def initialize(self): - """Verify that can get metadata for an object, validates connections info.""" - _ = self.client - - @requires_dependencies(dependencies=["notion_client"], extras="notion") - def get_child_page_content(self, page_id: str): - from unstructured.ingest.connector.notion.helpers import ( - get_recursive_content_from_page, - ) - - # sanity check that database id is valid - resp_code = self.client.pages.retrieve_status(page_id=page_id) - if resp_code != 200: - raise ValueError( - f"page associated with page id could not be found: {page_id}", - ) - - child_content = get_recursive_content_from_page( - client=self.client, - page_id=page_id, - logger=logger, - ) - return child_content - - def get_child_content(self, page_id: str): - from unstructured.ingest.connector.notion.helpers import ( - get_recursive_content_from_page, - ) - - child_content = get_recursive_content_from_page( - client=self.client, - page_id=page_id, - logger=logger, - ) - return child_content - - @requires_dependencies(dependencies=["notion_client"], extras="notion") - def get_child_database_content(self, database_id: str): - from unstructured.ingest.connector.notion.helpers import ( - get_recursive_content_from_database, - ) - - # sanity check that database id is valid - resp_code = self.client.databases.retrieve_status(database_id=database_id) - if resp_code != 200: - raise ValueError( - f"database associated with database id could not be found: {database_id}", - ) - - child_content = get_recursive_content_from_database( - client=self.client, - database_id=database_id, - logger=logger, - ) - return child_content - - def get_ingest_docs(self): - docs: t.List[BaseSingleIngestDoc] = [] - if self.connector_config.page_ids: - docs += [ - NotionPageIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - retry_strategy_config=self.retry_strategy_config, - read_config=self.read_config, - page_id=page_id, - ) - for page_id in self.connector_config.page_ids - ] - if self.connector_config.database_ids: - docs += [ - NotionDatabaseIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - retry_strategy_config=self.retry_strategy_config, 
- read_config=self.read_config, - database_id=database_id, - ) - for database_id in self.connector_config.database_ids - ] - if self.connector_config.recursive: - logger.info("Getting recursive content") - child_pages = [] - child_databases = [] - if self.connector_config.page_ids: - for page_id in self.connector_config.page_ids: - child_content = self.get_child_page_content(page_id=page_id) - child_pages.extend(child_content.child_pages) - child_databases.extend(child_content.child_databases) - - if self.connector_config.database_ids: - for database_id in self.connector_config.database_ids: - child_content = self.get_child_database_content(database_id=database_id) - child_pages.extend(child_content.child_pages) - child_databases.extend(child_content.child_databases) - - # Remove duplicates - child_pages = list(set(child_pages)) - if self.connector_config.page_ids: - child_pages = [c for c in child_pages if c not in self.connector_config.page_ids] - - child_databases = list(set(child_databases)) - if self.connector_config.database_ids: - child_databases = [ - db for db in child_databases if db not in self.connector_config.database_ids - ] - - if child_pages: - logger.info( - "Adding the following child page ids: {}".format(", ".join(child_pages)), - ) - docs += [ - NotionPageIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - retry_strategy_config=self.retry_strategy_config, - read_config=self.read_config, - page_id=page_id, - ) - for page_id in child_pages - ] - - if child_databases: - logger.info( - "Adding the following child database ids: {}".format( - ", ".join(child_databases), - ), - ) - docs += [ - NotionDatabaseIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - retry_strategy_config=self.retry_strategy_config, - read_config=self.read_config, - database_id=database_id, - ) - for database_id in child_databases - ] - - return docs diff --git a/unstructured/ingest/connector/notion/helpers.py b/unstructured/ingest/connector/notion/helpers.py deleted file mode 100644 index a09fa083b..000000000 --- a/unstructured/ingest/connector/notion/helpers.py +++ /dev/null @@ -1,584 +0,0 @@ -import enum -import logging -from dataclasses import dataclass, field -from typing import List, Optional, Tuple -from urllib.parse import urlparse -from uuid import UUID - -from htmlBuilder.attributes import Style, Type -from htmlBuilder.tags import ( - Body, - Div, - Head, - Html, - HtmlTag, - Ol, - Table, - Td, - Th, - Title, - Tr, - Ul, -) -from notion_client.errors import APIResponseError - -import unstructured.ingest.connector.notion.types.blocks as notion_blocks -from unstructured.ingest.connector.notion.client import Client -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.block import Block -from unstructured.ingest.connector.notion.types.database import Database - - -@dataclass -class TextExtractionResponse: - text: Optional[str] = None - child_pages: List[str] = field(default_factory=list) - child_databases: List[str] = field(default_factory=list) - - -@dataclass -class HtmlExtractionResponse: - html: Optional[HtmlTag] = None - child_pages: List[str] = field(default_factory=list) - child_databases: List[str] = field(default_factory=list) - - -def extract_page_html( - client: Client, - page_id: str, - logger: logging.Logger, -) -> HtmlExtractionResponse: - page_id_uuid = UUID(page_id) - html_elements: List[Tuple[BlockBase, HtmlTag]] = [] - 
parent_block: Block = client.blocks.retrieve(block_id=page_id) # type: ignore - head = None - if isinstance(parent_block.block, notion_blocks.ChildPage): - head = Head([], Title([], parent_block.block.title)) - child_pages: List[str] = [] - child_databases: List[str] = [] - parents: List[Tuple[int, Block]] = [(0, parent_block)] - processed_block_ids = [] - while len(parents) > 0: - level, parent = parents.pop(0) - parent_html = parent.get_html() - if parent_html: - html_elements.append((parent.block, parent_html)) - logger.debug(f"processing block: {parent}") - if isinstance(parent.block, notion_blocks.ChildPage) and parent.id != str(page_id_uuid): - child_pages.append(parent.id) - continue - if isinstance(parent.block, notion_blocks.ChildDatabase): - child_databases.append(parent.id) - continue - if isinstance(parent.block, notion_blocks.Table): - table_response = build_table(client=client, table=parent) - html_elements.append((parent.block, table_response.table_html)) - child_pages.extend(table_response.child_pages) - child_databases.extend(table_response.child_databases) - continue - if isinstance(parent.block, notion_blocks.ColumnList): - column_html = build_columned_list(client=client, column_parent=parent) - html_elements.append((parent.block, column_html)) - continue - if isinstance(parent.block, notion_blocks.BulletedListItem): - bullet_list_resp = build_bulleted_list_children( - client=client, - bulleted_list_item_parent=parent, - ) - if bullet_list_children := bullet_list_resp.child_list: - html_elements.append((parent.block, bullet_list_children)) - continue - if isinstance(parent.block, notion_blocks.NumberedListItem): - numbered_list_resp = build_numbered_list_children( - client=client, - numbered_list_item_parent=parent, - ) - if numbered_list_children := numbered_list_resp.child_list: - html_elements.append((parent.block, numbered_list_children)) - continue - if parent.block.can_have_children() and parent.has_children: - children = [] - for children_block in client.blocks.children.iterate_list( # type: ignore - block_id=parent.id, - ): - children.extend(children_block) - if children: - logger.debug(f"Adding {len(children)} children from parent: {parent}") - for child in children: - if child.id not in processed_block_ids: - parents.append((level + 1, child)) - processed_block_ids.append(parent) - - # Join list items - joined_html_elements = [] - numbered_list_items = [] - bullet_list_items = [] - for block, html in html_elements: - if isinstance(block, notion_blocks.BulletedListItem): - bullet_list_items.append(html) - continue - if isinstance(block, notion_blocks.NumberedListItem): - numbered_list_items.append(html) - continue - if len(numbered_list_items) > 0: - joined_html_elements.append(Ol([], numbered_list_items)) - numbered_list_items = [] - if len(bullet_list_items) > 0: - joined_html_elements.append(Ul([], bullet_list_items)) - bullet_list_items = [] - joined_html_elements.append(html) - - body = Body([], joined_html_elements) - all_elements = [body] - if head: - all_elements = [head] + all_elements - full_html = Html([], all_elements) - return HtmlExtractionResponse( - full_html, - child_pages=child_pages, - child_databases=child_databases, - ) - - -def extract_database_html( - client: Client, - database_id: str, - logger: logging.Logger, -) -> HtmlExtractionResponse: - logger.debug(f"processing database id: {database_id}") - database: Database = client.databases.retrieve(database_id=database_id) # type: ignore - property_keys = list(database.properties.keys()) - 
property_keys = sorted(property_keys) - table_html_rows = [] - child_pages: List[str] = [] - child_databases: List[str] = [] - # Create header row - table_html_rows.append(Tr([], [Th([], k) for k in property_keys])) - - all_pages = [] - for page_chunk in client.databases.iterate_query(database_id=database_id): # type: ignore - all_pages.extend(page_chunk) - - logger.debug(f"Creating {len(all_pages)} rows") - for page in all_pages: - if is_database_url(client=client, url=page.url): - child_databases.append(page.id) - if is_page_url(client=client, url=page.url): - child_pages.append(page.id) - properties = page.properties - inner_html = [properties.get(k).get_html() for k in property_keys] # type: ignore - table_html_rows.append( - Tr( - [], - [Td([], cell) for cell in [html if html else Div([], []) for html in inner_html]], - ), - ) - - table_html = Table([], table_html_rows) - - return HtmlExtractionResponse( - html=table_html, - child_pages=child_pages, - child_databases=child_databases, - ) - - -@dataclass -class ChildExtractionResponse: - child_pages: List[str] = field(default_factory=list) - child_databases: List[str] = field(default_factory=list) - - -class QueueEntryType(enum.Enum): - DATABASE = "database" - PAGE = "page" - - -@dataclass -class QueueEntry: - type: QueueEntryType - id: UUID - - -def get_recursive_content_from_page( - client: Client, - page_id: str, - logger: logging.Logger, -) -> ChildExtractionResponse: - return get_recursive_content( - client=client, - init_entry=QueueEntry(type=QueueEntryType.PAGE, id=UUID(page_id)), - logger=logger, - ) - - -def get_recursive_content_from_database( - client: Client, - database_id: str, - logger: logging.Logger, -) -> ChildExtractionResponse: - return get_recursive_content( - client=client, - init_entry=QueueEntry(type=QueueEntryType.DATABASE, id=UUID(database_id)), - logger=logger, - ) - - -def get_recursive_content( - client: Client, - init_entry: QueueEntry, - logger: logging.Logger, -) -> ChildExtractionResponse: - parents: List[QueueEntry] = [init_entry] - child_pages: List[str] = [] - child_dbs: List[str] = [] - processed: List[str] = [] - while len(parents) > 0: - parent: QueueEntry = parents.pop() - processed.append(str(parent.id)) - if parent.type == QueueEntryType.PAGE: - logger.debug(f"Getting child data from page: {parent.id}") - page_children = [] - try: - for children_block in client.blocks.children.iterate_list( # type: ignore - block_id=str(parent.id), - ): - page_children.extend(children_block) - except APIResponseError as api_error: - logger.error(f"failed to get page with id {parent.id}: {api_error}") - if str(parent.id) in child_pages: - child_pages.remove(str(parent.id)) - continue - if not page_children: - continue - - # Extract child pages - child_pages_from_page = [ - c for c in page_children if isinstance(c.block, notion_blocks.ChildPage) - ] - if child_pages_from_page: - child_page_blocks: List[notion_blocks.ChildPage] = [ - p.block - for p in child_pages_from_page - if isinstance(p.block, notion_blocks.ChildPage) - ] - logger.debug( - "found child pages from parent page {}: {}".format( - parent.id, - ", ".join([block.title for block in child_page_blocks]), - ), - ) - new_pages = [p.id for p in child_pages_from_page if p.id not in processed] - new_pages = list(set(new_pages)) - child_pages.extend(new_pages) - parents.extend( - [QueueEntry(type=QueueEntryType.PAGE, id=UUID(i)) for i in new_pages], - ) - - # Extract child databases - child_dbs_from_page = [ - c for c in page_children if isinstance(c.block, 
notion_blocks.ChildDatabase) - ] - if child_dbs_from_page: - child_db_blocks: List[notion_blocks.ChildDatabase] = [ - c.block - for c in page_children - if isinstance(c.block, notion_blocks.ChildDatabase) - ] - logger.debug( - "found child database from parent page {}: {}".format( - parent.id, - ", ".join([block.title for block in child_db_blocks]), - ), - ) - new_dbs = [db.id for db in child_dbs_from_page if db.id not in processed] - new_dbs = list(set(new_dbs)) - child_dbs.extend(new_dbs) - parents.extend( - [QueueEntry(type=QueueEntryType.DATABASE, id=UUID(i)) for i in new_dbs], - ) - - linked_to_others: List[notion_blocks.LinkToPage] = [ - c.block for c in page_children if isinstance(c.block, notion_blocks.LinkToPage) - ] - for link in linked_to_others: - if (page_id := link.page_id) and ( - page_id not in processed and page_id not in child_pages - ): - child_pages.append(page_id) - parents.append(QueueEntry(type=QueueEntryType.PAGE, id=UUID(page_id))) - if (database_id := link.database_id) and ( - database_id not in processed and database_id not in child_dbs - ): - child_dbs.append(database_id) - parents.append( - QueueEntry(type=QueueEntryType.DATABASE, id=UUID(database_id)), - ) - - elif parent.type == QueueEntryType.DATABASE: - logger.debug(f"Getting child data from database: {parent.id}") - database_pages = [] - try: - for page_entries in client.databases.iterate_query( # type: ignore - database_id=str(parent.id), - ): - database_pages.extend(page_entries) - except APIResponseError as api_error: - logger.error(f"failed to get database with id {parent.id}: {api_error}") - if str(parent.id) in child_dbs: - child_dbs.remove(str(parent.id)) - continue - if not database_pages: - continue - - child_pages_from_db = [ - p for p in database_pages if is_page_url(client=client, url=p.url) - ] - if child_pages_from_db: - logger.debug( - "found child pages from parent database {}: {}".format( - parent.id, - ", ".join([p.url for p in child_pages_from_db]), - ), - ) - new_pages = [p.id for p in child_pages_from_db if p.id not in processed] - child_pages.extend(new_pages) - parents.extend( - [QueueEntry(type=QueueEntryType.PAGE, id=UUID(i)) for i in new_pages], - ) - - child_dbs_from_db = [ - p for p in database_pages if is_database_url(client=client, url=p.url) - ] - if child_dbs_from_db: - logger.debug( - "found child database from parent database {}: {}".format( - parent.id, - ", ".join([db.url for db in child_dbs_from_db]), - ), - ) - new_dbs = [db.id for db in child_dbs_from_db if db.id not in processed] - child_dbs.extend(new_dbs) - parents.extend( - [QueueEntry(type=QueueEntryType.DATABASE, id=UUID(i)) for i in new_dbs], - ) - - return ChildExtractionResponse( - child_pages=child_pages, - child_databases=child_dbs, - ) - - -def is_valid_uuid(uuid_str: str) -> bool: - try: - UUID(uuid_str) - return True - except Exception: - return False - - -def get_uuid_from_url(path: str) -> Optional[str]: - strings = path.split("-") - if len(strings) > 0 and is_valid_uuid(strings[-1]): - return strings[-1] - return None - - -def is_page_url(client: Client, url: str): - parsed_url = urlparse(url) - path = parsed_url.path.split("/")[-1] - if parsed_url.netloc != "www.notion.so": - return False - page_uuid = get_uuid_from_url(path=path) - if not page_uuid: - return False - check_resp = client.pages.retrieve_status(page_id=page_uuid) - return check_resp == 200 - - -def is_database_url(client: Client, url: str): - parsed_url = urlparse(url) - path = parsed_url.path.split("/")[-1] - if parsed_url.netloc != 
"www.notion.so": - return False - database_uuid = get_uuid_from_url(path=path) - if not database_uuid: - return False - check_resp = client.databases.retrieve_status(database_id=database_uuid) - return check_resp == 200 - - -@dataclass -class BuildTableResponse: - table_html: HtmlTag - child_pages: List[str] = field(default_factory=list) - child_databases: List[str] = field(default_factory=list) - - -def build_table(client: Client, table: Block) -> BuildTableResponse: - if not isinstance(table.block, notion_blocks.Table): - raise ValueError(f"block type not table: {type(table.block)}") - rows: List[notion_blocks.TableRow] = [] - child_pages: List[str] = [] - child_databases: List[str] = [] - for row_chunk in client.blocks.children.iterate_list( # type: ignore - block_id=table.id, - ): - rows.extend( - [row.block for row in row_chunk if isinstance(row.block, notion_blocks.TableRow)], - ) - - # Extract child databases and pages - for row in rows: - for c in row.cells: - for rt in c.rich_texts: - if mention := rt.mention: - if mention.type == "page" and (page := mention.page): - child_pages.append(page.id) - if mention.type == "database" and (database := mention.database): - child_databases.append(database.id) - - header: Optional[notion_blocks.TableRow] = None - if table.block.has_column_header: - header = rows.pop(0) - table_html_rows = [] - if header: - header.is_header = True - table_html_rows.append(header.get_html()) - table_html_rows.extend([row.get_html() for row in rows]) - html_table = Table([], table_html_rows) - - return BuildTableResponse( - table_html=html_table, - child_pages=child_pages, - child_databases=child_databases, - ) - - -def build_columned_list(client: Client, column_parent: Block) -> HtmlTag: - if not isinstance(column_parent.block, notion_blocks.ColumnList): - raise ValueError(f"block type not column list: {type(column_parent.block)}") - columns: List[Block] = [] - for column_chunk in client.blocks.children.iterate_list( # type: ignore - block_id=column_parent.id, - ): - columns.extend(column_chunk) - num_columns = len(columns) - columns_content = [] - for column in columns: - for column_content_chunk in client.blocks.children.iterate_list( # type: ignore - block_id=column.id, - ): - columns_content.append( - Div( - [Style(f"width:{100/num_columns}%; float: left")], - [content.block.get_html() for content in column_content_chunk], - ), - ) - - return Div([], columns_content) - - -@dataclass -class BulletedListResponse: - html: HtmlTag - child_list: Optional[HtmlTag] = None - - -bulleted_list_styles = ["circle", "square", "disc"] - - -def build_bulleted_list_children( - client: Client, - bulleted_list_item_parent: Block, - list_style_ind: int = 0, -) -> BulletedListResponse: - if not isinstance(bulleted_list_item_parent.block, notion_blocks.BulletedListItem): - raise ValueError( - f"block type not bulleted list item: {type(bulleted_list_item_parent.block)}", - ) - html = bulleted_list_item_parent.get_html() - if html: - html.attributes = [Style("margin-left: 10px")] - if not bulleted_list_item_parent.has_children: - return BulletedListResponse( - html=html, - ) - children = [] - for child_block in client.blocks.children.iterate_list( # type: ignore - block_id=bulleted_list_item_parent.id, - ): - children.extend(child_block) - if not children: - return BulletedListResponse( - html=bulleted_list_item_parent.get_html(), - ) - child_html = [] - for child in children: - child_resp = build_bulleted_list_children( - client=client, - bulleted_list_item_parent=child, - 
list_style_ind=(list_style_ind + 1) % len(bulleted_list_styles), - ) - child_html.append(child_resp.html) - if child_children := child_resp.child_list: - child_html.append(child_children) - - return BulletedListResponse( - html=html, - child_list=Ul( - [Style(f"list-style-type: {bulleted_list_styles[list_style_ind]}")], - child_html, - ), - ) - - -@dataclass -class NumberedListResponse: - html: HtmlTag - child_list: Optional[HtmlTag] = None - - -numbered_list_types = ["a", "i", "1"] - - -def build_numbered_list_children( - client: Client, - numbered_list_item_parent: Block, - type_attr_ind=0, -) -> NumberedListResponse: - if not isinstance(numbered_list_item_parent.block, notion_blocks.NumberedListItem): - raise ValueError( - f"block type not numbered list item: {type(numbered_list_item_parent.block)}", - ) - html = numbered_list_item_parent.get_html() - if html: - html.attributes = [Style("margin-left: 10px")] - if not numbered_list_item_parent.has_children: - return NumberedListResponse( - html=html, - ) - children = [] - for child_block in client.blocks.children.iterate_list( # type: ignore - block_id=numbered_list_item_parent.id, - ): - children.extend(child_block) - if not children: - return NumberedListResponse( - html=numbered_list_item_parent.get_html(), - ) - child_html = [] - for child in children: - child_resp = build_numbered_list_children( - client=client, - numbered_list_item_parent=child, - type_attr_ind=(type_attr_ind + 1) % len(numbered_list_types), - ) - child_html.append(child_resp.html) - if child_children := child_resp.child_list: - child_html.append(child_children) - - return NumberedListResponse( - html=html, - child_list=Ol([Type(numbered_list_types[type_attr_ind])], child_html), - ) diff --git a/unstructured/ingest/connector/notion/interfaces.py b/unstructured/ingest/connector/notion/interfaces.py deleted file mode 100644 index bcfa788d5..000000000 --- a/unstructured/ingest/connector/notion/interfaces.py +++ /dev/null @@ -1,32 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Optional - -from htmlBuilder.tags import HtmlTag - - -class FromJSONMixin(ABC): - @classmethod - @abstractmethod - def from_dict(cls, data: dict): - pass - - -class GetHTMLMixin(ABC): - @abstractmethod - def get_html(self) -> Optional[HtmlTag]: - pass - - -class BlockBase(FromJSONMixin, GetHTMLMixin): - @staticmethod - @abstractmethod - def can_have_children() -> bool: - pass - - -class DBPropertyBase(FromJSONMixin): - pass - - -class DBCellBase(FromJSONMixin, GetHTMLMixin): - pass diff --git a/unstructured/ingest/connector/notion/types/__init__.py b/unstructured/ingest/connector/notion/types/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstructured/ingest/connector/notion/types/block.py b/unstructured/ingest/connector/notion/types/block.py deleted file mode 100644 index 7159816d9..000000000 --- a/unstructured/ingest/connector/notion/types/block.py +++ /dev/null @@ -1,95 +0,0 @@ -# https://developers.notion.com/reference/page -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import HtmlTag - -from unstructured.ingest.connector.notion.interfaces import ( - BlockBase, - FromJSONMixin, - GetHTMLMixin, -) -from unstructured.ingest.connector.notion.types import blocks -from unstructured.ingest.connector.notion.types.parent import Parent -from unstructured.ingest.connector.notion.types.user import PartialUser - -block_type_mapping = { - "bookmark": blocks.Bookmark, - "breadcrumb": blocks.Breadcrumb, - 
"bulleted_list_item": blocks.BulletedListItem, - "callout": blocks.Callout, - "child_database": blocks.ChildDatabase, - "child_page": blocks.ChildPage, - "code": blocks.Code, - "column": blocks.Column, - "column_list": blocks.ColumnList, - "divider": blocks.Divider, - "heading_1": blocks.Heading, - "heading_2": blocks.Heading, - "heading_3": blocks.Heading, - "embed": blocks.Embed, - "equation": blocks.Equation, - "file": blocks.File, - "image": blocks.Image, - "link_preview": blocks.LinkPreview, - "link_to_page": blocks.LinkToPage, - "numbered_list_item": blocks.NumberedListItem, - "paragraph": blocks.Paragraph, - "pdf": blocks.PDF, - "quote": blocks.Quote, - "synced_block": blocks.SyncBlock, - "table": blocks.Table, - "table_of_contents": blocks.TableOfContents, - "table_row": blocks.TableRow, - "template": blocks.Template, - "to_do": blocks.ToDo, - "toggle": blocks.Toggle, - "unsupported": blocks.Unsupported, - "video": blocks.Video, -} - - -@dataclass -class Block(FromJSONMixin, GetHTMLMixin): - id: str - type: str - created_time: str - created_by: PartialUser - last_edited_time: str - last_edited_by: PartialUser - archived: bool - has_children: bool - parent: Parent - block: BlockBase - object: str = "block" - request_id: Optional[str] = None - - def __repr__(self): - return f"{self.__class__.__name__}(id={self.id}, type={self.type})" - - @classmethod - def from_dict(cls, data: dict): - t = data["type"] - block_data = data.pop(t) - created_by = data.pop("created_by") - last_edited_by = data.pop("last_edited_by") - parent = data.pop("parent") - try: - block = cls( - created_by=PartialUser.from_dict(created_by), - last_edited_by=PartialUser.from_dict(last_edited_by), - parent=Parent.from_dict(parent), - block=block_type_mapping[t].from_dict(block_data), # type: ignore - **data, - ) - except KeyError as ke: - raise KeyError(f"failed to map to associated block type -> {t}: {block_data}") from ke - except TypeError as te: - raise TypeError(f"failed to map to associated block type -> {t}: {block_data}") from te - - return block - - def get_html(self) -> Optional[HtmlTag]: - if self.block: - return self.block.get_html() - return None diff --git a/unstructured/ingest/connector/notion/types/blocks/__init__.py b/unstructured/ingest/connector/notion/types/blocks/__init__.py deleted file mode 100644 index 5cd158bc8..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/__init__.py +++ /dev/null @@ -1,63 +0,0 @@ -from .bookmark import Bookmark -from .breadcrumb import Breadcrumb -from .bulleted_list_item import BulletedListItem -from .callout import Callout -from .child_database import ChildDatabase -from .child_page import ChildPage -from .code import Code -from .column_list import Column, ColumnList -from .divider import Divider -from .embed import Embed -from .equation import Equation -from .file import File -from .heading import Heading -from .image import Image -from .link_preview import LinkPreview -from .link_to_page import LinkToPage -from .numbered_list import NumberedListItem -from .paragraph import Paragraph -from .pdf import PDF -from .quote import Quote -from .synced_block import DuplicateSyncedBlock, OriginalSyncedBlock, SyncBlock -from .table import Table, TableRow -from .table_of_contents import TableOfContents -from .template import Template -from .todo import ToDo -from .toggle import Toggle -from .unsupported import Unsupported -from .video import Video - -__all__ = [ - "Bookmark", - "Breadcrumb", - "BulletedListItem", - "Callout", - "ChildDatabase", - "ChildPage", - 
"Code", - "Column", - "ColumnList", - "Divider", - "Embed", - "Equation", - "File", - "Heading", - "Image", - "LinkPreview", - "LinkToPage", - "NumberedListItem", - "Paragraph", - "PDF", - "Quote", - "SyncBlock", - "OriginalSyncedBlock", - "DuplicateSyncedBlock", - "Table", - "TableRow", - "TableOfContents", - "Template", - "ToDo", - "Toggle", - "Unsupported", - "Video", -] diff --git a/unstructured/ingest/connector/notion/types/blocks/bookmark.py b/unstructured/ingest/connector/notion/types/blocks/bookmark.py deleted file mode 100644 index 46804475f..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/bookmark.py +++ /dev/null @@ -1,40 +0,0 @@ -# https://developers.notion.com/reference/block#bookmark -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.attributes import Href -from htmlBuilder.tags import A, Br, Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class Bookmark(BlockBase): - url: str - caption: List[RichText] = field(default_factory=list) - - @classmethod - def from_dict(cls, data: dict): - captions = data.pop("caption", []) - return cls( - url=data["url"], - caption=[RichText.from_dict(c) for c in captions], - ) - - def get_html(self) -> Optional[HtmlTag]: - texts = [] - if self.url: - texts.append(A([Href(self.url)], self.url)) - if self.caption: - texts.append(Div([], [rt.get_html() for rt in self.caption])) - if not texts: - return None - joined = [Br()] * (len(texts) * 2 - 1) - joined[0::2] = texts - - return Div([], joined) - - @staticmethod - def can_have_children() -> bool: - return False diff --git a/unstructured/ingest/connector/notion/types/blocks/breadcrumb.py b/unstructured/ingest/connector/notion/types/blocks/breadcrumb.py deleted file mode 100644 index d6b1626a2..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/breadcrumb.py +++ /dev/null @@ -1,21 +0,0 @@ -# https://developers.notion.com/reference/block#breadcrumb -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase - - -@dataclass -class Breadcrumb(BlockBase): - @staticmethod - def can_have_children() -> bool: - return False - - @classmethod - def from_dict(cls, data: dict): - return cls() - - def get_html(self) -> Optional[HtmlTag]: - pass diff --git a/unstructured/ingest/connector/notion/types/blocks/bulleted_list_item.py b/unstructured/ingest/connector/notion/types/blocks/bulleted_list_item.py deleted file mode 100644 index 5db911dd2..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/bulleted_list_item.py +++ /dev/null @@ -1,31 +0,0 @@ -# https://developers.notion.com/reference/block#bulleted-list-item -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.tags import HtmlTag, Li - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class BulletedListItem(BlockBase): - color: str - children: List[dict] = field(default_factory=list) - rich_text: List[RichText] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - rich_text = data.pop("rich_text", []) - return cls( - color=data["color"], - children=data.get("children", []), - 
rich_text=[RichText.from_dict(rt) for rt in rich_text], - ) - - def get_html(self) -> Optional[HtmlTag]: - return Li([], [rt.get_html() for rt in self.rich_text]) diff --git a/unstructured/ingest/connector/notion/types/blocks/callout.py b/unstructured/ingest/connector/notion/types/blocks/callout.py deleted file mode 100644 index 6ea2bb130..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/callout.py +++ /dev/null @@ -1,94 +0,0 @@ -# https://developers.notion.com/reference/block#callout -from dataclasses import dataclass, field -from typing import List, Optional, Union - -from htmlBuilder.attributes import Href, Style -from htmlBuilder.tags import A, Div, HtmlTag, P - -from unstructured.ingest.connector.notion.interfaces import ( - BlockBase, - FromJSONMixin, - GetHTMLMixin, -) -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class EmojiIcon(FromJSONMixin, GetHTMLMixin): - emoji: str - type: str = "emoji" - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return P([], self.emoji) - - -@dataclass -class ExternalIconContent(FromJSONMixin): - url: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class ExternalIcon(FromJSONMixin, GetHTMLMixin): - external: ExternalIconContent - type: str = "external" - - @classmethod - def from_dict(cls, data: dict): - return cls(external=ExternalIconContent.from_dict(data=data.pop("external")), **data) - - def get_html(self) -> Optional[HtmlTag]: - if self.external: - return A([Href(self.external.url)], [self.external.url]) - else: - return None - - -class Icon(FromJSONMixin): - @classmethod - def from_dict(cls, data: dict) -> Union[EmojiIcon, ExternalIcon]: - t = data.get("type") - if t == "emoji": - return EmojiIcon.from_dict(data) - elif t == "external": - return ExternalIcon.from_dict(data) - else: - raise ValueError(f"Unexpected icon type: {t} ({data})") - - -@dataclass -class Callout(BlockBase): - color: str - icon: Optional[Union[EmojiIcon, ExternalIcon]] = None - rich_text: List[RichText] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - rich_text = data.pop("rich_text", []) - return cls( - color=data["color"], - icon=Icon.from_dict(data.pop("icon")), - rich_text=[RichText.from_dict(rt) for rt in rich_text], - ) - - def get_html(self) -> Optional[HtmlTag]: - elements = [] - if self.icon and self.icon.get_html(): - elements.append(self.icon.get_html()) - if self.rich_text: - elements.extend([rt.get_html() for rt in self.rich_text]) - attributes = [] - if self.color: - attributes.append(Style(f"color:{self.color}")) - return Div(attributes, elements) diff --git a/unstructured/ingest/connector/notion/types/blocks/child_database.py b/unstructured/ingest/connector/notion/types/blocks/child_database.py deleted file mode 100644 index 578b400f2..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/child_database.py +++ /dev/null @@ -1,23 +0,0 @@ -# https://developers.notion.com/reference/block#child-database -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import HtmlTag, P - -from unstructured.ingest.connector.notion.interfaces import BlockBase - - -@dataclass -class ChildDatabase(BlockBase): - title: str - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) 
- - def get_html(self) -> Optional[HtmlTag]: - return P([], self.title) diff --git a/unstructured/ingest/connector/notion/types/blocks/child_page.py b/unstructured/ingest/connector/notion/types/blocks/child_page.py deleted file mode 100644 index 6ee6f9047..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/child_page.py +++ /dev/null @@ -1,23 +0,0 @@ -# https://developers.notion.com/reference/block#child-page -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import HtmlTag, P - -from unstructured.ingest.connector.notion.interfaces import BlockBase, GetHTMLMixin - - -@dataclass -class ChildPage(BlockBase, GetHTMLMixin): - title: str - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return P([], self.title) diff --git a/unstructured/ingest/connector/notion/types/blocks/code.py b/unstructured/ingest/connector/notion/types/blocks/code.py deleted file mode 100644 index 3a6d80e36..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/code.py +++ /dev/null @@ -1,43 +0,0 @@ -# https://developers.notion.com/reference/block#code -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.tags import Br, Div, HtmlTag -from htmlBuilder.tags import Code as HtmlCode - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class Code(BlockBase): - language: str - rich_text: List[RichText] = field(default_factory=list) - caption: List[RichText] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return False - - @classmethod - def from_dict(cls, data: dict): - rich_text = data.pop("rich_text", []) - caption = data.pop("caption", []) - return cls( - language=data["language"], - rich_text=[RichText.from_dict(rt) for rt in rich_text], - caption=[RichText.from_dict(c) for c in caption], - ) - - def get_html(self) -> Optional[HtmlTag]: - texts = [] - if self.rich_text: - texts.append(HtmlCode([], [rt.get_html() for rt in self.rich_text])) - if self.caption: - texts.append(Div([], [rt.get_html() for rt in self.caption])) - if not texts: - return None - joined = [Br()] * (len(texts) * 2 - 1) - joined[0::2] = texts - - return Div([], joined) diff --git a/unstructured/ingest/connector/notion/types/blocks/column_list.py b/unstructured/ingest/connector/notion/types/blocks/column_list.py deleted file mode 100644 index d2df367c2..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/column_list.py +++ /dev/null @@ -1,35 +0,0 @@ -# https://developers.notion.com/reference/block#column-list-and-column -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase - - -@dataclass -class ColumnList(BlockBase): - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - return cls() - - def get_html(self) -> Optional[HtmlTag]: - return None - - -@dataclass -class Column(BlockBase): - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - return cls() - - def get_html(self) -> Optional[HtmlTag]: - return None diff --git a/unstructured/ingest/connector/notion/types/blocks/divider.py 
b/unstructured/ingest/connector/notion/types/blocks/divider.py deleted file mode 100644 index 33fc01e7b..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/divider.py +++ /dev/null @@ -1,22 +0,0 @@ -# https://developers.notion.com/reference/block#divider -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.attributes import Style -from htmlBuilder.tags import Hr, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase - - -@dataclass -class Divider(BlockBase): - @staticmethod - def can_have_children() -> bool: - return False - - @classmethod - def from_dict(cls, data: dict): - return cls() - - def get_html(self) -> Optional[HtmlTag]: - return Hr([Style("border-top: 3px solid #bbb")]) diff --git a/unstructured/ingest/connector/notion/types/blocks/embed.py b/unstructured/ingest/connector/notion/types/blocks/embed.py deleted file mode 100644 index 561fe828a..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/embed.py +++ /dev/null @@ -1,36 +0,0 @@ -# https://developers.notion.com/reference/block#embed -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.attributes import Href -from htmlBuilder.tags import A, Br, Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class Embed(BlockBase): - url: str - caption: List[RichText] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return False - - @classmethod - def from_dict(cls, data: dict): - return cls(caption=[RichText.from_dict(d) for d in data.pop("caption", [])], **data) - - def get_html(self) -> Optional[HtmlTag]: - texts = [] - if self.url: - texts.append(A([Href(self.url)], self.url)) - if self.caption: - texts.append(Div([], [rt.get_html() for rt in self.caption])) - if not texts: - return None - joined = [Br()] * (len(texts) * 2 - 1) - joined[0::2] = texts - - return Div([], joined) diff --git a/unstructured/ingest/connector/notion/types/blocks/equation.py b/unstructured/ingest/connector/notion/types/blocks/equation.py deleted file mode 100644 index ccab3d04d..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/equation.py +++ /dev/null @@ -1,23 +0,0 @@ -# https://developers.notion.com/reference/block#equation -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase - - -@dataclass -class Equation(BlockBase): - expression: str - - @staticmethod - def can_have_children() -> bool: - return False - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return Div([], self.expression) diff --git a/unstructured/ingest/connector/notion/types/blocks/file.py b/unstructured/ingest/connector/notion/types/blocks/file.py deleted file mode 100644 index ad7fe54be..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/file.py +++ /dev/null @@ -1,49 +0,0 @@ -# https://developers.notion.com/reference/block#file -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.attributes import Href -from htmlBuilder.tags import A, Br, Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.file import External -from 
unstructured.ingest.connector.notion.types.file import File as FileContent -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class File(BlockBase): - type: str - external: Optional[External] = None - file: Optional[FileContent] = None - caption: List[RichText] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return False - - @classmethod - def from_dict(cls, data: dict): - caption = [RichText.from_dict(rt) for rt in data.pop("caption", [])] - t = data["type"] - file = cls(type=t, caption=caption) - if t == "external": - file.external = External.from_dict(data["external"]) - elif t == "file": - file.file = FileContent.from_dict(data["file"]) - return file - - def get_html(self) -> Optional[HtmlTag]: - texts = [] - if self.file: - texts.append(A([Href(self.file.url)], self.file.url)) - if self.external: - texts.append(A([Href(self.external.url)], self.external.url)) - if self.caption: - texts.append(Div([], [rt.get_html() for rt in self.caption])) - if not texts: - return None - joined = [Br()] * (len(texts) * 2 - 1) - joined[0::2] = texts - - return Div([], joined) diff --git a/unstructured/ingest/connector/notion/types/blocks/heading.py b/unstructured/ingest/connector/notion/types/blocks/heading.py deleted file mode 100644 index 86983f585..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/heading.py +++ /dev/null @@ -1,37 +0,0 @@ -# https://developers.notion.com/reference/block#headings -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.attributes import Style -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class Heading(BlockBase): - color: str - is_toggleable: bool - rich_text: List[RichText] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return False - - @classmethod - def from_dict(cls, data: dict): - rich_text = data.pop("rich_text", []) - heading = cls(**data) - heading.rich_text = [RichText.from_dict(rt) for rt in rich_text] - return heading - - def get_html(self) -> Optional[HtmlTag]: - if not self.rich_text: - return None - - texts = [rt.get_html() for rt in self.rich_text] - attributes = [] - if self.color and self.color != "default": - attributes.append(Style(f"color: {self.color}")) - return Div(attributes, texts) diff --git a/unstructured/ingest/connector/notion/types/blocks/image.py b/unstructured/ingest/connector/notion/types/blocks/image.py deleted file mode 100644 index d9c5203c4..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/image.py +++ /dev/null @@ -1,21 +0,0 @@ -# https://developers.notion.com/reference/block#image -from typing import Optional - -from htmlBuilder.attributes import Src -from htmlBuilder.tags import HtmlTag, Img - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.file import FileObject - - -class Image(BlockBase, FileObject): - @staticmethod - def can_have_children() -> bool: - return False - - def get_html(self) -> Optional[HtmlTag]: - if self.external: - return Img([Src(self.external.url)], []) - if self.file: - return Img([Src(self.file.url)], []) - return None diff --git a/unstructured/ingest/connector/notion/types/blocks/link_preview.py b/unstructured/ingest/connector/notion/types/blocks/link_preview.py deleted file mode 100644 
index 913df1f72..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/link_preview.py +++ /dev/null @@ -1,24 +0,0 @@ -# https://developers.notion.com/reference/block#link-preview -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.attributes import Href -from htmlBuilder.tags import A, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase - - -@dataclass -class LinkPreview(BlockBase): - url: str - - @staticmethod - def can_have_children() -> bool: - return False - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return A([Href(self.url)], self.url) diff --git a/unstructured/ingest/connector/notion/types/blocks/link_to_page.py b/unstructured/ingest/connector/notion/types/blocks/link_to_page.py deleted file mode 100644 index ed9156d26..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/link_to_page.py +++ /dev/null @@ -1,29 +0,0 @@ -# https://developers.notion.com/reference/block#link-to-page -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase - - -@dataclass -class LinkToPage(BlockBase): - type: str - page_id: Optional[str] = None - database_id: Optional[str] = None - - @staticmethod - def can_have_children() -> bool: - return False - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - if page_id := self.page_id: - return Div([], page_id) - if database_id := self.database_id: - return Div([], database_id) - return None diff --git a/unstructured/ingest/connector/notion/types/blocks/numbered_list.py b/unstructured/ingest/connector/notion/types/blocks/numbered_list.py deleted file mode 100644 index b0051bc80..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/numbered_list.py +++ /dev/null @@ -1,29 +0,0 @@ -# https://developers.notion.com/reference/block#numbered-list-item -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.tags import HtmlTag, Li - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class NumberedListItem(BlockBase): - color: str - children: List[dict] = field(default_factory=list) - rich_text: List[RichText] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - rich_text = data.pop("rich_text", []) - numbered_list = cls(**data) - numbered_list.rich_text = [RichText.from_dict(rt) for rt in rich_text] - return numbered_list - - def get_html(self) -> Optional[HtmlTag]: - return Li([], [rt.get_html() for rt in self.rich_text]) diff --git a/unstructured/ingest/connector/notion/types/blocks/paragraph.py b/unstructured/ingest/connector/notion/types/blocks/paragraph.py deleted file mode 100644 index bc31e4cba..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/paragraph.py +++ /dev/null @@ -1,31 +0,0 @@ -# https://developers.notion.com/reference/block#paragraph -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.tags import Br, Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class 
Paragraph(BlockBase): - color: str - children: List[dict] = field(default_factory=list) - rich_text: List[RichText] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - rich_text = data.pop("rich_text", []) - paragraph = cls(**data) - paragraph.rich_text = [RichText.from_dict(rt) for rt in rich_text] - return paragraph - - def get_html(self) -> Optional[HtmlTag]: - if not self.rich_text: - return Br() - return Div([], [rt.get_html() for rt in self.rich_text]) diff --git a/unstructured/ingest/connector/notion/types/blocks/pdf.py b/unstructured/ingest/connector/notion/types/blocks/pdf.py deleted file mode 100644 index 61ef3a820..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/pdf.py +++ /dev/null @@ -1,49 +0,0 @@ -# https://developers.notion.com/reference/block#pdf -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.attributes import Href -from htmlBuilder.tags import A, Br, Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.file import External, File -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class PDF(BlockBase): - type: str - caption: List[RichText] = field(default_factory=list) - external: Optional[External] = None - file: Optional[File] = None - - @staticmethod - def can_have_children() -> bool: - return False - - @classmethod - def from_dict(cls, data: dict): - caption = data.pop("caption", []) - t = data["type"] - paragraph = cls(type=t) - paragraph.caption = [RichText.from_dict(c) for c in caption] - if t == "external": - paragraph.external = External.from_dict(data["external"]) - elif t == "file": - paragraph.file = File.from_dict(data["file"]) - return paragraph - - def get_html(self) -> Optional[HtmlTag]: - texts = [] - if self.external: - texts.append(A([Href(self.external.url)], self.external.url)) - if self.file: - texts.append(A([Href(self.file.url)], self.file.url)) - if self.caption: - texts.append(Div([], [rt.get_html() for rt in self.caption])) - if not texts: - return None - joined = [Br()] * (len(texts) * 2 - 1) - joined[0::2] = texts - - return Div([], joined) diff --git a/unstructured/ingest/connector/notion/types/blocks/quote.py b/unstructured/ingest/connector/notion/types/blocks/quote.py deleted file mode 100644 index 1469f1d2a..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/quote.py +++ /dev/null @@ -1,37 +0,0 @@ -# https://developers.notion.com/reference/block#quote -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.attributes import Style -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class Quote(BlockBase): - color: str - children: List[dict] = field(default_factory=list) - rich_text: List[RichText] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - rich_text = data.pop("rich_text", []) - quote = cls(**data) - quote.rich_text = [RichText.from_dict(rt) for rt in rich_text] - return quote - - def get_html(self) -> Optional[HtmlTag]: - if not self.rich_text: - return None - - texts = [rt.get_html() for rt in self.rich_text] - attributes = [] - if self.color 
and self.color != "default": - attributes.append(Style(f"color: {self.color}")) - return Div(attributes, texts) diff --git a/unstructured/ingest/connector/notion/types/blocks/synced_block.py b/unstructured/ingest/connector/notion/types/blocks/synced_block.py deleted file mode 100644 index b4cd2da10..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/synced_block.py +++ /dev/null @@ -1,57 +0,0 @@ -# https://developers.notion.com/reference/block#synced-block -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.tags import HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase - - -@dataclass -class OriginalSyncedBlock(BlockBase): - synced_from: Optional[str] = None - children: List[dict] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - return cls(children=data["children"]) - - def get_html(self) -> Optional[HtmlTag]: - return None - - -@dataclass -class DuplicateSyncedBlock(BlockBase): - type: str - block_id: str - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return None - - -class SyncBlock(BlockBase): - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - if "synced_from" in data: - return OriginalSyncedBlock.from_dict(data) - else: - return DuplicateSyncedBlock.from_dict(data) - - def get_html(self) -> Optional[HtmlTag]: - return None diff --git a/unstructured/ingest/connector/notion/types/blocks/table.py b/unstructured/ingest/connector/notion/types/blocks/table.py deleted file mode 100644 index 785827563..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/table.py +++ /dev/null @@ -1,63 +0,0 @@ -# https://developers.notion.com/reference/block#table -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.tags import HtmlTag, Td, Th, Tr - -from unstructured.ingest.connector.notion.interfaces import ( - BlockBase, - FromJSONMixin, -) -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class Table(BlockBase): - table_width: int - has_column_header: bool - has_row_header: bool - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return None - - -@dataclass -class TableCell(FromJSONMixin): - rich_texts: List[RichText] - - @classmethod - def from_dict(cls, data: dict): - return cls(rich_texts=[RichText.from_dict(rt) for rt in data.pop("rich_texts", [])]) - - def get_html(self, is_header: bool) -> Optional[HtmlTag]: - if is_header: - return Th([], [rt.get_html() for rt in self.rich_texts]) - else: - return Td([], [rt.get_html() for rt in self.rich_texts]) - - -# https://developers.notion.com/reference/block#table-rows -@dataclass -class TableRow(BlockBase): - is_header: bool = False - cells: List[TableCell] = field(default_factory=list) - - @classmethod - def from_dict(cls, data: dict): - cells = data.get("cells", []) - return cls(cells=[TableCell.from_dict({"rich_texts": c}) for c in cells]) - - @staticmethod - def can_have_children() -> bool: - return False - - def get_html(self) -> Optional[HtmlTag]: - return Tr([], [cell.get_html(is_header=self.is_header) for cell in self.cells]) diff 
--git a/unstructured/ingest/connector/notion/types/blocks/table_of_contents.py b/unstructured/ingest/connector/notion/types/blocks/table_of_contents.py deleted file mode 100644 index f753f6074..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/table_of_contents.py +++ /dev/null @@ -1,23 +0,0 @@ -# https://developers.notion.com/reference/block#table-of-contents -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase - - -@dataclass -class TableOfContents(BlockBase): - color: str - - @staticmethod - def can_have_children() -> bool: - return False - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return None diff --git a/unstructured/ingest/connector/notion/types/blocks/template.py b/unstructured/ingest/connector/notion/types/blocks/template.py deleted file mode 100644 index 45056876f..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/template.py +++ /dev/null @@ -1,30 +0,0 @@ -# https://developers.notion.com/reference/block#template -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class Template(BlockBase): - children: List[dict] = field(default_factory=list) - rich_text: List[RichText] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - rich_text = data.pop("rich_text", []) - template = cls(**data) - template.rich_text = [RichText.from_dict(rt) for rt in rich_text] - return template - - def get_html(self) -> Optional[HtmlTag]: - if not self.rich_text: - return None - return Div([], [rt.get_html() for rt in self.rich_text]) diff --git a/unstructured/ingest/connector/notion/types/blocks/todo.py b/unstructured/ingest/connector/notion/types/blocks/todo.py deleted file mode 100644 index 3e03b2ce0..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/todo.py +++ /dev/null @@ -1,42 +0,0 @@ -# https://developers.notion.com/reference/block#to-do -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.attributes import Checked, Style, Type -from htmlBuilder.tags import Div, HtmlTag, Input - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class ToDo(BlockBase): - color: str - checked: bool = False - rich_text: List[RichText] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - rich_text = data.pop("rich_text", []) - todo = cls(**data) - todo.rich_text = [RichText.from_dict(rt) for rt in rich_text] - return todo - - def get_html(self) -> Optional[HtmlTag]: - if not self.rich_text: - return None - - elements = [] - check_input_attributes = [Type("checkbox")] - if self.checked: - check_input_attributes.append(Checked("")) - elements.append(Input(check_input_attributes)) - elements.extend([rt.get_html() for rt in self.rich_text]) - attributes = [] - if self.color and self.color != "default": - attributes.append(Style(f"color: {self.color}")) - return Div(attributes, elements) diff --git 
a/unstructured/ingest/connector/notion/types/blocks/toggle.py b/unstructured/ingest/connector/notion/types/blocks/toggle.py deleted file mode 100644 index 8619eb7de..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/toggle.py +++ /dev/null @@ -1,37 +0,0 @@ -# https://developers.notion.com/reference/block#toggle-blocks -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.attributes import Style -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class Toggle(BlockBase): - color: str - children: List[dict] = field(default_factory=list) - rich_text: List[RichText] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - rich_text = data.pop("rich_text", []) - toggle = cls(**data) - toggle.rich_text = [RichText.from_dict(rt) for rt in rich_text] - return toggle - - def get_html(self) -> Optional[HtmlTag]: - if not self.rich_text: - return None - - texts = [rt.get_html() for rt in self.rich_text] - attributes = [] - if self.color and self.color != "default": - attributes.append(Style(f"color: {self.color}")) - return Div(attributes, texts) diff --git a/unstructured/ingest/connector/notion/types/blocks/unsupported.py b/unstructured/ingest/connector/notion/types/blocks/unsupported.py deleted file mode 100644 index 6e28b8cf2..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/unsupported.py +++ /dev/null @@ -1,20 +0,0 @@ -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase - - -@dataclass -class Unsupported(BlockBase): - @staticmethod - def can_have_children() -> bool: - return False - - @classmethod - def from_dict(cls, data: dict): - return cls() - - def get_html(self) -> Optional[HtmlTag]: - return None diff --git a/unstructured/ingest/connector/notion/types/blocks/video.py b/unstructured/ingest/connector/notion/types/blocks/video.py deleted file mode 100644 index 2523adf70..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/video.py +++ /dev/null @@ -1,22 +0,0 @@ -# https://developers.notion.com/reference/block#image -from typing import Optional - -from htmlBuilder.attributes import Src -from htmlBuilder.tags import HtmlTag, Source -from htmlBuilder.tags import Video as VideoHtml - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.file import FileObject - - -class Video(BlockBase, FileObject): - @staticmethod - def can_have_children() -> bool: - return False - - def get_html(self) -> Optional[HtmlTag]: - if self.external: - return VideoHtml([], [Source([Src(self.external.url)], [self.external.url])]) - if self.file: - return VideoHtml([], [Source([Src(self.file.url)], [self.file.url])]) - return None diff --git a/unstructured/ingest/connector/notion/types/database.py b/unstructured/ingest/connector/notion/types/database.py deleted file mode 100644 index db5718cf3..000000000 --- a/unstructured/ingest/connector/notion/types/database.py +++ /dev/null @@ -1,72 +0,0 @@ -# https://developers.notion.com/reference/database -from dataclasses import dataclass, field -from typing import Dict, List, Optional - -from htmlBuilder.tags import Div, HtmlTag, Span - -from 
unstructured.ingest.connector.notion.interfaces import ( - DBPropertyBase, - FromJSONMixin, - GetHTMLMixin, -) -from unstructured.ingest.connector.notion.types.database_properties import ( - map_properties, -) -from unstructured.ingest.connector.notion.types.file import FileObject -from unstructured.ingest.connector.notion.types.parent import Parent -from unstructured.ingest.connector.notion.types.rich_text import RichText -from unstructured.ingest.connector.notion.types.user import PartialUser - - -@dataclass -class Database(FromJSONMixin, GetHTMLMixin): - id: str - created_time: str - created_by: PartialUser - last_edited_time: str - last_edited_by: PartialUser - archived: bool - parent: Parent - url: str - is_inline: bool - public_url: str - request_id: Optional[str] = None - properties: Dict[str, DBPropertyBase] = field(default_factory=dict) - title: List[RichText] = field(default_factory=list) - description: List[RichText] = field(default_factory=list) - icon: Optional[FileObject] = None - cover: Optional[FileObject] = None - object: str = "database" - - @classmethod - def from_dict(cls, data: dict): - created_by = data.pop("created_by") - last_edited_by = data.pop("last_edited_by") - icon = data.pop("icon") - cover = data.pop("cover") - parent = data.pop("parent") - title = data.pop("title") - description = data.pop("description") - page = cls( - properties=map_properties(data.pop("properties", {})), - created_by=PartialUser.from_dict(created_by), - last_edited_by=PartialUser.from_dict(last_edited_by), - icon=FileObject.from_dict(icon) if icon else None, - cover=FileObject.from_dict(cover) if cover else None, - parent=Parent.from_dict(parent), - title=[RichText.from_dict(data=r) for r in title], - description=[RichText.from_dict(data=r) for r in description], - **data, - ) - - return page - - def get_html(self) -> Optional[HtmlTag]: - spans = [] - if title := self.title: - spans.append(Span([], [rt.get_html() for rt in title])) - if description := self.description: - spans.append(Span([], [rt.get_html() for rt in description])) - if spans: - return Div([], spans) - return None diff --git a/unstructured/ingest/connector/notion/types/database_properties/__init__.py b/unstructured/ingest/connector/notion/types/database_properties/__init__.py deleted file mode 100644 index 100111365..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/__init__.py +++ /dev/null @@ -1,106 +0,0 @@ -from typing import Dict - -from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase - -from .checkbox import Checkbox, CheckboxCell -from .created_by import CreatedBy, CreatedByCell -from .created_time import CreatedTime, CreatedTimeCell -from .date import Date, DateCell -from .email import Email, EmailCell -from .files import Files, FilesCell -from .formula import Formula, FormulaCell -from .last_edited_by import LastEditedBy, LastEditedByCell -from .last_edited_time import LastEditedTime, LastEditedTimeCell -from .multiselect import MultiSelect, MultiSelectCell -from .number import Number, NumberCell -from .people import People, PeopleCell -from .phone_number import PhoneNumber, PhoneNumberCell -from .relation import Relation, RelationCell -from .rich_text import RichText, RichTextCell -from .rollup import Rollup, RollupCell -from .select import Select, SelectCell -from .status import Status, StatusCell -from .title import Title, TitleCell -from .unique_id import UniqueID, UniqueIDCell -from .url import URL, URLCell -from .verification import 
Verification, VerificationCell - -db_prop_type_mapping = { - "checkbox": Checkbox, - "created_by": CreatedBy, - "created_time": CreatedTime, - "date": Date, - "email": Email, - "files": Files, - "formula": Formula, - "last_edited_by": LastEditedBy, - "last_edited_time": LastEditedTime, - "multi_select": MultiSelect, - "number": Number, - "people": People, - "phone_number": PhoneNumber, - "relation": Relation, - "rich_text": RichText, - "rollup": Rollup, - "select": Select, - "status": Status, - "title": Title, - "unique_id": UniqueID, - "url": URL, - "verification": Verification, -} - - -def map_properties(props: Dict[str, dict]) -> Dict[str, DBPropertyBase]: - mapped_dict = {} - for k, v in props.items(): - try: - mapped_dict[k] = db_prop_type_mapping[v["type"]].from_dict(v) # type: ignore - except KeyError as ke: - raise KeyError(f"failed to map to associated database property -> {k}: {v}") from ke - - return mapped_dict - - -db_cell_type_mapping = { - "checkbox": CheckboxCell, - "created_by": CreatedByCell, - "created_time": CreatedTimeCell, - "date": DateCell, - "email": EmailCell, - "files": FilesCell, - "formula": FormulaCell, - "last_edited_by": LastEditedByCell, - "last_edited_time": LastEditedTimeCell, - "multi_select": MultiSelectCell, - "number": NumberCell, - "people": PeopleCell, - "phone_number": PhoneNumberCell, - "relation": RelationCell, - "rich_text": RichTextCell, - "rollup": RollupCell, - "select": SelectCell, - "status": StatusCell, - "title": TitleCell, - "unique_id": UniqueIDCell, - "url": URLCell, - "verification": VerificationCell, -} - - -def map_cells(props: Dict[str, dict]) -> Dict[str, DBCellBase]: - mapped_dict = {} - for k, v in props.items(): - try: - t = v["type"] - mapped_dict[k] = db_cell_type_mapping[t].from_dict(v) # type: ignore - except KeyError as ke: - raise KeyError(f"failed to map to associated database property -> {k}: {v}") from ke - - return mapped_dict - - -__all__ = [ - "map_properties", - "map_cells", -] diff --git a/unstructured/ingest/connector/notion/types/database_properties/checkbox.py b/unstructured/ingest/connector/notion/types/database_properties/checkbox.py deleted file mode 100644 index b60d187a1..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/checkbox.py +++ /dev/null @@ -1,38 +0,0 @@ -# https://developers.notion.com/reference/property-object#checkbox -from dataclasses import dataclass, field -from typing import Optional - -from htmlBuilder.attributes import Checked, Type -from htmlBuilder.tags import Div, HtmlTag, Input - -from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase - - -@dataclass -class Checkbox(DBPropertyBase): - id: str - name: str - type: str = "checkbox" - checkbox: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class CheckboxCell(DBCellBase): - id: str - checkbox: bool - name: Optional[str] = None - type: str = "checkbox" - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - check_input_attributes = [Type("checkbox")] - if self.checkbox: - check_input_attributes.append(Checked("")) - return Div([], Input(check_input_attributes)) diff --git a/unstructured/ingest/connector/notion/types/database_properties/created_by.py b/unstructured/ingest/connector/notion/types/database_properties/created_by.py deleted file mode 100644 index 034b0c1c4..000000000 --- 
a/unstructured/ingest/connector/notion/types/database_properties/created_by.py +++ /dev/null @@ -1,35 +0,0 @@ -# https://developers.notion.com/reference/property-object#created-by -from dataclasses import dataclass, field -from typing import Optional - -from htmlBuilder.tags import HtmlTag - -from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase -from unstructured.ingest.connector.notion.types.user import People - - -@dataclass -class CreatedBy(DBPropertyBase): - id: str - name: str - type: str = "created_by" - created_by: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class CreatedByCell(DBCellBase): - id: str - created_by: People - type: str = "created_by" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(created_by=People.from_dict(data.pop("created_by")), **data) - - def get_html(self) -> Optional[HtmlTag]: - return self.created_by.get_html() diff --git a/unstructured/ingest/connector/notion/types/database_properties/created_time.py b/unstructured/ingest/connector/notion/types/database_properties/created_time.py deleted file mode 100644 index 86c1173d6..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/created_time.py +++ /dev/null @@ -1,34 +0,0 @@ -# https://developers.notion.com/reference/property-object#created-time -from dataclasses import dataclass, field -from typing import Optional - -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase - - -@dataclass -class CreatedTime(DBPropertyBase): - id: str - name: str - type: str = "created_time" - created_time: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class CreatedTimeCell(DBCellBase): - id: str - created_time: str - type: str = "created_time" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return Div([], self.created_time) diff --git a/unstructured/ingest/connector/notion/types/database_properties/date.py b/unstructured/ingest/connector/notion/types/database_properties/date.py deleted file mode 100644 index 779ef60cc..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/date.py +++ /dev/null @@ -1,41 +0,0 @@ -# https://developers.notion.com/reference/property-object#date -from dataclasses import dataclass, field -from typing import Optional - -from htmlBuilder.tags import HtmlTag - -from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase -from unstructured.ingest.connector.notion.types.date import Date as DateType - - -@dataclass -class Date(DBPropertyBase): - id: str - name: str - type: str = "date" - date: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class DateCell(DBCellBase): - id: str - date: Optional[DateType] = None - name: Optional[str] = None - type: str = "date" - - @classmethod - def from_dict(cls, data: dict): - date = None - date_data = data.pop("date") - if date_data: - date = DateType.from_dict(date_data) - return cls(date=date, **data) - - def get_html(self) -> Optional[HtmlTag]: - if date := self.date: - return date.get_html() - return None diff --git a/unstructured/ingest/connector/notion/types/database_properties/email.py 
b/unstructured/ingest/connector/notion/types/database_properties/email.py deleted file mode 100644 index 1303770a8..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/email.py +++ /dev/null @@ -1,36 +0,0 @@ -# https://developers.notion.com/reference/property-object#email -from dataclasses import dataclass, field -from typing import Optional - -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase - - -@dataclass -class Email(DBPropertyBase): - id: str - name: str - type: str = "email" - email: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class EmailCell(DBCellBase): - id: str - email: str - name: Optional[str] = None - type: str = "email" - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - if email := self.email: - return Div([], email) - return None diff --git a/unstructured/ingest/connector/notion/types/database_properties/files.py b/unstructured/ingest/connector/notion/types/database_properties/files.py deleted file mode 100644 index 680ee15ba..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/files.py +++ /dev/null @@ -1,37 +0,0 @@ -# https://developers.notion.com/reference/property-object#files -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase -from unstructured.ingest.connector.notion.types.file import FileObject - - -@dataclass -class Files(DBPropertyBase): - id: str - name: str - type: str = "files" - files: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class FilesCell(DBCellBase): - id: str - files: List[FileObject] - type: str = "files" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(files=[FileObject.from_dict(f) for f in data.pop("files", [])], **data) - - def get_html(self) -> Optional[HtmlTag]: - if not self.files: - return None - return Div([], [f.get_html() for f in self.files]) diff --git a/unstructured/ingest/connector/notion/types/database_properties/formula.py b/unstructured/ingest/connector/notion/types/database_properties/formula.py deleted file mode 100644 index b1921367e..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/formula.py +++ /dev/null @@ -1,49 +0,0 @@ -# https://developers.notion.com/reference/property-object#formula -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import ( - DBCellBase, - DBPropertyBase, - FromJSONMixin, -) - - -@dataclass -class FormulaProp(FromJSONMixin): - expression: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class Formula(DBPropertyBase): - id: str - name: str - formula: FormulaProp - type: str = "formula" - - @classmethod - def from_dict(cls, data: dict): - return cls(formula=FormulaProp.from_dict(data.pop("formula", {})), **data) - - -@dataclass -class FormulaCell(DBCellBase): - id: str - formula: dict - type: str = "formula" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - formula = 
self.formula - t = formula.get("type") - return Div([], str(formula[t])) diff --git a/unstructured/ingest/connector/notion/types/database_properties/last_edited_by.py b/unstructured/ingest/connector/notion/types/database_properties/last_edited_by.py deleted file mode 100644 index a1a2d0a9c..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/last_edited_by.py +++ /dev/null @@ -1,34 +0,0 @@ -# https://developers.notion.com/reference/property-object#last-edited-by -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import HtmlTag - -from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase -from unstructured.ingest.connector.notion.types.user import People - - -@dataclass -class LastEditedBy(DBPropertyBase): - @classmethod - def from_dict(cls, data: dict): - return cls() - - def get_text(self) -> Optional[str]: - return None - - -@dataclass -class LastEditedByCell(DBCellBase): - id: str - last_edited_by: People - type: str = "last_edited_by" - - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(last_edited_by=People.from_dict(data.pop("last_edited_by", {})), **data) - - def get_html(self) -> Optional[HtmlTag]: - return self.last_edited_by.get_html() diff --git a/unstructured/ingest/connector/notion/types/database_properties/last_edited_time.py b/unstructured/ingest/connector/notion/types/database_properties/last_edited_time.py deleted file mode 100644 index 4c9e00981..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/last_edited_time.py +++ /dev/null @@ -1,34 +0,0 @@ -# https://developers.notion.com/reference/property-object#last-edited-time -from dataclasses import dataclass, field -from typing import Optional - -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase - - -@dataclass -class LastEditedTime(DBPropertyBase): - id: str - name: str - type: str = "last_edited_time" - last_edited_time: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class LastEditedTimeCell(DBCellBase): - id: str - last_edited_time: str - type: str = "last_edited_time" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return Div([], self.last_edited_time) diff --git a/unstructured/ingest/connector/notion/types/database_properties/multiselect.py b/unstructured/ingest/connector/notion/types/database_properties/multiselect.py deleted file mode 100644 index 7534ab82d..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/multiselect.py +++ /dev/null @@ -1,73 +0,0 @@ -# https://developers.notion.com/reference/property-object#multi-select -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.attributes import Style -from htmlBuilder.tags import Div, HtmlTag, Span - -from unstructured.ingest.connector.notion.interfaces import ( - DBCellBase, - DBPropertyBase, - FromJSONMixin, -) - - -@dataclass -class MultiSelectOption(FromJSONMixin): - color: str - id: str - name: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class MultiSelectProp(FromJSONMixin): - options: List[MultiSelectOption] = field(default_factory=list) - - @classmethod - def from_dict(cls, data: dict): - return 
cls(options=[MultiSelectOption.from_dict(o) for o in data.get("options", [])]) - - -@dataclass -class MultiSelect(DBPropertyBase): - id: str - name: str - multi_select: MultiSelectProp - type: str = "multi_select" - - @classmethod - def from_dict(cls, data: dict): - return cls( - multi_select=data.pop("multi_select", {}), - **data, - ) - - -@dataclass -class MultiSelectCell(DBCellBase): - id: str - multi_select: List[MultiSelectOption] - type: str = "multi_select" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls( - multi_select=[MultiSelectOption.from_dict(o) for o in data.pop("multi_select", [])], - **data, - ) - - def get_html(self) -> Optional[HtmlTag]: - if not self.multi_select: - return None - option_spans = [] - for option in self.multi_select: - option_attributes = [] - if option.color and option.color != "default": - option_attributes.append(Style(f"color: {option.color}")) - option_spans.append(Span(option_attributes, option.name)) - return Div([], option_spans) diff --git a/unstructured/ingest/connector/notion/types/database_properties/number.py b/unstructured/ingest/connector/notion/types/database_properties/number.py deleted file mode 100644 index 599981fc0..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/number.py +++ /dev/null @@ -1,49 +0,0 @@ -# https://developers.notion.com/reference/property-object#number -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import ( - DBCellBase, - DBPropertyBase, - FromJSONMixin, -) - - -@dataclass -class NumberProp(FromJSONMixin): - format: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class Number(DBPropertyBase): - id: str - name: str - number: NumberProp - type: str = "number" - - @classmethod - def from_dict(cls, data: dict): - return cls(number=NumberProp.from_dict(data.pop("number")), **data) - - -@dataclass -class NumberCell(DBCellBase): - id: str - number: Optional[int] = None - type: str = "number" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - if number := self.number: - return Div([], str(number)) - return None diff --git a/unstructured/ingest/connector/notion/types/database_properties/people.py b/unstructured/ingest/connector/notion/types/database_properties/people.py deleted file mode 100644 index 44e66b2e8..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/people.py +++ /dev/null @@ -1,40 +0,0 @@ -# https://developers.notion.com/reference/property-object#people -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.tags import Div, HtmlTag, Span - -from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase -from unstructured.ingest.connector.notion.types.user import People as PeopleType - - -@dataclass -class People(DBPropertyBase): - id: str - name: str - type: str = "people" - people: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class PeopleCell(DBCellBase): - id: str - people: List[PeopleType] - type: str = "people" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(people=[PeopleType.from_dict(p) for p in data.pop("people", {})], **data) - - def get_html(self) -> 
Optional[HtmlTag]: - if not self.people: - return None - people_spans = [] - for person in self.people: - people_spans.append(Span([], person.get_html())) - return Div([], people_spans) diff --git a/unstructured/ingest/connector/notion/types/database_properties/phone_number.py b/unstructured/ingest/connector/notion/types/database_properties/phone_number.py deleted file mode 100644 index 58a5c9170..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/phone_number.py +++ /dev/null @@ -1,36 +0,0 @@ -# https://developers.notion.com/reference/property-object#phone-number -from dataclasses import dataclass, field -from typing import Optional - -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase - - -@dataclass -class PhoneNumber(DBPropertyBase): - id: str - name: str - type: str = "phone_number" - phone_number: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class PhoneNumberCell(DBCellBase): - id: str - phone_number: Optional[str] - name: Optional[str] = None - type: str = "phone_number" - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - if phone_number := self.phone_number: - return Div([], phone_number) - return None diff --git a/unstructured/ingest/connector/notion/types/database_properties/relation.py b/unstructured/ingest/connector/notion/types/database_properties/relation.py deleted file mode 100644 index 35c283a11..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/relation.py +++ /dev/null @@ -1,67 +0,0 @@ -# https://developers.notion.com/reference/property-object#relation -from dataclasses import dataclass -from typing import Optional -from urllib.parse import unquote - -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import ( - DBCellBase, - DBPropertyBase, - FromJSONMixin, -) - - -@dataclass -class DualProperty(FromJSONMixin): - synced_property_id: str - synced_property_name: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class RelationProp(FromJSONMixin): - database_id: str - type: str - dual_property: DualProperty - - @classmethod - def from_dict(cls, data: dict): - t = data.get("type") - if t == "dual_property": - dual_property = DualProperty.from_dict(data.pop(t)) - else: - raise ValueError(f"{t} type not recognized") - - return cls(dual_property=dual_property, **data) - - -@dataclass -class Relation(DBPropertyBase): - id: str - name: str - relation: RelationProp - type: str = "relation" - - @classmethod - def from_dict(cls, data: dict): - return cls(relation=RelationProp.from_dict(data.pop("relation")), **data) - - -@dataclass -class RelationCell(DBCellBase): - id: str - has_more: bool - relation: list - type: str = "relation" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return Div([], unquote(self.id)) diff --git a/unstructured/ingest/connector/notion/types/database_properties/rich_text.py b/unstructured/ingest/connector/notion/types/database_properties/rich_text.py deleted file mode 100644 index 2bd56c2c9..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/rich_text.py +++ /dev/null @@ -1,43 +0,0 @@ -# https://developers.notion.com/reference/property-object#rich-text -from 
dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.tags import Div, HtmlTag, Span - -from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase -from unstructured.ingest.connector.notion.types.rich_text import ( - RichText as RichTextType, -) - - -@dataclass -class RichText(DBPropertyBase): - id: str - name: str - type: str = "rich_text" - rich_text: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class RichTextCell(DBCellBase): - id: str - rich_text: List[RichTextType] - name: Optional[str] = None - type: str = "rich_text" - - @classmethod - def from_dict(cls, data: dict): - return cls( - rich_text=[RichTextType.from_dict(rt) for rt in data.pop("rich_text", [])], - **data, - ) - - def get_html(self) -> Optional[HtmlTag]: - if not self.rich_text: - return None - spans = [Span([], rt.get_html()) for rt in self.rich_text] - return Div([], spans) diff --git a/unstructured/ingest/connector/notion/types/database_properties/rollup.py b/unstructured/ingest/connector/notion/types/database_properties/rollup.py deleted file mode 100644 index 5134b40c4..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/rollup.py +++ /dev/null @@ -1,56 +0,0 @@ -# https://developers.notion.com/reference/property-object#rollup -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import Div, HtmlTag, Span - -from unstructured.ingest.connector.notion.interfaces import ( - DBCellBase, - DBPropertyBase, - FromJSONMixin, -) - - -@dataclass -class RollupProp(FromJSONMixin): - function: str - relation_property_id: str - relation_property_name: str - rollup_property_id: str - rollup_property_name: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class Rollup(DBPropertyBase): - id: str - name: str - rollup: RollupProp - type: str = "rollup" - - @classmethod - def from_dict(cls, data: dict): - return cls(rollup=RollupProp.from_dict(data.pop("rollup")), **data) - - -@dataclass -class RollupCell(DBCellBase): - id: str - rollup: dict - type: str = "rollup" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - rollup = self.rollup - t = rollup.get("type") - v = rollup[t] - if isinstance(v, list): - return Div([], [Span([], str(x)) for x in v]) - return Div([], str(v)) diff --git a/unstructured/ingest/connector/notion/types/database_properties/select.py b/unstructured/ingest/connector/notion/types/database_properties/select.py deleted file mode 100644 index 550f2ffed..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/select.py +++ /dev/null @@ -1,68 +0,0 @@ -# https://developers.notion.com/reference/property-object#select -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.attributes import Style -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import ( - DBCellBase, - DBPropertyBase, - FromJSONMixin, -) - - -@dataclass -class SelectOption(FromJSONMixin): - color: str - id: str - name: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class SelectProp(FromJSONMixin): - options: List[SelectOption] = field(default_factory=list) - - @classmethod - def from_dict(cls, data: dict): - return cls(options=[SelectOption.from_dict(o) for o 
in data.get("options", [])]) - - -@dataclass -class Select(DBPropertyBase): - id: str - name: str - select: SelectProp - type: str = "select" - - @classmethod - def from_dict(cls, data: dict): - return cls(select=SelectProp.from_dict(data.pop("select", {})), **data) - - -@dataclass -class SelectCell(DBCellBase): - id: str - select: Optional[SelectOption] - type: str = "select" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - select_data = data.pop("select") - select = None - if select_data: - select = SelectOption.from_dict(select_data) - return cls(select=select, **data) - - def get_html(self) -> Optional[HtmlTag]: - if select := self.select: - select_attr = [] - if select.color and select.color != "default": - select_attr.append(Style(f"color: {select.color}")) - return Div(select_attr, select.name) - return None diff --git a/unstructured/ingest/connector/notion/types/database_properties/status.py b/unstructured/ingest/connector/notion/types/database_properties/status.py deleted file mode 100644 index 8139b98a6..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/status.py +++ /dev/null @@ -1,80 +0,0 @@ -# https://developers.notion.com/reference/property-object#status -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.attributes import Style -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import ( - DBCellBase, - DBPropertyBase, - FromJSONMixin, -) - - -@dataclass -class StatusOption(FromJSONMixin): - color: str - id: str - name: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class StatusGroup(FromJSONMixin): - color: str - id: str - name: str - option_ids: List[str] = field(default_factory=List[str]) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class StatusProp(FromJSONMixin): - options: List[StatusOption] = field(default_factory=list) - groups: List[StatusGroup] = field(default_factory=list) - - @classmethod - def from_dict(cls, data: dict): - return cls( - options=[StatusOption.from_dict(o) for o in data.get("options", [])], - groups=[StatusGroup.from_dict(g) for g in data.get("groups", [])], - ) - - -@dataclass -class Status(DBPropertyBase): - id: str - name: str - status: StatusProp - type: str = "status" - - @classmethod - def from_dict(cls, data: dict): - return cls(status=StatusProp.from_dict(data.pop("status", {})), **data) - - -@dataclass -class StatusCell(DBCellBase): - id: str - status: Optional[StatusOption] - type: str = "status" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(status=StatusOption.from_dict(data.pop("status", {})), **data) - - def get_html(self) -> Optional[HtmlTag]: - if status := self.status: - select_attr = [] - if status.color and status.color != "default": - select_attr.append(Style(f"color: {status.color}")) - return Div(select_attr, status.name) - return None diff --git a/unstructured/ingest/connector/notion/types/database_properties/title.py b/unstructured/ingest/connector/notion/types/database_properties/title.py deleted file mode 100644 index aaee0e6ad..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/title.py +++ /dev/null @@ -1,37 +0,0 @@ -# https://developers.notion.com/reference/property-object#title -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.tags import Div, HtmlTag - -from 
unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class Title(DBPropertyBase): - id: str - name: str - type: str = "title" - title: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class TitleCell(DBCellBase): - id: str - title: List[RichText] - type: str = "title" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(title=[RichText.from_dict(rt) for rt in data.pop("title", [])], **data) - - def get_html(self) -> Optional[HtmlTag]: - if not self.title: - return None - return Div([], [rt.get_html() for rt in self.title]) diff --git a/unstructured/ingest/connector/notion/types/database_properties/unique_id.py b/unstructured/ingest/connector/notion/types/database_properties/unique_id.py deleted file mode 100644 index 643f2c07a..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/unique_id.py +++ /dev/null @@ -1,50 +0,0 @@ -# https://developers.notion.com/reference/property-object#title -from dataclasses import dataclass, field -from typing import Optional - -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import ( - DBCellBase, - DBPropertyBase, - FromJSONMixin, -) - - -@dataclass -class UniqueID(DBPropertyBase): - id: str - name: str - type: str = "unique_id" - unique_id: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class UniqueIDCellData(FromJSONMixin): - prefix: str - number: int - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class UniqueIDCell(DBCellBase): - id: str - unique_id: Optional[UniqueIDCellData] - type: str = "title" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(unique_id=UniqueIDCellData.from_dict(data.pop("unique_id")), **data) - - def get_html(self) -> Optional[HtmlTag]: - if unique_id := self.unique_id: - return Div([], f"{unique_id.prefix}-{unique_id.number}") - return None diff --git a/unstructured/ingest/connector/notion/types/database_properties/url.py b/unstructured/ingest/connector/notion/types/database_properties/url.py deleted file mode 100644 index 8233ae9c2..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/url.py +++ /dev/null @@ -1,37 +0,0 @@ -# https://developers.notion.com/reference/property-object#url -from dataclasses import dataclass, field -from typing import Optional - -from htmlBuilder.attributes import Href -from htmlBuilder.tags import A, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase - - -@dataclass -class URL(DBPropertyBase): - id: str - name: str - type: str = "url" - url: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class URLCell(DBCellBase): - id: str - url: Optional[str] = None - name: Optional[str] = None - type: str = "url" - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - if url := self.url: - return A([Href(url)], url) - return None diff --git a/unstructured/ingest/connector/notion/types/database_properties/verification.py b/unstructured/ingest/connector/notion/types/database_properties/verification.py deleted file mode 100644 index 
03ade8e3b..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/verification.py +++ /dev/null @@ -1,78 +0,0 @@ -# https://developers.notion.com/reference/property-object#url -from dataclasses import dataclass, field -from typing import Optional - -from htmlBuilder.tags import Div, HtmlTag, Span - -from unstructured.ingest.connector.notion.interfaces import ( - DBCellBase, - DBPropertyBase, - FromJSONMixin, - GetHTMLMixin, -) -from unstructured.ingest.connector.notion.types.date import Date -from unstructured.ingest.connector.notion.types.user import People - - -@dataclass -class Verification(DBPropertyBase): - id: str - name: str - type: str = "verification" - verification: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class VerificationData(FromJSONMixin, GetHTMLMixin): - state: Optional[str] - verified_by: Optional[People] - date: Optional[Date] - - @classmethod - def from_dict(cls, data: dict): - verified_by = data.pop("verified_by", None) - date = data.pop("date", None) - return cls( - verified_by=People.from_dict(data=verified_by) if verified_by else None, - date=Date.from_dict(data=date) if date else None, - **data, - ) - - def get_html(self) -> Optional[HtmlTag]: - elements = [] - if state := self.state: - elements.append(Span([], state)) - if (verified_by := self.verified_by) and (verified_by_html := verified_by.get_html()): - elements.append(verified_by_html) - if (date := self.date) and (date_html := date.get_html()): - elements.append(date_html) - if elements: - return Div([], elements) - return None - - -@dataclass -class VerificationCell(DBCellBase): - id: str - verification: Optional[VerificationData] - name: Optional[str] = None - type: str = "verification" - - @classmethod - def from_dict(cls, data: dict): - return cls(verification=VerificationData.from_dict(data.pop("verification")), **data) - - def get_html(self) -> Optional[HtmlTag]: - elements = [] - if name := self.name: - elements.append(Span([], name)) - if (verification := self.verification) and (verification_html := verification.get_html()): - elements.append(verification_html) - - if elements: - return Div([], elements) - return None diff --git a/unstructured/ingest/connector/notion/types/date.py b/unstructured/ingest/connector/notion/types/date.py deleted file mode 100644 index 7c6dcf1fd..000000000 --- a/unstructured/ingest/connector/notion/types/date.py +++ /dev/null @@ -1,26 +0,0 @@ -# https://developers.notion.com/reference/property-value-object#date-property-values -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import FromJSONMixin, GetHTMLMixin - - -@dataclass -class Date(FromJSONMixin, GetHTMLMixin): - start: str - end: Optional[str] = None - time_zone: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - text = f"{self.start}" - if end := self.end: - text += f" - {end}" - if self.time_zone: - text += f" {self.time_zone}" - return Div([], text) diff --git a/unstructured/ingest/connector/notion/types/file.py b/unstructured/ingest/connector/notion/types/file.py deleted file mode 100644 index 6ade2d1e4..000000000 --- a/unstructured/ingest/connector/notion/types/file.py +++ /dev/null @@ -1,51 +0,0 @@ -# https://developers.notion.com/reference/file-object -from dataclasses import dataclass -from typing import 
Optional - -from htmlBuilder.attributes import Href -from htmlBuilder.tags import A, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import FromJSONMixin, GetHTMLMixin - - -@dataclass -class External(FromJSONMixin): - url: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class File(FromJSONMixin): - url: str - expiry_time: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class FileObject(FromJSONMixin, GetHTMLMixin): - type: str - external: Optional[External] = None - file: Optional[File] = None - - @classmethod - def from_dict(cls, data: dict): - t = data["type"] - file_object = cls(type=t) - if t == "external": - file_object.external = External.from_dict(data["external"]) - elif t == "file": - file_object.file = File.from_dict(data["file"]) - return file_object - - def get_html(self) -> Optional[HtmlTag]: - if self.file: - return A([Href(self.file.url)], self.file.url) - if self.external: - return A([Href(self.external.url)], self.external.url) - return None diff --git a/unstructured/ingest/connector/notion/types/page.py b/unstructured/ingest/connector/notion/types/page.py deleted file mode 100644 index 1bbda85c7..000000000 --- a/unstructured/ingest/connector/notion/types/page.py +++ /dev/null @@ -1,44 +0,0 @@ -# https://developers.notion.com/reference/page -from dataclasses import dataclass -from typing import Optional - -from unstructured.ingest.connector.notion.interfaces import FromJSONMixin -from unstructured.ingest.connector.notion.types.file import FileObject -from unstructured.ingest.connector.notion.types.parent import Parent -from unstructured.ingest.connector.notion.types.user import PartialUser - - -@dataclass -class Page(FromJSONMixin): - id: str - created_time: str - created_by: PartialUser - last_edited_time: str - last_edited_by: PartialUser - archived: bool - properties: dict - parent: Parent - url: str - public_url: str - request_id: Optional[str] = None - object: str = "page" - icon: Optional[FileObject] = None - cover: Optional[FileObject] = None - - @classmethod - def from_dict(cls, data: dict): - created_by = data.pop("created_by") - last_edited_by = data.pop("last_edited_by") - icon = data.pop("icon") - cover = data.pop("cover") - parent = data.pop("parent") - page = cls( - created_by=PartialUser.from_dict(created_by), - last_edited_by=PartialUser.from_dict(last_edited_by), - icon=FileObject.from_dict(icon) if icon else None, - cover=FileObject.from_dict(cover) if cover else None, - parent=Parent.from_dict(parent), - **data, - ) - - return page diff --git a/unstructured/ingest/connector/notion/types/parent.py b/unstructured/ingest/connector/notion/types/parent.py deleted file mode 100644 index f78c16673..000000000 --- a/unstructured/ingest/connector/notion/types/parent.py +++ /dev/null @@ -1,66 +0,0 @@ -# https://developers.notion.com/reference/parent-object -from dataclasses import dataclass - -from unstructured.ingest.connector.notion.interfaces import FromJSONMixin - - -# https://developers.notion.com/reference/parent-object#database-parent -@dataclass -class DatabaseParent(FromJSONMixin): - database_id: str - type: str = "database_id" - - @classmethod - def from_dict(cls, data: dict): - return cls(database_id=data["database_id"]) - - -# https://developers.notion.com/reference/parent-object#page-parent -@dataclass -class PageParent(FromJSONMixin): - page_id: str - type: str = "page_id" - - @classmethod - def from_dict(cls, data: dict): - return 
cls(page_id=data["page_id"]) - - -# https://developers.notion.com/reference/parent-object#workspace-parent -@dataclass -class WorkspaceParent(FromJSONMixin): - type: str = "workspace" - workspace: bool = True - - @classmethod - def from_dict(cls, data: dict): - return cls() - - -# https://developers.notion.com/reference/parent-object#block-parent -@dataclass -class BlockParent(FromJSONMixin): - block_id: str - type: str = "block_id" - - @classmethod - def from_dict(cls, data: dict): - return cls(block_id=data["block_id"]) - - -@dataclass -class Parent(FromJSONMixin): - block_id: str - type: str = "block_id" - - @classmethod - def from_dict(cls, data: dict): - t = data["type"] - if t == "database_id": - return DatabaseParent.from_dict(data) - elif t == "page_id": - return PageParent.from_dict(data) - elif t == "workspace": - return WorkspaceParent.from_dict(data) - elif t == "block_id": - return BlockParent.from_dict(data) diff --git a/unstructured/ingest/connector/notion/types/rich_text.py b/unstructured/ingest/connector/notion/types/rich_text.py deleted file mode 100644 index ae71a0a78..000000000 --- a/unstructured/ingest/connector/notion/types/rich_text.py +++ /dev/null @@ -1,189 +0,0 @@ -# https://developers.notion.com/reference/rich-text -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.attributes import Href, Style -from htmlBuilder.tags import A, B, Code, Div, HtmlTag, I, S, Span, U -from htmlBuilder.tags import Text as HtmlText - -from unstructured.ingest.connector.notion.interfaces import ( - FromJSONMixin, - GetHTMLMixin, -) -from unstructured.ingest.connector.notion.types.date import Date -from unstructured.ingest.connector.notion.types.user import People - - -@dataclass -class Annotations(FromJSONMixin): - bold: bool - code: bool - italic: bool - strikethrough: bool - underline: bool - color: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class Equation(FromJSONMixin, GetHTMLMixin): - expression: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return Code([], self.expression) if self.expression else None - - -@dataclass -class MentionDatabase(FromJSONMixin, GetHTMLMixin): - id: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return Div([], self.id) if self.id else None - - -@dataclass -class MentionLinkPreview(FromJSONMixin, GetHTMLMixin): - url: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return A([Href(self.url)], self.url) if self.url else None - - -@dataclass -class MentionPage(FromJSONMixin, GetHTMLMixin): - id: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return Div([], self.id) if self.id else None - - -@dataclass -class MentionTemplate(FromJSONMixin): - template_mention_date: Optional[str] - template_mention_user: Optional[str] - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class Mention(FromJSONMixin, GetHTMLMixin): - type: str - database: Optional[MentionDatabase] = None - date: Optional[Date] = None - link_preview: Optional[MentionLinkPreview] = None - page: Optional[MentionPage] = None - template_mention: Optional[MentionTemplate] = None - user: Optional[People] = None - - @classmethod - def from_dict(cls, data: dict): - t = data["type"] - 
mention = cls(type=t) - if t == "date": - mention.date = Date.from_dict(data["date"]) - elif t == "database": - mention.database = MentionDatabase.from_dict(data["database"]) - elif t == "link_preview": - mention.link_preview = MentionLinkPreview.from_dict(data["link_preview"]) - elif t == "page": - mention.page = MentionPage.from_dict(data["page"]) - elif t == "template_mention": - mention.template_mention = MentionTemplate.from_dict(data["template_mention"]) - elif t == "user": - mention.user = People.from_dict(data["user"]) - - return mention - - def get_html(self) -> Optional[HtmlTag]: - t = self.type - if t == "date": - return self.date.get_html() if self.date else None - elif t == "database": - return self.database.get_html() if self.database else None - elif t == "link_preview": - return self.link_preview.get_html() if self.link_preview else None - elif t == "page": - return self.page.get_html() if self.page else None - elif t == "user": - return self.user.get_html() if self.user else None - return None - - -@dataclass -class Text(FromJSONMixin): - content: str - link: Optional[dict] - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class RichText(FromJSONMixin, GetHTMLMixin): - type: str - plain_text: str - annotations: Optional[Annotations] = None - href: Optional[str] = None - text: Optional[Text] = None - mention: Optional[Mention] = None - equation: Optional[Equation] = None - - def get_html(self) -> Optional[HtmlTag]: - text = HtmlText(self.plain_text) - if self.href: - text = A([Href(self.href)], text) - if self.annotations: - annotations = self.annotations - if annotations.bold: - text = B([], text) - if annotations.code: - text = Code([], text) - if annotations.italic: - text = I([], text) - if annotations.strikethrough: - text = S([], text) - if annotations.underline: - text = U([], text) - if annotations.color and annotations.color != "default": - if isinstance(text, HtmlText): - text = Span([], text) - text.attributes.append(Style(f"color:{annotations.color}")) - return text - - @classmethod - def from_dict(cls, data: dict): - t = data["type"] - rich_text = cls( - annotations=Annotations.from_dict(data.pop("annotations")), - **data, - ) - if t == "text": - rich_text.text = Text.from_dict(data["text"]) - elif t == "mention": - rich_text.mention = Mention.from_dict(data["mention"]) - elif t == "equation": - rich_text.equation = Equation.from_dict(data["equation"]) - - return rich_text diff --git a/unstructured/ingest/connector/notion/types/user.py b/unstructured/ingest/connector/notion/types/user.py deleted file mode 100644 index 4574c0b8f..000000000 --- a/unstructured/ingest/connector/notion/types/user.py +++ /dev/null @@ -1,76 +0,0 @@ -# https://developers.notion.com/reference/user -from dataclasses import dataclass, field -from typing import Optional - -from htmlBuilder.attributes import Href -from htmlBuilder.tags import A, Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import FromJSONMixin, GetHTMLMixin - - -@dataclass -class PartialUser(FromJSONMixin): - id: str - object: str = "user" - - @classmethod - def from_dict(cls, data: dict): - return cls(id=data["id"]) - - -@dataclass -class User(FromJSONMixin, GetHTMLMixin): - object: dict - id: str - type: Optional[str] = None - name: Optional[str] = None - avatar_url: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_text(self) -> Optional[str]: - text = self.name - if self.avatar_url: - text = 
f"[{text}]({self.avatar_url}" - return text - - def get_html(self) -> Optional[HtmlTag]: - if self.avatar_url: - return A([Href(self.avatar_url)], self.name) - else: - return Div([], self.name) - - -@dataclass -class People(User): - person: dict = field(default_factory=dict) - - -@dataclass -class Bots(FromJSONMixin, GetHTMLMixin): - object: dict - id: str - bot: dict - owner: dict - type: str - workspace_name: str - name: Optional[str] = None - avatar_url: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_text(self) -> Optional[str]: - text = self.name - if self.avatar_url: - text = f"[{text}]({self.avatar_url}" - return text - - def get_html(self) -> Optional[HtmlTag]: - if self.avatar_url: - return A([Href(self.avatar_url)], self.name) - else: - return Div([], self.name) diff --git a/unstructured/ingest/connector/onedrive.py b/unstructured/ingest/connector/onedrive.py deleted file mode 100644 index 303e7f8fc..000000000 --- a/unstructured/ingest/connector/onedrive.py +++ /dev/null @@ -1,232 +0,0 @@ -import typing as t -from dataclasses import dataclass, field -from pathlib import Path - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, -) -from unstructured.ingest.logger import logger -from unstructured.ingest.utils.string_and_date_utils import ensure_isoformat_datetime -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from office365.graph_client import GraphClient - from office365.onedrive.driveitems.driveItem import DriveItem -MAX_MB_SIZE = 512_000_000 - - -@dataclass -class OneDriveAccessConfig(AccessConfig): - client_credential: str = enhanced_field(repr=False, sensitive=True, overload_name="client_cred") - - -@dataclass -class SimpleOneDriveConfig(BaseConnectorConfig): - access_config: OneDriveAccessConfig - client_id: str - user_pname: str - tenant: str = field(repr=False) - authority_url: t.Optional[str] = field(repr=False, default="https://login.microsoftonline.com") - path: t.Optional[str] = field(default="") - recursive: bool = False - - def __post_init__(self): - if not (self.client_id and self.access_config.client_credential and self.user_pname): - raise ValueError( - "Please provide all the following mandatory values:" - "\n-ms-client_id\n-ms-client_cred\n-ms-user-pname", - ) - self.token_factory = self._acquire_token - - @SourceConnectionError.wrap - @requires_dependencies(["msal"]) - def _acquire_token(self): - from msal import ConfidentialClientApplication - - try: - app = ConfidentialClientApplication( - authority=f"{self.authority_url}/{self.tenant}", - client_id=self.client_id, - client_credential=self.access_config.client_credential, - ) - token = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"]) - except ValueError as exc: - logger.error("Couldn't set up credentials for OneDrive") - raise exc - return token - - -@dataclass -class OneDriveIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - connector_config: SimpleOneDriveConfig - file_name: str - file_path: str - registry_name: str = "onedrive" - - def __post_init__(self): - self.ext = Path(self.file_name).suffix - if not self.ext: - raise ValueError("Unsupported file without extension.") - - 
self.server_relative_path = self.file_path + "/" + self.file_name - self._set_download_paths() - - def _set_download_paths(self) -> None: - """Parses the folder structure from the source and creates the download and output paths""" - download_path = Path(f"{self.read_config.download_dir}") - output_path = Path(f"{self.processor_config.output_dir}") - - if parent_path := self.file_path: - download_path = ( - download_path if parent_path == "" else (download_path / parent_path).resolve() - ) - output_path = ( - output_path if parent_path == "" else (output_path / parent_path).resolve() - ) - - self.download_dir = download_path - self.download_filepath = (download_path / self.file_name).resolve() - output_filename = output_filename = self.file_name + ".json" - self.output_dir = output_path - self.output_filepath = (output_path / output_filename).resolve() - - @property - def filename(self): - return Path(self.download_filepath).resolve() - - @property - def _output_filename(self): - return Path(self.output_filepath).resolve() - - @property - def record_locator(self) -> t.Optional[t.Dict[str, t.Any]]: - return { - "user_pname": self.connector_config.user_pname, - "server_relative_path": self.server_relative_path, - } - - @SourceConnectionNetworkError.wrap - @requires_dependencies(["office365"], extras="onedrive") - def _fetch_file(self): - from office365.graph_client import GraphClient - - client = GraphClient(self.connector_config.token_factory) - root = client.users[self.connector_config.user_pname].drive.get().execute_query().root - file = root.get_by_path(self.server_relative_path).get().execute_query() - return file - - def update_source_metadata(self, **kwargs): - file = kwargs.get("file", self._fetch_file()) - if file is None: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - - version = None - if (n_versions := len(file.versions)) > 0: - version = file.versions[n_versions - 1].properties.get("id", None) - - self.source_metadata = SourceMetadata( - date_created=ensure_isoformat_datetime(timestamp=file.created_datetime), - date_modified=ensure_isoformat_datetime(timestamp=file.last_modified_datetime), - version=version, - source_url=file.parent_reference.path + "/" + self.file_name, - exists=True, - ) - - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - file = self._fetch_file() - self.update_source_metadata(file=file) - if file is None: - raise ValueError( - f"Failed to retrieve file {self.file_path}/{self.file_name}", - ) - - fsize = file.get_property("size", 0) - self.output_dir.mkdir(parents=True, exist_ok=True) - - if not self.download_dir.is_dir(): - logger.debug(f"Creating directory: {self.download_dir}") - self.download_dir.mkdir(parents=True, exist_ok=True) - - if fsize > MAX_MB_SIZE: - logger.info(f"Downloading file with size: {fsize} bytes in chunks") - with self.filename.open(mode="wb") as f: - file.download_session(f, chunk_size=1024 * 1024 * 100).execute_query() - else: - with self.filename.open(mode="wb") as f: - file.download(f).execute_query() - logger.info(f"File downloaded: {self.filename}") - return - - -@dataclass -class OneDriveSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - connector_config: SimpleOneDriveConfig - _client: t.Optional["GraphClient"] = field(init=False, default=None) - - @property - def client(self) -> "GraphClient": - from office365.graph_client import GraphClient - - if self._client is None: - self._client = GraphClient(self.connector_config.token_factory) - 
return self._client - - @requires_dependencies(["office365"], extras="onedrive") - def initialize(self): - _ = self.client - - @requires_dependencies(["office365"], extras="onedrive") - def check_connection(self): - try: - token_resp: dict = self.connector_config.token_factory() - if error := token_resp.get("error"): - raise SourceConnectionError( - "{} ({})".format(error, token_resp.get("error_description")) - ) - _ = self.client - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - def _list_objects(self, folder, recursive) -> t.List["DriveItem"]: - drive_items = folder.children.get().execute_query() - files = [d for d in drive_items if d.is_file] - if not recursive: - return files - folders = [d for d in drive_items if d.is_folder] - for f in folders: - files += self._list_objects(f, recursive) - return files - - def _gen_ingest_doc(self, file: "DriveItem") -> OneDriveIngestDoc: - file_path = file.parent_reference.path.split(":")[-1] - file_path = file_path[1:] if file_path[0] == "/" else file_path - return OneDriveIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - file_name=file.name, - file_path=file_path, - ) - - def get_ingest_docs(self): - root = self.client.users[self.connector_config.user_pname].drive.get().execute_query().root - if fpath := self.connector_config.path: - root = root.get_by_path(fpath).get().execute_query() - if root is None or not root.is_folder: - raise ValueError(f"Unable to find directory, given: {fpath}") - files = self._list_objects(root, self.connector_config.recursive) - return [self._gen_ingest_doc(f) for f in files] diff --git a/unstructured/ingest/connector/opensearch.py b/unstructured/ingest/connector/opensearch.py deleted file mode 100644 index 543bfbc39..000000000 --- a/unstructured/ingest/connector/opensearch.py +++ /dev/null @@ -1,219 +0,0 @@ -import typing as t -from dataclasses import dataclass, field - -from dataclasses_json.core import Json - -from unstructured.ingest.connector.elasticsearch import ( - ElasticsearchDestinationConnector, - ElasticsearchDocumentMeta, - ElasticsearchIngestDoc, - ElasticsearchIngestDocBatch, - ElasticsearchSourceConnector, - SimpleElasticsearchConfig, -) -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import DestinationConnectionError, SourceConnectionError -from unstructured.ingest.interfaces import AccessConfig, BaseSingleIngestDoc -from unstructured.ingest.logger import logger -from unstructured.ingest.utils.data_prep import generator_batching_wbytes -from unstructured.staging.base import flatten_dict -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from opensearchpy import OpenSearch - -"""Since the actual OpenSearch project is a fork of Elasticsearch, we are relying -heavily on the Elasticsearch connector code, inheriting the functionality as much as possible.""" - - -@dataclass -class OpenSearchAccessConfig(AccessConfig): - hosts: t.Optional[t.List[str]] = None - username: t.Optional[str] = None - password: t.Optional[str] = enhanced_field(default=None, sensitive=True) - use_ssl: bool = False - verify_certs: bool = False - ssl_show_warn: bool = False - ca_certs: t.Optional[str] = None - client_cert: t.Optional[str] = None - client_key: t.Optional[str] = None - - def to_dict(self, **kwargs) -> t.Dict[str, Json]: - d = super().to_dict(**kwargs) 
- d["http_auth"] = (self.username, self.password) - return d - - -@dataclass -class SimpleOpenSearchConfig(SimpleElasticsearchConfig): - access_config: OpenSearchAccessConfig = None - - -@dataclass -class OpenSearchIngestDoc(ElasticsearchIngestDoc): - """Class encapsulating fetching a doc and writing processed results (but not - doing the processing!). - - Current implementation creates a python OpenSearch client to fetch each doc, - rather than creating a client for each thread. - """ - - connector_config: SimpleOpenSearchConfig - registry_name: str = "opensearch" - - @SourceConnectionError.wrap - @requires_dependencies(["opensearchpy"], extras="opensearch") - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - pass - - -@dataclass -class OpenSearchIngestDocBatch(ElasticsearchIngestDocBatch): - connector_config: SimpleOpenSearchConfig - ingest_docs: t.List[OpenSearchIngestDoc] = field(default_factory=list) - registry_name: str = "opensearch_batch" - - @requires_dependencies(["opensearchpy"], extras="opensearch") - def _get_docs(self): - from opensearchpy import OpenSearch - from opensearchpy.helpers import scan - - ops = OpenSearch(**self.connector_config.access_config.to_dict(apply_name_overload=False)) - scan_query = { - "_source": self.connector_config.fields, - "version": True, - "query": {"ids": {"values": self.list_of_ids}}, - } - - result = scan( - ops, - query=scan_query, - scroll="1m", - index=self.connector_config.index_name, - ) - return list(result) - - @SourceConnectionError.wrap - @requires_dependencies(["opensearchpy"], extras="opensearch") - def get_files(self): - documents = self._get_docs() - for doc in documents: - ingest_doc = OpenSearchIngestDoc( - processor_config=self.processor_config, - read_config=self.read_config, - connector_config=self.connector_config, - document=doc, - document_meta=ElasticsearchDocumentMeta( - self.connector_config.index_name, doc["_id"] - ), - ) - ingest_doc.update_source_metadata() - doc_body = doc["_source"] - filename = ingest_doc.filename - flattened_dict = flatten_dict(dictionary=doc_body) - str_values = [str(value) for value in flattened_dict.values()] - concatenated_values = "\n".join(str_values) - - filename.parent.mkdir(parents=True, exist_ok=True) - with open(filename, "w", encoding="utf8") as f: - f.write(concatenated_values) - self.ingest_docs.append(ingest_doc) - - -@dataclass -class OpenSearchSourceConnector(ElasticsearchSourceConnector): - """Fetches particular fields from all documents in a given opensearch cluster and index""" - - connector_config: SimpleOpenSearchConfig - _ops: t.Optional["OpenSearch"] = field(init=False, default=None) - - @property - def ops(self): - from opensearchpy import OpenSearch - - if self._ops is None: - self._ops = OpenSearch( - **self.connector_config.access_config.to_dict(apply_name_overload=False) - ) - return self._ops - - def check_connection(self): - try: - assert self.ops.ping() - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - @requires_dependencies(["opensearchpy"], extras="opensearch") - def _get_doc_ids(self): - """Fetches all document ids in an index""" - from opensearchpy.helpers import scan - - hits = scan( - self.ops, - query=self.scan_query, - scroll="1m", - index=self.connector_config.index_name, - ) - - return [hit["_id"] for hit in hits] - - def get_ingest_docs(self): - """Fetches all documents in an index, using ids that are fetched with 
_get_doc_ids""" - ids = self._get_doc_ids() - id_batches = [ - ids[ - i - * self.connector_config.batch_size : (i + 1) # noqa - * self.connector_config.batch_size - ] - for i in range( - (len(ids) + self.connector_config.batch_size - 1) - // self.connector_config.batch_size - ) - ] - return [ - OpenSearchIngestDocBatch( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - list_of_ids=batched_ids, - ) - for batched_ids in id_batches - ] - - -@dataclass -class OpenSearchDestinationConnector(ElasticsearchDestinationConnector): - connector_config: SimpleOpenSearchConfig - _client: t.Optional["OpenSearch"] = field(init=False, default=None) - - @DestinationConnectionError.wrap - @requires_dependencies(["opensearchpy"], extras="opensearch") - def generate_client(self) -> "OpenSearch": - from opensearchpy import OpenSearch - - return OpenSearch(**self.connector_config.access_config.to_dict(apply_name_overload=False)) - - @requires_dependencies(["opensearchpy"], extras="opensearch") - def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]]) -> None: - logger.info( - f"writing document batches to destination" - f" index named {self.connector_config.index_name}" - f" at {self.connector_config.access_config.hosts}" - f" with batch size (in bytes) {self.write_config.batch_size_bytes}" - f" with {self.write_config.num_processes} (number of) processes" - ) - from opensearchpy.helpers import parallel_bulk - - for batch in generator_batching_wbytes( - elements_dict, batch_size_limit_bytes=self.write_config.batch_size_bytes - ): - for success, info in parallel_bulk( - self.client, batch, thread_count=self.write_config.num_processes - ): - if not success: - logger.error( - "upload failed for a batch in opensearch destination connector:", info - ) diff --git a/unstructured/ingest/connector/outlook.py b/unstructured/ingest/connector/outlook.py deleted file mode 100644 index 58684a6db..000000000 --- a/unstructured/ingest/connector/outlook.py +++ /dev/null @@ -1,285 +0,0 @@ -import hashlib -import os -import typing as t -from collections import defaultdict -from dataclasses import dataclass, field -from itertools import chain -from pathlib import Path - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -MAX_NUM_EMAILS = 1000000 # Maximum number of emails per folder -if t.TYPE_CHECKING: - from office365.graph_client import GraphClient - - -class MissingFolderError(Exception): - """There are no root folders with those names.""" - - -@dataclass -class OutlookAccessConfig(AccessConfig): - client_credential: str = enhanced_field(repr=False, sensitive=True, overload_name="client_cred") - - -@dataclass -class SimpleOutlookConfig(BaseConnectorConfig): - """This class is getting the token.""" - - access_config: OutlookAccessConfig - user_email: str - client_id: str - tenant: t.Optional[str] = field(repr=False, default="common") - authority_url: t.Optional[str] = field(repr=False, default="https://login.microsoftonline.com") - outlook_folders: t.List[str] = field(default_factory=list) - recursive: bool = False - registry_name: str = 
"outlook" - - def __post_init__(self): - if not (self.client_id and self.access_config.client_credential and self.user_email): - raise ValueError( - "Please provide one of the following mandatory values:" - "\nclient_id\nclient_cred\nuser_email", - ) - self.token_factory = self._acquire_token - - @requires_dependencies(["msal"]) - def _acquire_token(self): - from msal import ConfidentialClientApplication - - try: - app = ConfidentialClientApplication( - authority=f"{self.authority_url}/{self.tenant}", - client_id=self.client_id, - client_credential=self.access_config.client_credential, - ) - token = app.acquire_token_for_client( - scopes=["https://graph.microsoft.com/.default"], - ) - except ValueError as exc: - logger.error("Couldn't set up credentials for Outlook") - raise exc - return token - - @requires_dependencies(["office365"], extras="outlook") - def _get_client(self): - from office365.graph_client import GraphClient - - return GraphClient(self.token_factory) - - -@dataclass -class OutlookIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - connector_config: SimpleOutlookConfig - message_id: str - registry_name: str = "outlook" - - def __post_init__(self): - self._set_download_paths() - - def hash_mail_name(self, id): - """Outlook email ids are 152 char long. Hash to shorten to 16.""" - return hashlib.sha256(id.encode("utf-8")).hexdigest()[:16] - - def _set_download_paths(self) -> None: - """Creates paths for downloading and parsing.""" - download_path = Path(f"{self.read_config.download_dir}") - output_path = Path(f"{self.processor_config.output_dir}") - - self.download_dir = download_path - self.download_filepath = ( - download_path / f"{self.hash_mail_name(self.message_id)}.eml" - ).resolve() - oname = f"{self.hash_mail_name(self.message_id)}.eml.json" - self.output_dir = output_path - self.output_filepath = (output_path / oname).resolve() - - @property - def filename(self): - return Path(self.download_filepath).resolve() - - @property - def _output_filename(self): - return Path(self.output_filepath).resolve() - - @property - def record_locator(self) -> t.Optional[t.Dict[str, t.Any]]: - return { - "message_id": self.message_id, - "user_email": self.connector_config.user_email, - } - - @requires_dependencies(["office365"], extras="outlook") - def update_source_metadata(self, **kwargs): - from office365.runtime.client_request_exception import ClientRequestException - - try: - client = self.connector_config._get_client() - msg = ( - client.users[self.connector_config.user_email] - .messages[self.message_id] - .get() - .execute_query() - ) - except ClientRequestException as e: - if e.response.status_code == 404: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - raise - self.source_metadata = SourceMetadata( - date_created=msg.created_datetime.isoformat(), - date_modified=msg.last_modified_datetime.isoformat(), - version=msg.get_property("changeKey"), - source_url=msg.get_property("webLink"), - exists=True, - ) - - @SourceConnectionNetworkError.wrap - def _run_download(self, local_file): - client = self.connector_config._get_client() - client.users[self.connector_config.user_email].messages[self.message_id].download( - local_file, - ).execute_query() - - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - @requires_dependencies(["office365"], extras="outlook") - def get_file(self): - """Relies on Office365 python sdk message object to do the download.""" - try: - self.connector_config._get_client() - self.update_source_metadata() - if 
not self.download_dir.is_dir(): - logger.debug(f"Creating directory: {self.download_dir}") - self.download_dir.mkdir(parents=True, exist_ok=True) - - with open( - os.path.join( - self.download_dir, - self.hash_mail_name(self.message_id) + ".eml", - ), - "wb", - ) as local_file: - self._run_download(local_file=local_file) - - except Exception as e: - logger.error( - f"Error while downloading and saving file: {self.hash_mail_name(self.message_id)}.", - ) - logger.error(e) - return - logger.info(f"File downloaded: {self.hash_mail_name(self.message_id)}") - return - - -@dataclass -class OutlookSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - connector_config: SimpleOutlookConfig - _client: t.Optional["GraphClient"] = field(init=False, default=None) - - @property - def client(self) -> "GraphClient": - if self._client is None: - self._client = self.connector_config._get_client() - return self._client - - def initialize(self): - try: - self.get_folder_ids() - except Exception as e: - raise SourceConnectionError(f"failed to validate connection: {e}") - - def check_connection(self): - try: - _ = self.client - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - def recurse_folders(self, folder_id, main_folder_dict): - """We only get a count of subfolders for any folder. - Have to make additional calls to get subfolder ids.""" - subfolders = ( - self.client.users[self.connector_config.user_email] - .mail_folders[folder_id] - .child_folders.get() - .execute_query() - ) - for subfolder in subfolders: - for k, v in main_folder_dict.items(): - if subfolder.get_property("parentFolderId") in v: - v.append(subfolder.id) - if subfolder.get_property("childFolderCount") > 0: - self.recurse_folders(subfolder.id, main_folder_dict) - - def get_folder_ids(self): - """Sets the mail folder ids and subfolder ids for requested root mail folders.""" - self.root_folders = defaultdict(list) - root_folders_with_subfolders = [] - get_root_folders = ( - self.client.users[self.connector_config.user_email].mail_folders.get().execute_query() - ) - - for folder in get_root_folders: - self.root_folders[folder.display_name].append(folder.id) - if folder.get_property("childFolderCount") > 0: - root_folders_with_subfolders.append(folder.id) - - for folder in root_folders_with_subfolders: - self.recurse_folders(folder, self.root_folders) - - # Narrow down all mail folder ids (plus all subfolders) to the ones that were requested. - self.selected_folder_ids = list( - chain.from_iterable( - [ - v - for k, v in self.root_folders.items() - if k.lower() in [x.lower() for x in self.connector_config.outlook_folders] - ], - ), - ) - if not self.selected_folder_ids: - raise MissingFolderError( - "There are no root folders with the names: " - f"{self.connector_config.outlook_folders}", - ) - - def get_ingest_docs(self): - """Returns a list of all the message objects that are in the requested root folder(s).""" - filtered_messages = [] - - # Get all the relevant messages in the selected folders/subfolders. - for folder_id in self.selected_folder_ids: - messages = ( - self.client.users[self.connector_config.user_email] - .mail_folders[folder_id] - .messages.get() - .top(MAX_NUM_EMAILS) # Prevents the return from paging - .execute_query() - ) - # Skip empty list if there are no messages in folder. 
- if messages: - filtered_messages.append(messages) - return [ - OutlookIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - message_id=message.id, - ) - for message in list(chain.from_iterable(filtered_messages)) - ] diff --git a/unstructured/ingest/connector/pinecone.py b/unstructured/ingest/connector/pinecone.py deleted file mode 100644 index 6599185a1..000000000 --- a/unstructured/ingest/connector/pinecone.py +++ /dev/null @@ -1,142 +0,0 @@ -import copy -import json -import multiprocessing as mp -import typing as t -import uuid -from dataclasses import dataclass - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.enhanced_dataclass.core import _asdict -from unstructured.ingest.error import DestinationConnectionError, WriteError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseDestinationConnector, - ConfigSessionHandleMixin, - IngestDocSessionHandleMixin, - WriteConfig, -) -from unstructured.ingest.logger import logger -from unstructured.ingest.utils.data_prep import batch_generator -from unstructured.staging.base import flatten_dict -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from pinecone import Index as PineconeIndex - - -@dataclass -class PineconeAccessConfig(AccessConfig): - api_key: str = enhanced_field(sensitive=True) - - -@dataclass -class SimplePineconeConfig(ConfigSessionHandleMixin, BaseConnectorConfig): - index_name: str - environment: str - access_config: PineconeAccessConfig - - -@dataclass -class PineconeWriteConfig(WriteConfig): - batch_size: int = 50 - num_processes: int = 1 - - -@dataclass -class PineconeDestinationConnector(IngestDocSessionHandleMixin, BaseDestinationConnector): - write_config: PineconeWriteConfig - connector_config: SimplePineconeConfig - _index: t.Optional["PineconeIndex"] = None - - def to_dict(self, **kwargs): - """ - The _index variable in this dataclass breaks deepcopy due to: - TypeError: cannot pickle '_thread.lock' object - When serializing, remove it, meaning client data will need to be reinitialized - when deserialized - """ - self_cp = copy.copy(self) - if hasattr(self_cp, "_index"): - setattr(self_cp, "_index", None) - return _asdict(self_cp, **kwargs) - - @property - def pinecone_index(self): - if self._index is None: - self._index = self.create_index() - return self._index - - def initialize(self): - pass - - @requires_dependencies(["pinecone"], extras="pinecone") - def create_index(self) -> "PineconeIndex": - from pinecone import Pinecone - - from unstructured import __version__ as unstructured_version - - pc = Pinecone( - api_key=self.connector_config.access_config.api_key, - source_tag=f"unstructured=={unstructured_version}", - ) - - index = pc.Index(self.connector_config.index_name) - logger.debug(f"Connected to index: {pc.describe_index(self.connector_config.index_name)}") - return index - - @DestinationConnectionError.wrap - def check_connection(self): - _ = self.pinecone_index - - @DestinationConnectionError.wrap - @requires_dependencies(["pinecone"], extras="pinecone") - def upsert_batch(self, batch): - import pinecone.exceptions - - index = self.pinecone_index - try: - response = index.upsert(batch) - except pinecone.exceptions.PineconeApiException as api_error: - raise WriteError(f"http error: {api_error}") from api_error - logger.debug(f"results: {response}") - - def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], 
**kwargs) -> None: - logger.info( - f"Upserting {len(elements_dict)} elements to destination " - f"index at {self.connector_config.index_name}", - ) - - pinecone_batch_size = self.write_config.batch_size - - logger.info(f"using {self.write_config.num_processes} processes to upload") - if self.write_config.num_processes == 1: - for chunk in batch_generator(elements_dict, pinecone_batch_size): - self.upsert_batch(chunk) # noqa: E203 - - else: - with mp.Pool( - processes=self.write_config.num_processes, - ) as pool: - pool.map( - self.upsert_batch, list(batch_generator(elements_dict, pinecone_batch_size)) - ) - - def normalize_dict(self, element_dict: dict) -> dict: - # While flatten_dict enables indexing on various fields, - # element_serialized enables easily reloading the element object to memory. - # element_serialized is formed without text/embeddings to avoid data bloating. - return { - "id": str(uuid.uuid4()), - "values": element_dict.pop("embeddings", None), - "metadata": { - "text": element_dict.pop("text", None), - "element_serialized": json.dumps(element_dict), - **flatten_dict( - element_dict, - separator="-", - flatten_lists=True, - remove_none=True, - ), - }, - } diff --git a/unstructured/ingest/connector/qdrant.py b/unstructured/ingest/connector/qdrant.py deleted file mode 100644 index da19c2dae..000000000 --- a/unstructured/ingest/connector/qdrant.py +++ /dev/null @@ -1,145 +0,0 @@ -import json -import multiprocessing as mp -import typing as t -import uuid -from dataclasses import dataclass - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import DestinationConnectionError, WriteError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseDestinationConnector, - ConfigSessionHandleMixin, - IngestDocSessionHandleMixin, - WriteConfig, -) -from unstructured.ingest.logger import logger -from unstructured.ingest.utils.data_prep import batch_generator -from unstructured.staging.base import flatten_dict -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from qdrant_client import QdrantClient - - -@dataclass -class QdrantAccessConfig(AccessConfig): - api_key: t.Optional[str] = enhanced_field(sensitive=True) - - -@dataclass -class SimpleQdrantConfig(ConfigSessionHandleMixin, BaseConnectorConfig): - collection_name: str - location: t.Optional[str] = None - url: t.Optional[str] = None - port: t.Optional[int] = 6333 - grpc_port: t.Optional[int] = 6334 - prefer_grpc: t.Optional[bool] = False - https: t.Optional[bool] = None - prefix: t.Optional[str] = None - timeout: t.Optional[float] = None - host: t.Optional[str] = None - path: t.Optional[str] = None - force_disable_check_same_thread: t.Optional[bool] = False - access_config: t.Optional[QdrantAccessConfig] = None - - -@dataclass -class QdrantWriteConfig(WriteConfig): - batch_size: int = 50 - num_processes: int = 1 - - -@dataclass -class QdrantDestinationConnector(IngestDocSessionHandleMixin, BaseDestinationConnector): - write_config: QdrantWriteConfig - connector_config: SimpleQdrantConfig - _client: t.Optional["QdrantClient"] = None - - @property - def qdrant_client(self): - if self._client is None: - self._client = self.create_client() - return self._client - - def initialize(self): - ... 
# fmt: skip - - @requires_dependencies(["qdrant_client"], extras="qdrant") - def create_client(self) -> "QdrantClient": - from qdrant_client import QdrantClient - - client = QdrantClient( - location=self.connector_config.location, - url=self.connector_config.url, - port=self.connector_config.port, - grpc_port=self.connector_config.grpc_port, - prefer_grpc=self.connector_config.prefer_grpc, - https=self.connector_config.https, - api_key=( - self.connector_config.access_config.api_key - if self.connector_config.access_config - else None - ), - prefix=self.connector_config.prefix, - timeout=self.connector_config.timeout, - host=self.connector_config.host, - path=self.connector_config.path, - force_disable_check_same_thread=self.connector_config.force_disable_check_same_thread, - ) - - return client - - @DestinationConnectionError.wrap - def check_connection(self): - self.qdrant_client.get_collections() - - @DestinationConnectionError.wrap - @requires_dependencies(["qdrant_client"], extras="qdrant") - def upsert_batch(self, batch: t.List[t.Dict[str, t.Any]]): - from qdrant_client import models - - client = self.qdrant_client - try: - points: list[models.PointStruct] = [models.PointStruct(**item) for item in batch] - response = client.upsert( - self.connector_config.collection_name, points=points, wait=True - ) - except Exception as api_error: - raise WriteError(f"Qdrant error: {api_error}") from api_error - logger.debug(f"results: {response}") - - def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None: - logger.info( - f"Upserting {len(elements_dict)} elements to " - f"{self.connector_config.collection_name}", - ) - - qdrant_batch_size = self.write_config.batch_size - - logger.info(f"using {self.write_config.num_processes} processes to upload") - if self.write_config.num_processes == 1: - for chunk in batch_generator(elements_dict, qdrant_batch_size): - self.upsert_batch(chunk) - - else: - with mp.Pool( - processes=self.write_config.num_processes, - ) as pool: - pool.map(self.upsert_batch, list(batch_generator(elements_dict, qdrant_batch_size))) - - def normalize_dict(self, element_dict: dict) -> dict: - return { - "id": str(uuid.uuid4()), - "vector": element_dict.pop("embeddings", {}), - "payload": { - "text": element_dict.pop("text", None), - "element_serialized": json.dumps(element_dict), - **flatten_dict( - element_dict, - separator="-", - flatten_lists=True, - ), - }, - } diff --git a/unstructured/ingest/connector/reddit.py b/unstructured/ingest/connector/reddit.py deleted file mode 100644 index 18f8ba7c7..000000000 --- a/unstructured/ingest/connector/reddit.py +++ /dev/null @@ -1,166 +0,0 @@ -import typing as t -from dataclasses import dataclass, field -from datetime import datetime -from pathlib import Path - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from praw import Reddit - - -@dataclass -class RedditAccessConfig(AccessConfig): - client_secret: t.Optional[str] = enhanced_field(default=None, sensitive=True) - - -@dataclass -class SimpleRedditConfig(BaseConnectorConfig): - access_config: RedditAccessConfig - 
subreddit_name: str - num_posts: int - user_agent: str - client_id: str - search_query: t.Optional[str] = None - - def __post_init__(self): - if self.num_posts <= 0: - raise ValueError("The number of Reddit posts to fetch must be positive.") - - -@dataclass -class RedditIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - connector_config: SimpleRedditConfig = field(repr=False) - post_id: str - registry_name: str = "reddit" - - def _create_full_tmp_dir_path(self): - self.filename.parent.mkdir(parents=True, exist_ok=True) - - @SourceConnectionNetworkError.wrap - @requires_dependencies(["praw"]) - def get_post(self): - from praw import Reddit - from praw.models import Submission - - reddit = Reddit( - client_id=self.connector_config.client_id, - client_secret=self.connector_config.access_config.client_secret, - user_agent=self.connector_config.user_agent, - ) - post = Submission(reddit, self.post_id) - return post - - def update_source_metadata(self, **kwargs): - post = kwargs.get("post", self.get_post()) - if post is None: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - - file_exists = (post.author != "[deleted]" or post.auth is not None) and ( - post.selftext != "[deleted]" or post.selftext != "[removed]" - ) - - self.source_metadata = SourceMetadata( - date_created=datetime.utcfromtimestamp(post.created_utc).isoformat(), - source_url=post.permalink, - exists=file_exists, - ) - - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - """Fetches the "remote" doc and stores it locally on the filesystem.""" - self._create_full_tmp_dir_path() - # Write the title plus the body, if any - post = self.get_post() - self.update_source_metadata(post=post) - if post is None: - raise ValueError( - f"Failed to retrieve post {self.post_id}", - ) - - text_to_write = f"# {post.title}\n{post.selftext}" - with open(self.filename, "w", encoding="utf8") as f: - f.write(text_to_write) - - @property - def filename(self) -> Path: - return (Path(self.read_config.download_dir) / f"{self.post_id}.md").resolve() - - @property - def _output_filename(self): - return Path(self.processor_config.output_dir) / f"{self.post_id}.json" - - @property - def date_modified(self) -> t.Optional[str]: - return None - - @property - def version(self) -> t.Optional[str]: - return None - - -@dataclass -class RedditSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - connector_config: SimpleRedditConfig - _reddit: t.Optional["Reddit"] = field(init=False, default=None) - - @property - def reddit(self) -> "Reddit": - from praw import Reddit - - if self._reddit is None: - self._reddit = Reddit( - client_id=self.connector_config.client_id, - client_secret=self.connector_config.access_config.client_secret, - user_agent=self.connector_config.user_agent, - ) - return self._reddit - - @requires_dependencies(["praw"], extras="reddit") - def initialize(self): - _ = self.reddit - - def check_connection(self): - from praw.endpoints import API_PATH - from prawcore import ResponseException - - try: - self.reddit._objectify_request(method="HEAD", params=None, path=API_PATH["me"]) - except ResponseException as response_error: - logger.error(f"failed to validate connection: {response_error}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {response_error}") - - def get_ingest_docs(self): - subreddit = self.reddit.subreddit(self.connector_config.subreddit_name) - if self.connector_config.search_query: - posts = subreddit.search( - 
self.connector_config.search_query, - limit=self.connector_config.num_posts, - ) - else: - posts = subreddit.hot(limit=self.connector_config.num_posts) - return [ - RedditIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - post_id=post.id, - ) - for post in posts - ] diff --git a/unstructured/ingest/connector/registry.py b/unstructured/ingest/connector/registry.py deleted file mode 100644 index 35250d6f0..000000000 --- a/unstructured/ingest/connector/registry.py +++ /dev/null @@ -1,109 +0,0 @@ -import json -from typing import Dict, Type, cast - -from unstructured.ingest.connector.airtable import AirtableIngestDoc -from unstructured.ingest.connector.astradb import AstraDBIngestDoc -from unstructured.ingest.connector.biomed import BiomedIngestDoc -from unstructured.ingest.connector.confluence import ConfluenceIngestDoc -from unstructured.ingest.connector.delta_table import DeltaTableIngestDoc -from unstructured.ingest.connector.discord import DiscordIngestDoc -from unstructured.ingest.connector.elasticsearch import ( - ElasticsearchIngestDoc, - ElasticsearchIngestDocBatch, -) -from unstructured.ingest.connector.fsspec.azure import AzureBlobStorageIngestDoc -from unstructured.ingest.connector.fsspec.box import BoxIngestDoc -from unstructured.ingest.connector.fsspec.dropbox import DropboxIngestDoc -from unstructured.ingest.connector.fsspec.gcs import GcsIngestDoc -from unstructured.ingest.connector.fsspec.s3 import S3IngestDoc -from unstructured.ingest.connector.fsspec.sftp import SftpIngestDoc -from unstructured.ingest.connector.github import GitHubIngestDoc -from unstructured.ingest.connector.gitlab import GitLabIngestDoc -from unstructured.ingest.connector.google_drive import GoogleDriveIngestDoc -from unstructured.ingest.connector.hubspot import HubSpotIngestDoc -from unstructured.ingest.connector.jira import JiraIngestDoc -from unstructured.ingest.connector.kafka import KafkaIngestDoc -from unstructured.ingest.connector.local import LocalIngestDoc -from unstructured.ingest.connector.mongodb import MongoDBIngestDoc, MongoDBIngestDocBatch -from unstructured.ingest.connector.notion.connector import ( - NotionDatabaseIngestDoc, - NotionPageIngestDoc, -) -from unstructured.ingest.connector.onedrive import OneDriveIngestDoc -from unstructured.ingest.connector.opensearch import OpenSearchIngestDoc, OpenSearchIngestDocBatch -from unstructured.ingest.connector.outlook import OutlookIngestDoc -from unstructured.ingest.connector.reddit import RedditIngestDoc -from unstructured.ingest.connector.salesforce import SalesforceIngestDoc -from unstructured.ingest.connector.sharepoint import SharepointIngestDoc -from unstructured.ingest.connector.slack import SlackIngestDoc -from unstructured.ingest.connector.wikipedia import ( - WikipediaIngestHTMLDoc, - WikipediaIngestSummaryDoc, - WikipediaIngestTextDoc, -) -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin -from unstructured.ingest.interfaces import BaseIngestDoc - -INGEST_DOC_NAME_TO_CLASS: Dict[str, Type[EnhancedDataClassJsonMixin]] = { - "airtable": AirtableIngestDoc, - "astradb": AstraDBIngestDoc, - "azure": AzureBlobStorageIngestDoc, - "biomed": BiomedIngestDoc, - "box": BoxIngestDoc, - "confluence": ConfluenceIngestDoc, - "delta-table": DeltaTableIngestDoc, - "discord": DiscordIngestDoc, - "dropbox": DropboxIngestDoc, - "elasticsearch": ElasticsearchIngestDoc, - "elasticsearch_batch": ElasticsearchIngestDocBatch, - "gcs": GcsIngestDoc, - 
"github": GitHubIngestDoc, - "gitlab": GitLabIngestDoc, - "google_drive": GoogleDriveIngestDoc, - "hubspot": HubSpotIngestDoc, - "jira": JiraIngestDoc, - "kafka": KafkaIngestDoc, - "local": LocalIngestDoc, - "mongodb": MongoDBIngestDoc, - "mongodb_batch": MongoDBIngestDocBatch, - "notion_database": NotionDatabaseIngestDoc, - "notion_page": NotionPageIngestDoc, - "onedrive": OneDriveIngestDoc, - "opensearch": OpenSearchIngestDoc, - "opensearch_batch": OpenSearchIngestDocBatch, - "outlook": OutlookIngestDoc, - "reddit": RedditIngestDoc, - "s3": S3IngestDoc, - "salesforce": SalesforceIngestDoc, - "sftp": SftpIngestDoc, - "sharepoint": SharepointIngestDoc, - "slack": SlackIngestDoc, - "wikipedia_html": WikipediaIngestHTMLDoc, - "wikipedia_text": WikipediaIngestTextDoc, - "wikipedia_summary": WikipediaIngestSummaryDoc, -} - - -def create_ingest_doc_from_json(ingest_doc_json: str) -> BaseIngestDoc: - try: - ingest_doc_dict: dict = json.loads(ingest_doc_json) - except TypeError as te: - raise TypeError( - f"failed to load json string when deserializing IngestDoc: {ingest_doc_json}", - ) from te - return create_ingest_doc_from_dict(ingest_doc_dict) - - -def create_ingest_doc_from_dict(ingest_doc_dict: dict) -> BaseIngestDoc: - ingest_doc_dict = ingest_doc_dict.copy() - if "registry_name" not in ingest_doc_dict: - raise ValueError(f"registry_name not present in ingest doc: {ingest_doc_dict}") - registry_name = ingest_doc_dict.pop("registry_name") - try: - ingest_doc_cls = INGEST_DOC_NAME_TO_CLASS[registry_name] - return cast(BaseIngestDoc, ingest_doc_cls.from_dict(ingest_doc_dict)) - except KeyError: - raise ValueError( - f"Error: Received unknown IngestDoc name: {registry_name} while deserializing", - "IngestDoc.", - ) diff --git a/unstructured/ingest/connector/salesforce.py b/unstructured/ingest/connector/salesforce.py deleted file mode 100644 index b17810120..000000000 --- a/unstructured/ingest/connector/salesforce.py +++ /dev/null @@ -1,301 +0,0 @@ -""" -Salesforce Connector -Able to download Account, Case, Campaign, EmailMessage, Lead -Salesforce returns everything as a list of json. -This saves each entry as a separate file to be partitioned. 
-Using JWT authorization -https://developer.salesforce.com/docs/atlas.en-us.sfdx_dev.meta/sfdx_dev/sfdx_dev_auth_key_and_cert.htm -https://developer.salesforce.com/docs/atlas.en-us.sfdx_dev.meta/sfdx_dev/sfdx_dev_auth_connected_app.htm -""" - -import json -import typing as t -from collections import OrderedDict -from dataclasses import dataclass, field -from datetime import datetime -from email.utils import formatdate -from pathlib import Path -from string import Template -from textwrap import dedent - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - - -class MissingCategoryError(Exception): - """There are no categories with that name.""" - - -SALESFORCE_API_VERSION = "57.0" - -ACCEPTED_CATEGORIES = ["Account", "Case", "Campaign", "EmailMessage", "Lead"] - -EMAIL_TEMPLATE = Template( - """MIME-Version: 1.0 -Date: $date -Message-ID: $message_identifier -Subject: $subject -From: $from_email -To: $to_email -Content-Type: multipart/alternative; boundary="00000000000095c9b205eff92630" ---00000000000095c9b205eff92630 -Content-Type: text/plain; charset="UTF-8" -$textbody ---00000000000095c9b205eff92630 -Content-Type: text/html; charset="UTF-8" -$htmlbody ---00000000000095c9b205eff92630-- -""", -) - - -@dataclass -class SalesforceAccessConfig(AccessConfig): - consumer_key: str = enhanced_field(sensitive=True) - private_key: str = enhanced_field(sensitive=True) - - @requires_dependencies(["cryptography"]) - def get_private_key_value_and_type(self) -> t.Tuple[str, t.Type]: - from cryptography.hazmat.primitives import serialization - - try: - serialization.load_pem_private_key(data=self.private_key.encode("utf-8"), password=None) - except ValueError: - pass - else: - return self.private_key, str - - if Path(self.private_key).is_file(): - return self.private_key, Path - - raise ValueError("private_key does not contain PEM private key or path") - - -@dataclass -class SimpleSalesforceConfig(BaseConnectorConfig): - """Connector specific attributes""" - - access_config: SalesforceAccessConfig - categories: t.List[str] - username: str - recursive: bool = False - - @requires_dependencies(["simple_salesforce"], extras="salesforce") - def get_client(self): - from simple_salesforce import Salesforce - - pkey_value, pkey_type = self.access_config.get_private_key_value_and_type() - - return Salesforce( - username=self.username, - consumer_key=self.access_config.consumer_key, - privatekey_file=pkey_value if pkey_type is Path else None, - privatekey=pkey_value if pkey_type is str else None, - version=SALESFORCE_API_VERSION, - ) - - -@dataclass -class SalesforceIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - connector_config: SimpleSalesforceConfig - record_type: str - record_id: str - registry_name: str = "salesforce" - _record: OrderedDict = field(default_factory=lambda: OrderedDict()) - - @property - def record(self): - if not self._record: - self._record = self.get_record() - return self._record - - def get_file_extension(self) -> str: - if self.record_type == "EmailMessage": - extension = ".eml" - elif self.record_type in ["Account", "Lead", "Case", "Campaign"]: - extension = ".xml" - 
else: - raise MissingCategoryError( - f"There are no categories with the name: {self.record_type}", - ) - return extension - - def _tmp_download_file(self) -> Path: - record_file = self.record_id + self.get_file_extension() - return Path(self.read_config.download_dir) / self.record_type / record_file - - @property - def _output_filename(self) -> Path: - record_file = self.record_id + self.get_file_extension() + ".json" - return Path(self.processor_config.output_dir) / self.record_type / record_file - - def _create_full_tmp_dir_path(self): - self._tmp_download_file().parent.mkdir(parents=True, exist_ok=True) - - def _xml_for_record(self, record: OrderedDict) -> str: - """Creates partitionable xml file from a record""" - import xml.etree.ElementTree as ET - - def flatten_dict(data, parent, prefix=""): - for key, value in data.items(): - if isinstance(value, OrderedDict): - flatten_dict(value, parent, prefix=f"{prefix}{key}.") - else: - item = ET.Element("item") - item.text = f"{prefix}{key}: {value}" - parent.append(item) - - root = ET.Element("root") - flatten_dict(record, root) - xml_string = ET.tostring(root, encoding="utf-8", xml_declaration=True).decode() - return xml_string - - def _eml_for_record(self, email_json: t.Dict[str, t.Any]) -> str: - from dateutil import parser # type: ignore - - """Recreates standard expected .eml format using template.""" - eml = EMAIL_TEMPLATE.substitute( - date=formatdate(parser.parse(email_json.get("MessageDate")).timestamp()), - message_identifier=email_json.get("MessageIdentifier"), - subject=email_json.get("Subject"), - from_email=email_json.get("FromAddress"), - to_email=email_json.get("ToAddress"), - textbody=email_json.get("TextBody"), - # TODO: This is a hack to get emails to process correctly. - # The HTML partitioner seems to have issues with
and text without tags like
- htmlbody=email_json.get("HtmlBody", "") # "" because you can't .replace None - .replace("
", "
") - .replace(" OrderedDict: - # Get record from Salesforce based on id - response = self._get_response() - logger.debug(f"response was returned for salesforce record id: {self.record_id}") - records = response["records"] - if not records: - raise ValueError( - f"No record found with record id {self.record_id}: {json.dumps(response)}" - ) - record_json = records[0] - return record_json - - def update_source_metadata(self) -> None: # type: ignore - record_json = self.record - - date_format = "%Y-%m-%dT%H:%M:%S.000+0000" - self.source_metadata = SourceMetadata( - date_created=datetime.strptime(record_json["CreatedDate"], date_format).isoformat(), - date_modified=datetime.strptime( - record_json["LastModifiedDate"], - date_format, - ).isoformat(), - # SystemModstamp is Timestamp if record has been modified by person or automated system - version=record_json.get("SystemModstamp"), - source_url=record_json["attributes"].get("url"), - exists=True, - ) - - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - """Saves individual json records locally.""" - self._create_full_tmp_dir_path() - record = self.record - - self.update_source_metadata() - - try: - if self.record_type == "EmailMessage": - document = self._eml_for_record(record) - else: - document = self._xml_for_record(record) - - with open(self._tmp_download_file(), "w") as page_file: - page_file.write(document) - - except Exception as e: - logger.error( - f"Error while downloading and saving file: {self.record_id}.", - ) - logger.error(e) - - @property - def filename(self): - """The filename of the file created from a Salesforce record""" - return self._tmp_download_file() - - -@dataclass -class SalesforceSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - connector_config: SimpleSalesforceConfig - - def __post_init__(self): - self.ingest_doc_cls: t.Type[SalesforceIngestDoc] = SalesforceIngestDoc - - def initialize(self): - pass - - @requires_dependencies(["simple_salesforce"], extras="salesforce") - def check_connection(self): - from simple_salesforce.exceptions import SalesforceError - - try: - self.connector_config.get_client() - except SalesforceError as salesforce_error: - logger.error(f"failed to validate connection: {salesforce_error}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {salesforce_error}") - - @requires_dependencies(["simple_salesforce"], extras="salesforce") - def get_ingest_docs(self) -> t.List[SalesforceIngestDoc]: - """Get Salesforce Ids for the records. - Send them to next phase where each doc gets downloaded into the - appropriate format for partitioning. 
- """ - from simple_salesforce.exceptions import SalesforceMalformedRequest - - client = self.connector_config.get_client() - - ingest_docs = [] - for record_type in self.connector_config.categories: - if record_type not in ACCEPTED_CATEGORIES: - raise ValueError(f"{record_type} not currently an accepted Salesforce category") - - try: - # Get ids from Salesforce - records = client.query_all( - f"select Id from {record_type}", - ) - for record in records["records"]: - ingest_docs.append( - SalesforceIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - record_type=record_type, - record_id=record["Id"], - ), - ) - except SalesforceMalformedRequest as e: - raise SalesforceMalformedRequest(f"Problem with Salesforce query: {e}") - - return ingest_docs diff --git a/unstructured/ingest/connector/sharepoint.py b/unstructured/ingest/connector/sharepoint.py deleted file mode 100644 index c65722404..000000000 --- a/unstructured/ingest/connector/sharepoint.py +++ /dev/null @@ -1,573 +0,0 @@ -import json -import os -import typing as t -from dataclasses import dataclass -from html import unescape -from pathlib import Path -from urllib.parse import urlparse - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, -) -from unstructured.ingest.interfaces import PermissionsConfig as SharepointPermissionsConfig -from unstructured.ingest.logger import logger -from unstructured.ingest.utils.string_and_date_utils import ensure_isoformat_datetime -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from office365.sharepoint.client_context import ClientContext - from office365.sharepoint.files.file import File - from office365.sharepoint.publishing.pages.page import SitePage - -MAX_MB_SIZE = 512_000_000 -CONTENT_LABELS = ["CanvasContent1", "LayoutWebpartsContent1", "TimeCreated"] - - -@dataclass -class SharepointAccessConfig(AccessConfig): - client_cred: str = enhanced_field(repr=False, sensitive=True) - - -@dataclass -class SimpleSharepointConfig(BaseConnectorConfig): - access_config: SharepointAccessConfig - client_id: str - site: str - path: str - process_pages: bool = enhanced_field(default=True, init=False) - recursive: bool = False - files_only: bool = False - permissions_config: t.Optional[SharepointPermissionsConfig] = None - - def __post_init__(self): - if not (self.client_id and self.access_config.client_cred and self.site): - raise ValueError( - "Please provide one of the following mandatory values:" - "\n--client-id\n--client-cred\n--site", - ) - self.process_pages = not self.files_only - - @requires_dependencies(["office365"], extras="sharepoint") - def get_site_client(self, site_url: str = "") -> "ClientContext": - from office365.runtime.auth.client_credential import ClientCredential - from office365.sharepoint.client_context import ClientContext - - try: - site_client = ClientContext(site_url or self.site).with_credentials( - ClientCredential(self.client_id, self.access_config.client_cred), - ) - except Exception: - logger.error("Couldn't set Sharepoint client.") - raise - return site_client - - def get_permissions_client(self): - try: - permissions_connector = 
SharepointPermissionsConnector(self.permissions_config) - assert permissions_connector.access_token - return permissions_connector - except Exception as e: - logger.error("Couldn't obtain Sharepoint permissions ingestion access token:", e) - - -@dataclass -class SharepointIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - connector_config: SimpleSharepointConfig - site_url: str - server_path: str - is_page: bool - file_path: str - registry_name: str = "sharepoint" - - def __post_init__(self): - self.extension = Path(self.file_path).suffix if not self.is_page else ".html" - self.extension = ".html" if self.extension == ".aspx" else self.extension - if not self.extension: - raise ValueError("Unsupported file without extension.") - - self._set_download_paths() - - def _set_download_paths(self) -> None: - """Parses the folder structure from the source and creates the download and output paths""" - download_path = Path(f"{self.read_config.download_dir}") - output_path = Path(f"{self.processor_config.output_dir}") - parent = Path(self.file_path).with_suffix(self.extension) - self.download_dir = (download_path / parent.parent).resolve() - self.download_filepath = (download_path / parent).resolve() - output_filename = str(parent) + ".json" - self.output_dir = (output_path / parent.parent).resolve() - self.output_filepath = (output_path / output_filename).resolve() - - @property - def filename(self): - return Path(self.download_filepath).resolve() - - @property - def _output_filename(self): - return Path(self.output_filepath).resolve() - - @property - def record_locator(self) -> t.Optional[t.Dict[str, t.Any]]: - return { - "server_path": self.server_path, - "site_url": self.site_url, - } - - @SourceConnectionNetworkError.wrap - @requires_dependencies(["office365"], extras="sharepoint") - def _fetch_file(self, properties_only: bool = False): - """Retrieves the actual page/file from the Sharepoint instance""" - from office365.runtime.client_request_exception import ClientRequestException - - site_client = self.connector_config.get_site_client(self.site_url) - - try: - if self.is_page: - file = site_client.web.get_file_by_server_relative_path("/" + self.server_path) - file = file.listItemAllFields.select(CONTENT_LABELS).get().execute_query() - else: - file = site_client.web.get_file_by_server_relative_url(self.server_path) - if properties_only: - file = file.get().execute_query() - except ClientRequestException as e: - if e.response.status_code == 404: - return None - raise - return file - - def _fetch_page(self): - site_client = self.connector_config.get_site_client(self.site_url) - try: - page = ( - site_client.site_pages.pages.get_by_url(self.server_path) - .expand(["FirstPublished", "Modified", "Version"]) - .get() - .execute_query() - ) - except Exception as e: - logger.error(f"Failed to retrieve page {self.server_path} from site {self.site_url}") - logger.error(e) - return None - return page - - def update_permissions_data(self): - def parent_name_matches(parent_type, permissions_filename, ingest_doc_filepath): - permissions_filename = permissions_filename.split("_SEP_") - ingest_doc_filepath = ingest_doc_filepath.split("/") - - if parent_type == "sites": - return permissions_filename[0] == ingest_doc_filepath[1] - - elif parent_type == "SitePages" or parent_type == "Shared Documents": - return True - - permissions_data = None - permissions_dir = Path(self.processor_config.output_dir) / "permissions_data" - - if permissions_dir.is_dir(): - parent_type = self.file_path.split("/")[0] - - if 
parent_type == "sites": - read_dir = permissions_dir / "sites" - elif parent_type == "SitePages" or parent_type == "Shared Documents": - read_dir = permissions_dir / "other" - else: - read_dir = permissions_dir / "other" - - for filename in os.listdir(read_dir): - permissions_docname = os.path.splitext(filename)[0].split("_SEP_")[1] - ingestdoc_docname = self.file_path.split("/")[-1] - - if ingestdoc_docname == permissions_docname and parent_name_matches( - parent_type=parent_type, - permissions_filename=filename, - ingest_doc_filepath=self.file_path, - ): - with open(read_dir / filename) as f: - permissions_data = json.loads(f.read()) - - return permissions_data - - def update_source_metadata(self, **kwargs): - if self.is_page: - page = self._fetch_page() - if page is None: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - self.source_metadata = SourceMetadata( - date_created=page.get_property("FirstPublished", None), - date_modified=page.get_property("Modified", None), - version=page.get_property("Version", ""), - source_url=page.absolute_url, - exists=True, - permissions_data=( - self.update_permissions_data() - if self.connector_config.permissions_config - else None - ), - ) - return - - file = self._fetch_file(True) - if file is None: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - self.source_metadata = SourceMetadata( - date_created=ensure_isoformat_datetime(timestamp=file.time_created), - date_modified=ensure_isoformat_datetime(timestamp=file.time_last_modified), - version=file.major_version, - source_url=file.properties.get("LinkingUrl", None), - exists=True, - permissions_data=( - self.update_permissions_data() if self.connector_config.permissions_config else None - ), - ) - - def _download_page(self): - """Formats and saves locally page content""" - content = self._fetch_file() - self.update_source_metadata() - pld = (content.properties.get("LayoutWebpartsContent1", "") or "") + ( - content.properties.get("CanvasContent1", "") or "" - ) - if pld != "": - pld = unescape(pld) - else: - logger.info( - f"Page {self.server_path} has no retrievable content. \ - Dumping empty doc.", - ) - pld = "
<div></div>
" - - self.output_dir.mkdir(parents=True, exist_ok=True) - if not self.download_dir.is_dir(): - logger.debug(f"Creating directory: {self.download_dir}") - self.download_dir.mkdir(parents=True, exist_ok=True) - with self.filename.open(mode="w") as f: - f.write(pld) - logger.info(f"File downloaded: {self.filename}") - - def _download_file(self): - file = self._fetch_file() - self.update_source_metadata() - fsize = file.length - self.output_dir.mkdir(parents=True, exist_ok=True) - - if not self.download_dir.is_dir(): - logger.debug(f"Creating directory: {self.download_dir}") - self.download_dir.mkdir(parents=True, exist_ok=True) - - if fsize > MAX_MB_SIZE: - logger.info(f"Downloading file with size: {fsize} bytes in chunks") - with self.filename.open(mode="wb") as f: - file.download_session(f, chunk_size=1024 * 1024 * 100).execute_query() - else: - with self.filename.open(mode="wb") as f: - file.download(f).execute_query() - logger.info(f"File downloaded: {self.filename}") - - @BaseSingleIngestDoc.skip_if_file_exists - @SourceConnectionError.wrap - @requires_dependencies(["office365"]) - def get_file(self): - if self.is_page: - self._download_page() - else: - self._download_file() - return - - -@dataclass -class SharepointSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - connector_config: SimpleSharepointConfig - - def check_connection(self): - try: - site_client = self.connector_config.get_site_client() - site_client.site_pages.pages.get().execute_query() - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - @requires_dependencies(["office365"], extras="sharepoint") - def _list_files(self, folder, recursive) -> t.List["File"]: - from office365.runtime.client_request_exception import ClientRequestException - - try: - objects = folder.expand(["Files", "Folders"]).get().execute_query() - files = list(objects.files) - if not recursive: - return files - for f in objects.folders: - if "/Forms" in f.serverRelativeUrl: - continue - files += self._list_files(f, recursive) - return files - except ClientRequestException as e: - if e.response.status_code != 404: - logger.info("Caught an error while processing documents %s", e.response.text) - return [] - - def _prepare_ingest_doc(self, obj: t.Union["File", "SitePage"], base_url, is_page=False): - if is_page: - file_path = obj.get_property("Url", "") - server_path = file_path if file_path[0] != "/" else file_path[1:] - if (url_path := (urlparse(base_url).path)) and (url_path != "/"): - file_path = url_path[1:] + "/" + file_path - else: - server_path = obj.serverRelativeUrl - file_path = obj.serverRelativeUrl[1:] - - return SharepointIngestDoc( - processor_config=self.processor_config, - read_config=self.read_config, - connector_config=self.connector_config, - site_url=base_url, - server_path=server_path, - is_page=is_page, - file_path=file_path, - ) - - @requires_dependencies(["office365"], extras="sharepoint") - def _list_pages(self, site_client) -> list: - from office365.runtime.client_request_exception import ClientRequestException - - try: - site_pages = site_client.site_pages.pages.get().execute_query() - except ClientRequestException as e: - logger.info( - "Caught an error while retrieving site pages from %s \n%s", - site_client.base_url, - e.response.text, - ) - return [] - - return [self._prepare_ingest_doc(page, site_client.base_url, True) for page in site_pages] - - def _ingest_site_docs(self, site_client) -> 
t.List["SharepointIngestDoc"]: - root_folder = site_client.web.get_folder_by_server_relative_path(self.connector_config.path) - files = self._list_files(root_folder, self.connector_config.recursive) - if not files: - logger.info( - f"No processable files at path {self.connector_config.path}\ - for site {site_client.base_url}", - ) - output = [] - for file in files: - try: - output.append(self._prepare_ingest_doc(file, site_client.base_url)) - except ValueError as e: - logger.error("Unable to process file %s", file.properties["Name"]) - logger.error(e) - if self.connector_config.process_pages: - page_output = self._list_pages(site_client) - if not page_output: - logger.info(f"Couldn't process pages for site {site_client.base_url}") - output = output + page_output - return output - - def initialize(self): - pass - - def get_ingest_docs(self): - base_site_client = self.connector_config.get_site_client() - - if not all( - getattr(self.connector_config.permissions_config, attr, False) - for attr in ["application_id", "client_cred", "tenant"] - ): - logger.info( - "Permissions config is not fed with 'application_id', 'client_cred' and 'tenant'." - "Skipping permissions ingestion.", - ) - else: - permissions_client = self.connector_config.get_permissions_client() - if permissions_client: - permissions_client.write_all_permissions(self.processor_config.output_dir) - - if not base_site_client.is_tenant: - return self._ingest_site_docs(base_site_client) - tenant = base_site_client.tenant - tenant_sites = tenant.get_site_properties_from_sharepoint_by_filters().execute_query() - tenant_sites = {s.url for s in tenant_sites if (s.url is not None)} - ingest_docs: t.List[SharepointIngestDoc] = [] - for site_url in tenant_sites: - logger.info(f"Processing docs for site: {site_url}") - site_client = self.connector_config.get_site_client(site_url) - ingest_docs = ingest_docs + self._ingest_site_docs(site_client) - return ingest_docs - - -@dataclass -class SharepointPermissionsConnector: - def __init__(self, permissions_config): - self.permissions_config: SharepointPermissionsConfig = permissions_config - self.initialize() - - def initialize(self): - self.access_token: str = self.get_access_token() - - @requires_dependencies(["requests"], extras="sharepoint") - def get_access_token(self) -> str: - import requests - - url = ( - f"https://login.microsoftonline.com/{self.permissions_config.tenant}/oauth2/v2.0/token" - ) - headers = {"Content-Type": "application/x-www-form-urlencoded"} - data = { - "client_id": self.permissions_config.application_id, - "scope": "https://graph.microsoft.com/.default", - "client_secret": self.permissions_config.client_cred, - "grant_type": "client_credentials", - } - response = requests.post(url, headers=headers, data=data) - return response.json()["access_token"] - - def validated_response(self, response): - if response.status_code == 200: - return response.json() - else: - logger.info(f"Request failed with status code {response.status_code}:") - logger.info(response.text) - - @requires_dependencies(["requests"], extras="sharepoint") - def get_sites(self): - import requests - - url = "https://graph.microsoft.com/v1.0/sites" - params = { - "$select": "webUrl, id", - } - - headers = { - "Authorization": f"Bearer {self.access_token}", - } - - response = requests.get(url, params=params, headers=headers) - return self.validated_response(response) - - @requires_dependencies(["requests"], extras="sharepoint") - def get_drives(self, site): - import requests - - url = 
f"https://graph.microsoft.com/v1.0/sites/{site}/drives" - - headers = { - "Authorization": f"Bearer {self.access_token}", - } - - response = requests.get(url, headers=headers) - - return self.validated_response(response) - - @requires_dependencies(["requests"], extras="sharepoint") - def get_drive_items(self, site, drive_id): - import requests - - url = f"https://graph.microsoft.com/v1.0/sites/{site}/drives/{drive_id}/root/children" - - headers = { - "Authorization": f"Bearer {self.access_token}", - } - - response = requests.get(url, headers=headers) - - return self.validated_response(response) - - def extract_site_name_from_weburl(self, weburl): - split_path = urlparse(weburl).path.lstrip("/").split("/") - - if split_path[0] == "sites": - return "sites", split_path[1] - - elif split_path[0] == "Shared%20Documents": - return "Shared Documents", "Shared Documents" - - elif split_path[0] == "personal": - return "Personal", "Personal" - - elif split_path[0] == "_layouts": - return "layouts", "layouts" - - # if other weburl structures are found, additional logic might need to be implemented - - logger.warning( - """Couldn't extract sitename, unknown site or parent type. Skipping permissions - ingestion for the document with the URL:""", - weburl, - ) - - return None, None - - @requires_dependencies(["requests"], extras="sharepoint") - def get_permissions_for_drive_item(self, site, drive_id, item_id): - import requests - - url = f"https://graph.microsoft.com/v1.0/sites/ \ - {site}/drives/{drive_id}/items/{item_id}/permissions" - - headers = { - "Authorization": f"Bearer {self.access_token}", - } - - response = requests.get(url, headers=headers) - - return self.validated_response(response) - - def write_all_permissions(self, output_dir): - sites = [(site["id"], site["webUrl"]) for site in self.get_sites()["value"]] - drive_ids = [] - - logger.info("Obtaining drive data for sites for permissions (rbac)") - for site_id, site_url in sites: - drives = self.get_drives(site_id) - if drives: - drives_for_site = drives["value"] - drive_ids.extend([(site_id, drive["id"]) for drive in drives_for_site]) - - logger.info("Obtaining item data from drives for permissions (rbac)") - item_ids = [] - for site, drive_id in drive_ids: - drive_items = self.get_drive_items(site, drive_id) - if drive_items: - item_ids.extend( - [ - (site, drive_id, item["id"], item["name"], item["webUrl"]) - for item in drive_items["value"] - ], - ) - - permissions_dir = Path(output_dir) / "permissions_data" - - logger.info("Writing permissions data to disk") - for site, drive_id, item_id, item_name, item_web_url in item_ids: - res = self.get_permissions_for_drive_item(site, drive_id, item_id) - if res: - parent_type, parent_name = self.extract_site_name_from_weburl(item_web_url) - - if parent_type == "sites": - write_path = permissions_dir / "sites" / f"{parent_name}_SEP_{item_name}.json" - - elif parent_type == "Personal" or parent_type == "Shared Documents": - write_path = permissions_dir / "other" / f"{parent_name}_SEP_{item_name}.json" - else: - write_path = permissions_dir / "other" / f"{parent_name}_SEP_{item_name}.json" - - if not Path(os.path.dirname(write_path)).is_dir(): - os.makedirs(os.path.dirname(write_path)) - - with open(write_path, "w") as f: - json.dump(res["value"], f) diff --git a/unstructured/ingest/connector/slack.py b/unstructured/ingest/connector/slack.py deleted file mode 100644 index 4f6a8ce42..000000000 --- a/unstructured/ingest/connector/slack.py +++ /dev/null @@ -1,224 +0,0 @@ -import typing as t -import 
xml.etree.ElementTree as ET -from dataclasses import dataclass -from datetime import datetime -from pathlib import Path - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, -) -from unstructured.ingest.logger import logger -from unstructured.utils import ( - requires_dependencies, - validate_date_args, -) - -DATE_FORMATS = ("%Y-%m-%d", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%S%z") - - -@dataclass -class SlackAccessConfig(AccessConfig): - token: str = enhanced_field(sensitive=True) - - -@dataclass -class SimpleSlackConfig(BaseConnectorConfig): - """Connector config to process all messages by channel id's.""" - - access_config: SlackAccessConfig - channels: t.List[str] - start_date: t.Optional[str] = None - end_date: t.Optional[str] = None - - def validate_inputs(self): - oldest_valid = True - latest_valid = True - - if self.start_date: - oldest_valid = validate_date_args(self.start_date) - - if self.end_date: - latest_valid = validate_date_args(self.end_date) - - return oldest_valid, latest_valid - - def __post_init__(self): - oldest_valid, latest_valid = self.validate_inputs() - if not oldest_valid and not latest_valid: - raise ValueError( - "Start and/or End dates are not valid. ", - ) - - -@dataclass -class SlackIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - """Class encapsulating fetching a doc and writing processed results (but not - doing the processing!). - - Also includes a cleanup method. When things go wrong and the cleanup - method is not called, the file is left behind on the filesystem to assist debugging. 
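As context for the `DATE_FORMATS` tuple and the date validation above, a small self-contained sketch of the conversion the removed `convert_datetime` helper performed (date string to Unix timestamp for the Slack API); the function name here is illustrative:

```python
from datetime import datetime
from typing import Optional

DATE_FORMATS = ("%Y-%m-%d", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%S%z")


def to_slack_timestamp(date_time: str) -> Optional[float]:
    # Try each accepted format in turn; the first one that parses wins.
    for fmt in DATE_FORMATS:
        try:
            return datetime.strptime(date_time, fmt).timestamp()
        except ValueError:
            continue
    return None


print(to_slack_timestamp("2024-01-01"))                 # naive date, local-timezone dependent
print(to_slack_timestamp("2024-01-01T12:30:00+00:00"))  # 1704112200.0
```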
- """ - - connector_config: SimpleSlackConfig - channel: str - registry_name: str = "slack" - - # NOTE(crag): probably doesn't matter, but intentionally not defining tmp_download_file - # __post_init__ for multiprocessing simplicity (no Path objects in initially - # instantiated object) - def _tmp_download_file(self): - channel_file = self.channel + ".xml" - return Path(self.read_config.download_dir) / channel_file - - @property - def _output_filename(self): - output_file = self.channel + ".json" - return Path(self.processor_config.output_dir) / output_file - - @property - def version(self) -> t.Optional[str]: - return None - - @property - def source_url(self) -> t.Optional[str]: - return None - - def _create_full_tmp_dir_path(self): - self._tmp_download_file().parent.mkdir(parents=True, exist_ok=True) - - @SourceConnectionNetworkError.wrap - @requires_dependencies(dependencies=["slack_sdk"], extras="slack") - def _fetch_messages(self): - from slack_sdk import WebClient - - self.client = WebClient(token=self.connector_config.access_config.token) - oldest = "0" - latest = "0" - if self.connector_config.start_date: - oldest = self.convert_datetime(self.connector_config.start_date) - - if self.connector_config.end_date: - latest = self.convert_datetime(self.connector_config.end_date) - - result = self.client.conversations_history( - channel=self.channel, - oldest=oldest, - latest=latest, - ) - return result - - def update_source_metadata(self, **kwargs): - result = kwargs.get("result", self._fetch_messages()) - if result is None: - self.source_metadata = SourceMetadata( - exists=True, - ) - return - timestamps = [m["ts"] for m in result["messages"]] - timestamps.sort() - date_created = None - date_modified = None - if len(timestamps) > 0: - date_created = datetime.fromtimestamp(float(timestamps[0])).isoformat() - date_modified = datetime.fromtimestamp( - float(timestamps[len(timestamps) - 1]), - ).isoformat() - - self.source_metadata = SourceMetadata( - date_created=date_created, - date_modified=date_modified, - exists=True, - ) - - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - @requires_dependencies(dependencies=["slack_sdk"], extras="slack") - def get_file(self): - from slack_sdk.errors import SlackApiError - - """Fetches the data from a slack channel and stores it locally.""" - - self._create_full_tmp_dir_path() - - result = self._fetch_messages() - self.update_source_metadata(result=result) - root = ET.Element("messages") - for message in result["messages"]: - message_elem = ET.SubElement(root, "message") - text_elem = ET.SubElement(message_elem, "text") - text_elem.text = message.get("text") - - cursor = None - while True: - try: - response = self.client.conversations_replies( - channel=self.channel, - ts=message["ts"], - cursor=cursor, - ) - - for reply in response["messages"]: - reply_msg = reply.get("text") - text_elem.text = "".join([str(text_elem.text), " ", reply_msg]) - - if not response["has_more"]: - break - - cursor = response["response_metadata"]["next_cursor"] - - except SlackApiError as e: - logger.error(f"Error retrieving replies: {e.response['error']}") - tree = ET.ElementTree(root) - tree.write(self._tmp_download_file(), encoding="utf-8", xml_declaration=True) - - def convert_datetime(self, date_time): - for format in DATE_FORMATS: - try: - return datetime.strptime(date_time, format).timestamp() - except ValueError: - pass - - @property - def filename(self): - """The filename of the file created from a slack channel""" - return 
self._tmp_download_file() - - -class SlackSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - """Objects of this class support fetching document(s) from""" - - connector_config: SimpleSlackConfig - - @requires_dependencies(dependencies=["slack_sdk"], extras="slack") - def check_connection(self): - from slack_sdk import WebClient - from slack_sdk.errors import SlackClientError - - try: - client = WebClient(token=self.connector_config.access_config.token) - client.users_identity() - except SlackClientError as slack_error: - logger.error(f"failed to validate connection: {slack_error}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {slack_error}") - - def initialize(self): - """Verify that can get metadata for an object, validates connections info.""" - - def get_ingest_docs(self): - return [ - SlackIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - channel=channel, - ) - for channel in self.connector_config.channels - ] diff --git a/unstructured/ingest/connector/sql.py b/unstructured/ingest/connector/sql.py deleted file mode 100644 index 21f1f4a1f..000000000 --- a/unstructured/ingest/connector/sql.py +++ /dev/null @@ -1,196 +0,0 @@ -import copy -import json -import typing as t -import uuid -from dataclasses import dataclass, field - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.enhanced_dataclass.core import _asdict -from unstructured.ingest.error import DestinationConnectionError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseDestinationConnector, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -ELEMENTS_TABLE_NAME = "elements" - - -@dataclass -class SqlAccessConfig(AccessConfig): - username: t.Optional[str] - password: t.Optional[str] = enhanced_field(sensitive=True) - - -@dataclass -class SimpleSqlConfig(BaseConnectorConfig): - db_type: t.Optional[str] - host: t.Optional[str] - database: t.Optional[str] - port: t.Optional[int] - access_config: SqlAccessConfig - - def __post_init__(self): - if (self.db_type == "sqlite") and (self.database is None): - raise ValueError( - "A sqlite connection requires a path to a *.db file " - "through the `database` argument" - ) - - @property - def connection(self): - if self.db_type == "postgresql": - return self._make_psycopg_connection - elif self.db_type == "sqlite": - return self._make_sqlite_connection - raise ValueError(f"Unsupported database {self.db_type} connection.") - - def _make_sqlite_connection(self): - from sqlite3 import connect - - return connect(database=self.database) - - @requires_dependencies(["psycopg2"], extras="postgres") - def _make_psycopg_connection(self): - from psycopg2 import connect - - return connect( - user=self.access_config.username, - password=self.access_config.password, - dbname=self.database, - host=self.host, - port=self.port, - ) - - -@dataclass -class SqlDestinationConnector(BaseDestinationConnector): - connector_config: SimpleSqlConfig - _client: t.Optional[t.Any] = field(init=False, default=None) - - def to_dict(self, **kwargs): - """ - The _client variable in this dataclass breaks deepcopy due to: - TypeError: cannot pickle '_thread.lock' object - When serializing, remove it, meaning client data will need to be reinitialized - when deserialized - """ - self_cp = copy.copy(self) - if hasattr(self_cp, "_client"): - setattr(self_cp, "_client", 
None) - return _asdict(self_cp, **kwargs) - - @property - def client(self): - if self._client is None: - self._client = self.connector_config.connection() - return self._client - - @DestinationConnectionError.wrap - def initialize(self): - _ = self.client - - def check_connection(self): - try: - cursor = self.client.cursor() - cursor.execute("SELECT 1;") - cursor.close() - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise DestinationConnectionError(f"failed to validate connection: {e}") - - def conform_dict(self, data: dict) -> None: - """ - Updates the element dictionary to conform to the sql schema - """ - from datetime import datetime - - data["id"] = str(uuid.uuid4()) - - # Dict as string formatting - if record_locator := data.get("metadata", {}).get("data_source", {}).get("record_locator"): - # Explicit casting otherwise fails schema type checking - data["metadata"]["data_source"]["record_locator"] = str(json.dumps(record_locator)) - - # Array of items as string formatting - if (embeddings := data.get("embeddings")) and ( - self.connector_config.db_type != "postgresql" - ): - data["embeddings"] = str(json.dumps(embeddings)) - - if points := data.get("metadata", {}).get("coordinates", {}).get("points"): - data["metadata"]["coordinates"]["points"] = str(json.dumps(points)) - - if links := data.get("metadata", {}).get("links", {}): - data["metadata"]["links"] = str(json.dumps(links)) - - if permissions_data := ( - data.get("metadata", {}).get("data_source", {}).get("permissions_data") - ): - data["metadata"]["data_source"]["permissions_data"] = json.dumps(permissions_data) - - if sent_from := data.get("metadata", {}).get("sent_from", {}): - data["metadata"]["sent_from"] = str(json.dumps(sent_from)) - - if sent_to := data.get("metadata", {}).get("sent_to", {}): - data["metadata"]["sent_to"] = str(json.dumps(sent_to)) - - # Datetime formatting - if date_created := data.get("metadata", {}).get("data_source", {}).get("date_created"): - data["metadata"]["data_source"]["date_created"] = datetime.fromisoformat(date_created) - - if date_modified := data.get("metadata", {}).get("data_source", {}).get("date_modified"): - data["metadata"]["data_source"]["date_modified"] = datetime.fromisoformat(date_modified) - - if date_processed := data.get("metadata", {}).get("data_source", {}).get("date_processed"): - data["metadata"]["data_source"]["date_processed"] = datetime.fromisoformat( - date_processed - ) - - if last_modified := data.get("metadata", {}).get("last_modified", {}): - data["metadata"]["last_modified"] = datetime.fromisoformat(last_modified) - - # String casting - if version := data.get("metadata", {}).get("data_source", {}).get("version"): - data["metadata"]["data_source"]["version"] = str(version) - - if page_number := data.get("metadata", {}).get("page_number"): - data["metadata"]["page_number"] = str(page_number) - - if data.get("metadata", {}).get("data_source", None): - data.update(data.get("metadata", {}).pop("data_source", None)) - if data.get("metadata", {}).get("coordinates", None): - data.update(data.get("metadata", {}).pop("coordinates", None)) - if data.get("metadata", {}): - data.update(data.pop("metadata", None)) - - @DestinationConnectionError.wrap - def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None: - logger.info( - f"writing {len(elements_dict)} objects to database {self.connector_config.database} " - f"at {self.connector_config.host}" - ) - - with self.client as conn: - cursor = 
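For the `write_dict` logic that follows, a minimal self-contained illustration (assumed one-off schema, in-memory SQLite) of the same pattern: a per-element parameterized INSERT, `?` placeholders for sqlite versus `%s` for PostgreSQL, and JSON-encoding list values such as embeddings:

```python
import json
import sqlite3

ELEMENTS_TABLE_NAME = "elements"

conn = sqlite3.connect(":memory:")
conn.execute(f"CREATE TABLE {ELEMENTS_TABLE_NAME} (id TEXT, text TEXT, embeddings TEXT)")

elem = {"id": "abc-123", "text": "hello world", "embeddings": [0.1, 0.2]}
db_type = "sqlite"

# Keys drive the column list; placeholder style depends on the driver.
placeholders = ",".join("?" if db_type == "sqlite" else "%s" for _ in elem)
query = f"INSERT INTO {ELEMENTS_TABLE_NAME} ({','.join(elem.keys())}) VALUES({placeholders})"
# sqlite has no array type, so list values are stored as JSON strings.
values = [json.dumps(v) if isinstance(v, list) else v for v in elem.values()]

cursor = conn.cursor()
cursor.execute(query, values)
conn.commit()
print(conn.execute(f"SELECT * FROM {ELEMENTS_TABLE_NAME}").fetchall())
```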
conn.cursor() - - # Since we have no guarantee that each element will have the same keys - # we insert each element individually - for elem in elements_dict: - query = f"INSERT INTO {ELEMENTS_TABLE_NAME} ({','.join(elem.keys())}) \ - VALUES({','.join(['?' if self.connector_config.db_type=='sqlite' else '%s' for x in elem])})" # noqa E501 - values = [] - for v in elem.values(): - if self.connector_config.db_type == "sqlite" and isinstance(v, list): - values.append(json.dumps(v)) - else: - values.append(v) - cursor.execute(query, values) - - conn.commit() - cursor.close() - - # Leaving contexts doesn't close the connection, so doing it here - conn.close() diff --git a/unstructured/ingest/connector/vectara.py b/unstructured/ingest/connector/vectara.py deleted file mode 100644 index e94ff9c4f..000000000 --- a/unstructured/ingest/connector/vectara.py +++ /dev/null @@ -1,248 +0,0 @@ -import datetime -import json -import typing as t -import uuid -from dataclasses import dataclass, field - -import requests - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import DestinationConnectionError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseDestinationConnector, - BaseIngestDoc, - WriteConfig, -) -from unstructured.ingest.logger import logger -from unstructured.staging.base import flatten_dict - -BASE_URL = "https://api.vectara.io/v1" - - -@dataclass -class VectaraAccessConfig(AccessConfig): - oauth_client_id: str = enhanced_field(sensitive=True) - oauth_secret: str = enhanced_field(sensitive=True) - - -@dataclass -class SimpleVectaraConfig(BaseConnectorConfig): - access_config: VectaraAccessConfig - customer_id: str - corpus_name: t.Optional[str] = None - corpus_id: t.Optional[str] = None - token_url: str = "https://vectara-prod-{}.auth.us-west-2.amazoncognito.com/oauth2/token" - - -@dataclass -class VectaraDestinationConnector(BaseDestinationConnector): - write_config: WriteConfig - connector_config: SimpleVectaraConfig - _jwt_token: t.Optional[str] = field(init=False, default=None) - _jwt_token_expires_ts: t.Optional[float] = field(init=False, default=None) - - @property - def jwt_token(self): - if ( - not self._jwt_token - or self._jwt_token_expires_ts - datetime.datetime.now().timestamp() <= 60 - ): - self._jwt_token = self._get_jwt_token() - return self._jwt_token - - @DestinationConnectionError.wrap - def vectara(self): - """ - Check the connection for Vectara and validate corpus exists. - - If more than one corpus with the same name exists - then return a message - - If exactly one corpus exists with this name - use it. - - If does not exist - create it. 
- """ - try: - # Get token if not already set - self.jwt_token - - list_corpora_response = self._request( - endpoint="list-corpora", - data={"numResults": 1, "filter": self.connector_config.corpus_name}, - ) - - possible_corpora_ids_names_map = { - corpus.get("id"): corpus.get("name") - for corpus in list_corpora_response.get("corpus") - if corpus.get("name") == self.connector_config.corpus_name - } - - if len(possible_corpora_ids_names_map) > 1: - return f"Multiple Corpora exist with name {self.connector_config.corpus_name}" - if len(possible_corpora_ids_names_map) == 1: - self.connector_config.corpus_id = list(possible_corpora_ids_names_map.keys())[0] - else: - data = { - "corpus": { - "name": self.connector_config.corpus_name, - } - } - create_corpus_response = self._request(endpoint="create-corpus", data=data) - self.connector_config.corpus_id = create_corpus_response.get("corpusId") - - except Exception as e: - logger.error(f"failed to create Vectara connection: {e}", exc_info=True) - raise DestinationConnectionError(f"failed to create Vectara connection: {e}") - - def initialize(self): - self.vectara() - - def _request( - self, - endpoint: str, - http_method: str = "POST", - params: t.Mapping[str, t.Any] = None, - data: t.Mapping[str, t.Any] = None, - ): - url = f"{BASE_URL}/{endpoint}" - - headers = { - "Content-Type": "application/json", - "Accept": "application/json", - "Authorization": f"Bearer {self.jwt_token}", - "customer-id": self.connector_config.customer_id, - "X-source": "unstructured", - } - - response = requests.request( - method=http_method, url=url, headers=headers, params=params, data=json.dumps(data) - ) - response.raise_for_status() - return response.json() - - # Get Oauth2 JWT token - def _get_jwt_token(self): - """Connect to the server and get a JWT token.""" - token_endpoint = self.connector_config.token_url.format(self.connector_config.customer_id) - headers = { - "Content-Type": "application/x-www-form-urlencoded", - } - data = { - "grant_type": "client_credentials", - "client_id": self.connector_config.access_config.oauth_client_id, - "client_secret": self.connector_config.access_config.oauth_secret, - } - - response = requests.request(method="POST", url=token_endpoint, headers=headers, data=data) - response.raise_for_status() - response_json = response.json() - - request_time = datetime.datetime.now().timestamp() - self._jwt_token_expires_ts = request_time + response_json.get("expires_in") - - return response_json.get("access_token") - - @DestinationConnectionError.wrap - def check_connection(self): - try: - self.vectara() - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise DestinationConnectionError(f"failed to validate connection: {e}") - - def _delete_doc(self, doc_id: str) -> None: - """ - Delete a document from the Vectara corpus. - - Args: - url (str): URL of the page to delete. - doc_id (str): ID of the document to delete. 
- """ - body = { - "customer_id": self.connector_config.customer_id, - "corpus_id": self.connector_config.corpus_id, - "document_id": doc_id, - } - self._request(endpoint="delete-doc", data=body) - - def _index_document(self, document: t.Dict[str, t.Any]) -> None: - """ - Index a document (by uploading it to the Vectara corpus) from the document dictionary - """ - body = { - "customer_id": self.connector_config.customer_id, - "corpus_id": self.connector_config.corpus_id, - "document": document, - } - - try: - result = self._request(endpoint="index", data=body, http_method="POST") - except Exception as e: - logger.info(f"Exception {e} while indexing document {document['documentId']}") - return - - if ( - "status" in result - and result["status"] - and ( - "ALREADY_EXISTS" in result["status"]["code"] - or ( - "CONFLICT" in result["status"]["code"] - and "Indexing doesn't support updating documents" - in result["status"]["statusDetail"] - ) - ) - ): - logger.info(f"Document {document['documentId']} already exists, re-indexing") - self._delete_doc(document["documentId"]) - result = self._request(endpoint="index", data=body, http_method="POST") - return - - if "status" in result and result["status"] and "OK" in result["status"]["code"]: - logger.info(f"Indexing document {document['documentId']} succeeded") - else: - logger.info(f"Indexing document {document['documentId']} failed, response = {result}") - - def write_dict(self, *args, docs_list: t.List[t.Dict[str, t.Any]], **kwargs) -> None: - logger.info(f"Inserting / updating {len(docs_list)} documents to Vectara ") - for vdoc in docs_list: - self._index_document(vdoc) - - def write(self, docs: t.List[BaseIngestDoc]) -> None: - docs_list: t.Dict[t.Dict[str, t.Any]] = [] - - def get_metadata(element) -> t.Dict[str, t.Any]: - """ - Select which meta-data fields to include and optionaly map them to a new new. 
- remove the "metadata-" prefix from the keys - """ - metadata_map = { - "page_number": "page_number", - "data_source-url": "url", - "filename": "filename", - "filetype": "filetype", - "last_modified": "last_modified", - } - md = flatten_dict(element, separator="-", flatten_lists=True) - md = {k.replace("metadata-", ""): v for k, v in md.items()} - md = {metadata_map[k]: v for k, v in md.items() if k in metadata_map} - return md - - for doc in docs: - local_path = doc._output_filename - with open(local_path) as json_file: - dict_content = json.load(json_file) - vdoc = { - "documentId": str(uuid.uuid4()), - "title": dict_content[0].get("metadata", {}).get("data_source", {}).get("url"), - "section": [ - { - "text": element.pop("text", None), - "metadataJson": json.dumps(get_metadata(element)), - } - for element in dict_content - ], - } - logger.info( - f"Extending {len(vdoc)} json elements from content in {local_path}", - ) - docs_list.append(vdoc) - self.write_dict(docs_list=docs_list) diff --git a/unstructured/ingest/connector/weaviate.py b/unstructured/ingest/connector/weaviate.py deleted file mode 100644 index 5039b2f99..000000000 --- a/unstructured/ingest/connector/weaviate.py +++ /dev/null @@ -1,187 +0,0 @@ -import copy -import json -import typing as t -from dataclasses import dataclass, field - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.enhanced_dataclass.core import _asdict -from unstructured.ingest.error import DestinationConnectionError, SourceConnectionError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseDestinationConnector, - WriteConfig, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from weaviate import Client - - -@dataclass -class WeaviateAccessConfig(AccessConfig): - access_token: t.Optional[str] = enhanced_field(default=None, sensitive=True) - refresh_token: t.Optional[str] = enhanced_field(default=None, sensitive=True) - api_key: t.Optional[str] = enhanced_field(default=None, sensitive=True) - client_secret: t.Optional[str] = enhanced_field(default=None, sensitive=True) - scope: t.Optional[t.List[str]] = None - username: t.Optional[str] = None - password: t.Optional[str] = enhanced_field(default=None, sensitive=True) - anonymous: bool = False - - -@dataclass -class SimpleWeaviateConfig(BaseConnectorConfig): - access_config: WeaviateAccessConfig - host_url: str - class_name: str - - -@dataclass -class WeaviateWriteConfig(WriteConfig): - batch_size: int = 100 - - -@dataclass -class WeaviateDestinationConnector(BaseDestinationConnector): - write_config: WeaviateWriteConfig - connector_config: SimpleWeaviateConfig - _client: t.Optional["Client"] = field(init=False, default=None) - - def to_dict(self, **kwargs): - """ - The _client variable in this dataclass breaks deepcopy due to: - TypeError: cannot pickle '_thread.lock' object - When serializing, remove it, meaning client data will need to be reinitialized - when deserialized - """ - self_cp = copy.copy(self) - if hasattr(self_cp, "_client"): - setattr(self_cp, "_client", None) - return _asdict(self_cp, **kwargs) - - @property - @requires_dependencies(["weaviate"], extras="weaviate") - def client(self) -> "Client": - if self._client is None: - from weaviate import Client - - auth = self._resolve_auth_method() - self._client = Client(url=self.connector_config.host_url, auth_client_secret=auth) - return self._client - - 
@requires_dependencies(["weaviate"], extras="weaviate") - @DestinationConnectionError.wrap - def initialize(self): - _ = self.client - - @requires_dependencies(["weaviate"], extras="weaviate") - def check_connection(self): - try: - _ = self.client - except Exception as e: - logger.error(f"Failed to validate connection {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - def _resolve_auth_method(self): - access_configs = self.connector_config.access_config - if access_configs.anonymous: - return None - - if access_configs.access_token: - from weaviate.auth import AuthBearerToken - - return AuthBearerToken( - access_token=access_configs.access_token, - refresh_token=access_configs.refresh_token, - ) - elif access_configs.api_key: - from weaviate.auth import AuthApiKey - - return AuthApiKey(api_key=access_configs.api_key) - elif access_configs.client_secret: - from weaviate.auth import AuthClientCredentials - - return AuthClientCredentials( - client_secret=access_configs.client_secret, scope=access_configs.scope - ) - elif access_configs.username and access_configs.password: - from weaviate.auth import AuthClientPassword - - return AuthClientPassword( - username=access_configs.username, - password=access_configs.password, - scope=access_configs.scope, - ) - return None - - def conform_dict(self, data: dict) -> None: - """ - Updates the element dictionary to conform to the Weaviate schema - """ - from dateutil import parser - - # Dict as string formatting - if record_locator := data.get("metadata", {}).get("data_source", {}).get("record_locator"): - # Explicit casting otherwise fails schema type checking - data["metadata"]["data_source"]["record_locator"] = str(json.dumps(record_locator)) - - # Array of items as string formatting - if points := data.get("metadata", {}).get("coordinates", {}).get("points"): - data["metadata"]["coordinates"]["points"] = str(json.dumps(points)) - - if links := data.get("metadata", {}).get("links", {}): - data["metadata"]["links"] = str(json.dumps(links)) - - if permissions_data := ( - data.get("metadata", {}).get("data_source", {}).get("permissions_data") - ): - data["metadata"]["data_source"]["permissions_data"] = json.dumps(permissions_data) - - # Datetime formatting - if date_created := data.get("metadata", {}).get("data_source", {}).get("date_created"): - data["metadata"]["data_source"]["date_created"] = parser.parse(date_created).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ", - ) - - if date_modified := data.get("metadata", {}).get("data_source", {}).get("date_modified"): - data["metadata"]["data_source"]["date_modified"] = parser.parse(date_modified).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ", - ) - - if date_processed := data.get("metadata", {}).get("data_source", {}).get("date_processed"): - data["metadata"]["data_source"]["date_processed"] = parser.parse( - date_processed - ).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ", - ) - - if last_modified := data.get("metadata", {}).get("last_modified", {}): - data["metadata"]["last_modified"] = parser.parse(last_modified).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ", - ) - - # String casting - if version := data.get("metadata", {}).get("data_source", {}).get("version"): - data["metadata"]["data_source"]["version"] = str(version) - - if page_number := data.get("metadata", {}).get("page_number"): - data["metadata"]["page_number"] = str(page_number) - - def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None: - logger.info( - f"writing {len(elements_dict)} objects to 
destination " - f"class {self.connector_config.class_name} " - f"at {self.connector_config.host_url}", - ) - - self.client.batch.configure(batch_size=self.write_config.batch_size) - with self.client.batch as b: - for e in elements_dict: - vector = e.pop("embeddings", None) - b.add_data_object( - e, - self.connector_config.class_name, - vector=vector, - ) diff --git a/unstructured/ingest/connector/wikipedia.py b/unstructured/ingest/connector/wikipedia.py deleted file mode 100644 index 239e4636c..000000000 --- a/unstructured/ingest/connector/wikipedia.py +++ /dev/null @@ -1,208 +0,0 @@ -import typing as t -from dataclasses import dataclass, field -from pathlib import Path - -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import ( - BaseConnectorConfig, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from wikipedia import WikipediaPage - - -@dataclass -class SimpleWikipediaConfig(BaseConnectorConfig): - page_title: str - auto_suggest: bool = False - - -@dataclass -class WikipediaIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - connector_config: SimpleWikipediaConfig = field(repr=False) - - @property - @requires_dependencies(["wikipedia"], extras="wikipedia") - def page(self) -> "WikipediaPage": - import wikipedia - - return wikipedia.page( - self.connector_config.page_title, - auto_suggest=self.connector_config.auto_suggest, - ) - - def get_filename_prefix(self) -> str: - title: str = str(self.connector_config.page_title) - title = " ".join(title.split()).replace(" ", "-") - return title - - @property - def filename(self) -> Path: - raise NotImplementedError() - - @property - def text(self) -> str: - raise NotImplementedError() - - @property - def _output_filename(self): - raise NotImplementedError() - - @property - def date_created(self) -> t.Optional[str]: - return None - - @property - def date_modified(self) -> t.Optional[str]: - return None - - @property - def record_locator(self) -> t.Optional[t.Dict[str, t.Any]]: - return { - "page_title": self.connector_config.page_title, - "page_url": self.source_metadata.source_url, # type: ignore - } - - def _create_full_tmp_dir_path(self): - self.filename.parent.mkdir(parents=True, exist_ok=True) - - @requires_dependencies(["wikipedia"], extras="wikipedia") - def update_source_metadata(self): - from wikipedia.exceptions import PageError - - try: - page = self.page - except PageError: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - - self.source_metadata = SourceMetadata( - version=page.revision_id, - source_url=page.url, - exists=True, - ) - - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - """Fetches the "remote" doc and stores it locally on the filesystem.""" - self._create_full_tmp_dir_path() - self.update_source_metadata() - with open(self.filename, "w", encoding="utf8") as f: - f.write(self.text) - - -@dataclass -class WikipediaIngestHTMLDoc(WikipediaIngestDoc): - registry_name: str = "wikipedia_html" - - @property - def filename(self) -> Path: - return ( - Path(self.read_config.download_dir) / f"{self.get_filename_prefix()}.html" - ).resolve() - - @property - def text(self): - return self._get_html() - - @SourceConnectionNetworkError.wrap - def _get_html(self): - return self.page.html() - 
- @property - def _output_filename(self): - return Path(self.processor_config.output_dir) / f"{self.get_filename_prefix()}-html.json" - - -@dataclass -class WikipediaIngestTextDoc(WikipediaIngestDoc): - registry_name: str = "wikipedia_text" - - @property - def filename(self) -> Path: - return (Path(self.read_config.download_dir) / f"{self.get_filename_prefix()}.txt").resolve() - - @property - def text(self): - return self._get_content() - - @SourceConnectionNetworkError.wrap - def _get_content(self): - return self.page.content - - @property - def _output_filename(self): - return Path(self.processor_config.output_dir) / f"{self.get_filename_prefix()}-txt.json" - - -@dataclass -class WikipediaIngestSummaryDoc(WikipediaIngestDoc): - registry_name: str = "wikipedia_summary" - - @property - def filename(self) -> Path: - return ( - Path(self.read_config.download_dir) / f"{self.get_filename_prefix()}-summary.txt" - ).resolve() - - @property - def text(self): - return self._get_summary() - - @SourceConnectionNetworkError.wrap - def _get_summary(self): - return self.page.summary - - @property - def _output_filename(self): - return Path(self.processor_config.output_dir) / f"{self.get_filename_prefix()}-summary.json" - - -@dataclass -class WikipediaSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - connector_config: SimpleWikipediaConfig - - def initialize(self): - pass - - @requires_dependencies(["wikipedia"], extras="wikipedia") - def check_connection(self): - import wikipedia - - try: - wikipedia.page( - self.connector_config.page_title, - auto_suggest=self.connector_config.auto_suggest, - ) - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - def get_ingest_docs(self): - return [ - WikipediaIngestTextDoc( - processor_config=self.processor_config, - connector_config=self.connector_config, - read_config=self.read_config, - ), - WikipediaIngestHTMLDoc( - processor_config=self.processor_config, - connector_config=self.connector_config, - read_config=self.read_config, - ), - WikipediaIngestSummaryDoc( - processor_config=self.processor_config, - connector_config=self.connector_config, - read_config=self.read_config, - ), - ] diff --git a/unstructured/ingest/enhanced_dataclass/__init__.py b/unstructured/ingest/enhanced_dataclass/__init__.py deleted file mode 100644 index 38c598c4a..000000000 --- a/unstructured/ingest/enhanced_dataclass/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .dataclasses import enhanced_field -from .json_mixin import EnhancedDataClassJsonMixin - -__all__ = ["enhanced_field", "EnhancedDataClassJsonMixin"] diff --git a/unstructured/ingest/enhanced_dataclass/core.py b/unstructured/ingest/enhanced_dataclass/core.py deleted file mode 100644 index 8fd79af39..000000000 --- a/unstructured/ingest/enhanced_dataclass/core.py +++ /dev/null @@ -1,99 +0,0 @@ -import _thread -import copy -import functools -from dataclasses import fields - -from dataclasses_json.core import ( - Collection, - Enum, - Mapping, - _encode_overrides, - _handle_undefined_parameters_safe, - _user_overrides_or_exts, - is_dataclass, -) - - -def _recursive_repr(user_function): - # Copied from dataclasses as this method isn't exposed for importing - repr_running = set() - - @functools.wraps(user_function) - def wrapper(self): - key = id(self), _thread.get_ident() - if key in repr_running: - return "..." 
- repr_running.add(key) - try: - result = user_function(self) - finally: - repr_running.discard(key) - return result - - return wrapper - - -def _asdict( - obj, - encode_json=False, - redact_sensitive=False, - redacted_text="***REDACTED***", - apply_name_overload: bool = True, -): - """ - A re-implementation of `asdict` (based on the original in the `dataclasses` - source) to support arbitrary Collection and Mapping types. - """ - if is_dataclass(obj): - result = [] - overrides = _user_overrides_or_exts(obj) - for field in fields(obj): - if overrides[field.name].encoder: - value = getattr(obj, field.name) - else: - value = _asdict( - getattr(obj, field.name), - encode_json=encode_json, - redact_sensitive=redact_sensitive, - redacted_text=redacted_text, - apply_name_overload=apply_name_overload, - ) - if getattr(field, "sensitive", False) and redact_sensitive and value: - value = redacted_text - if getattr(field, "overload_name", None) and apply_name_overload: - overload_name = getattr(field, "overload_name") - result.append((overload_name, value)) - else: - result.append((field.name, value)) - - result = _handle_undefined_parameters_safe(cls=obj, kvs=dict(result), usage="to") - return _encode_overrides( - dict(result), _user_overrides_or_exts(obj), encode_json=encode_json - ) - elif isinstance(obj, Mapping): - return { - _asdict( - k, - encode_json=encode_json, - redact_sensitive=redact_sensitive, - redacted_text=redacted_text, - ): _asdict( - v, - encode_json=encode_json, - redact_sensitive=redact_sensitive, - redacted_text=redacted_text, - ) - for k, v in obj.items() - } - elif isinstance(obj, Collection) and not isinstance(obj, (str, bytes, Enum)): - return [ - _asdict( - v, - encode_json=encode_json, - redact_sensitive=redact_sensitive, - redacted_text=redacted_text, - ) - for v in obj - ] - else: - return copy.deepcopy(obj) diff --git a/unstructured/ingest/enhanced_dataclass/dataclasses.py b/unstructured/ingest/enhanced_dataclass/dataclasses.py deleted file mode 100644 index a58fb3b79..000000000 --- a/unstructured/ingest/enhanced_dataclass/dataclasses.py +++ /dev/null @@ -1,54 +0,0 @@ -import typing as t -from dataclasses import MISSING, Field - -from unstructured.ingest.enhanced_dataclass.core import _recursive_repr - - -class EnhancedField(Field): - def __init__(self, *args, sensitive=False, overload_name: t.Optional[str] = None): - super().__init__(*args) - self.sensitive = sensitive - self.overload_name = overload_name - - @_recursive_repr - def __repr__(self): - # Support for kw_only added in 3.10, to support as low as 3.8, need to dynamically map - fields_array = [ - f"name={self.name!r}", - f"type={self.type!r}", - f"default={self.default!r}", - f"default_factory={self.default_factory!r}", - f"init={self.init!r}", - f"repr={self.repr!r}", - f"hash={self.hash!r}", - f"compare={self.compare!r}", - f"metadata={self.metadata!r}", - f"sensitive={self.sensitive!r}", - f"overload_name={self.overload_name!r}", - f"_field_type={self._field_type}", - ] - if kw_only := getattr(self, "kw_only", None): - fields_array.append(f"kw_only={kw_only!r}") - return "Field({})".format(",".join(fields_array)) - - -def enhanced_field( - *, - default=MISSING, - default_factory=MISSING, - init: bool = True, - repr: bool = True, - hash=None, - compare: bool = True, - metadata=None, - kw_only=MISSING, - sensitive: bool = False, - overload_name: t.Optional[str] = None, -): - if default is not MISSING and default_factory is not MISSING: - raise ValueError("cannot specify both default and default_factory") - 
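A simplified stand-in for the sensitive-field redaction `_asdict` performs above: here the flag lives in standard dataclass field metadata rather than the removed `EnhancedField` subclass, but the redaction decision is the same:

```python
from dataclasses import dataclass, field, fields

REDACTED = "***REDACTED***"


@dataclass
class AccessConfig:
    username: str
    password: str = field(metadata={"sensitive": True})


def to_redacted_dict(obj) -> dict:
    out = {}
    for f in fields(obj):
        value = getattr(obj, f.name)
        # Only replace truthy values, mirroring the `and value` guard in _asdict.
        if f.metadata.get("sensitive") and value:
            value = REDACTED
        out[f.name] = value
    return out


print(to_redacted_dict(AccessConfig(username="svc-user", password="hunter2")))
# {'username': 'svc-user', 'password': '***REDACTED***'}
```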
args = [default, default_factory, init, repr, hash, compare, metadata] - # Support for kw_only added in 3.10, to support as low as 3.8, need to dynamically map - if "kw_only" in EnhancedField.__slots__: - args.append(kw_only) - return EnhancedField(*args, sensitive=sensitive, overload_name=overload_name) diff --git a/unstructured/ingest/enhanced_dataclass/json_mixin.py b/unstructured/ingest/enhanced_dataclass/json_mixin.py deleted file mode 100644 index 04f365a6b..000000000 --- a/unstructured/ingest/enhanced_dataclass/json_mixin.py +++ /dev/null @@ -1,125 +0,0 @@ -from __future__ import annotations - -import json -from dataclasses import InitVar, fields -from typing import Any, Callable, Optional, Type, TypeVar, Union - -import dataclasses_json.core as dataclasses_json_core -from dataclasses_json import DataClassJsonMixin - -from unstructured.ingest.enhanced_dataclass.core import _asdict - -A = TypeVar("A", bound="EnhancedDataClassJsonMixin") - -# Monkey-patch _decode_dataclass class to support name override -og_decode_dataclass = dataclasses_json_core._decode_dataclass - - -def custom_decode_dataclass(cls, kvs, infer_missing): - dataclass_fields = fields(cls) - for f in [ - field - for field in dataclass_fields - if hasattr(field, "overload_name") and getattr(field, "overload_name", None) - ]: - field_name = f.name - overload_name = getattr(f, "overload_name") - if isinstance(kvs, dict) and overload_name in kvs: - kvs[field_name] = kvs.pop(overload_name) - return og_decode_dataclass(cls, kvs, infer_missing) - - -dataclasses_json_core._decode_dataclass = custom_decode_dataclass - - -class EnhancedDataClassJsonMixin(DataClassJsonMixin): - """A mixin class extending DataClassJsonMixin. - - This class extends the functionality of DataClassJsonMixin to provide enhanced functionality - for JSON serialization and deserialization. It introduces options for redacting sensitive - information, custom encoding, and more advanced schema handling. - - Attributes: - N/A (No additional attributes) - - Methods: - to_json: Serialize the object to JSON format with customizable options. - from_dict: Deserialize a dictionary into an object of this class. - to_dict: Convert the object to a dictionary with customizable options. - schema: Generate a schema for validating and parsing JSON data based on this class. 
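The `custom_decode_dataclass` monkey-patch above exists to honor `overload_name` on the way back in from a dict. A standalone illustration of just the key remapping step (the field and key names here are hypothetical):

```python
def remap_overloaded_keys(kvs: dict, overloads: dict) -> dict:
    # overloads maps field_name -> overload_name; incoming dicts may use either key.
    out = dict(kvs)
    for field_name, overload_name in overloads.items():
        if overload_name in out:
            out[field_name] = out.pop(overload_name)
    return out


print(remap_overloaded_keys({"api-key": "abc", "host": "h"}, {"api_key": "api-key"}))
# {'host': 'h', 'api_key': 'abc'}
```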
- """ - - @classmethod - def check_init_var(cls): - ann = cls.__dict__.get("__annotations__", {}) - init_vars = {k: v for k, v in ann.items() if isinstance(v, InitVar)} - if init_vars: - raise TypeError( - "Class {} has the following fields defined with an InitVar which " - "cannot be used with EnhancedDataClassJsonMixin: {}".format( - cls.__name__, ", ".join(init_vars.keys()) - ) - ) - - def to_json( - self, - *, - skipkeys: bool = False, - ensure_ascii: bool = True, - check_circular: bool = True, - allow_nan: bool = True, - indent: Optional[Union[int, str]] = None, - separators: Optional[tuple[str, str]] = None, - default: Optional[Callable[..., Any]] = None, - sort_keys: bool = False, - redact_sensitive: bool = False, - redacted_text: str = "***REDACTED***", - apply_name_overload: bool = True, - **kw: Any, - ) -> str: - self.check_init_var() - return json.dumps( - self.to_dict( - encode_json=False, - redact_sensitive=redact_sensitive, - redacted_text=redacted_text, - apply_name_overload=apply_name_overload, - ), - cls=dataclasses_json_core._ExtendedEncoder, - skipkeys=skipkeys, - ensure_ascii=ensure_ascii, - check_circular=check_circular, - allow_nan=allow_nan, - indent=indent, - separators=separators, - default=default, - sort_keys=sort_keys, - **kw, - ) - - @classmethod - def from_dict( - cls: Type[A], - kvs: dataclasses_json_core.Json, - *, - infer_missing=False, - apply_name_overload=False, - ) -> A: - cls.check_init_var() - return dataclasses_json_core._decode_dataclass(cls, kvs, infer_missing) - - def to_dict( - self, - encode_json: bool = False, - redact_sensitive: bool = False, - redacted_text: str = "***REDACTED***", - apply_name_overload: bool = True, - ) -> dict[str, dataclasses_json_core.Json]: - self.check_init_var() - return _asdict( - self, - encode_json=encode_json, - redact_sensitive=redact_sensitive, - redacted_text=redacted_text, - apply_name_overload=apply_name_overload, - ) diff --git a/unstructured/ingest/error.py b/unstructured/ingest/error.py deleted file mode 100644 index 8397caf6d..000000000 --- a/unstructured/ingest/error.py +++ /dev/null @@ -1,49 +0,0 @@ -from abc import ABC -from functools import wraps - - -class CustomError(Exception, ABC): - error_string: str - - @classmethod - def wrap(cls, f): - """ - Provides a wrapper for a function that catches any exception and - re-raises it as the customer error. If the exception itself is already an instance - of the custom error, re-raises original error. 
- """ - - @wraps(f) - def wrapper(*args, **kwargs): - try: - return f(*args, **kwargs) - except BaseException as error: - if not isinstance(error, cls) and not issubclass(type(error), cls): - raise cls(cls.error_string.format(str(error))) from error - raise - - return wrapper - - -class SourceConnectionError(CustomError): - error_string = "Error in getting data from upstream data source: {}" - - -class SourceConnectionNetworkError(SourceConnectionError): - error_string = "Error in connecting to upstream data source: {}" - - -class DestinationConnectionError(CustomError): - error_string = "Error in connecting to downstream data source: {}" - - -class EmbeddingEncoderConnectionError(CustomError): - error_string = "Error in connecting to the embedding model provider: {}" - - -class WriteError(CustomError): - error_string = "Error in writing to downstream data source: {}" - - -class PartitionError(CustomError): - error_string = "Error in partitioning content: {}" diff --git a/unstructured/ingest/evaluate.py b/unstructured/ingest/evaluate.py deleted file mode 100755 index c6446ac9d..000000000 --- a/unstructured/ingest/evaluate.py +++ /dev/null @@ -1,349 +0,0 @@ -#! /usr/bin/env python3 - -from typing import List, Optional, Tuple, Union - -import click - -from unstructured.metrics.evaluate import ( - ElementTypeMetricsCalculator, - ObjectDetectionAggregatedMetricsCalculator, - ObjectDetectionPerClassMetricsCalculator, - TableStructureMetricsCalculator, - TextExtractionMetricsCalculator, - filter_metrics, - get_mean_grouping, -) - - -@click.group() -def main(): - pass - - -@main.command() -@click.option("--output_dir", type=str, help="Directory to structured output.") -@click.option("--source_dir", type=str, help="Directory to source.") -@click.option( - "--output_list", - type=str, - multiple=True, - help="Optional: list of selected structured output file names under the \ - directory to be evaluate. If none, all files under directory will be use.", -) -@click.option( - "--source_list", - type=str, - multiple=True, - help="Optional: list of selected source file names under the directory \ - to be evaluate. If none, all files under directory will be use.", -) -@click.option( - "--export_dir", - type=str, - default="metrics", - help="Directory to save the output evaluation metrics to. Default to \ - your/working/dir/metrics/", -) -@click.option("--group_by", type=str, help="Input field for aggregration, or leave blank if none.") -@click.option( - "--weights", - type=(int, int, int), - default=(2, 1, 1), - show_default=True, - help="A list of weights to the Levenshtein distance calculation. 
Takes input as --weights 2 2 2\ - See text_extraction.py/calculate_edit_distance for more details.", -) -@click.option( - "--visualize", - is_flag=True, - show_default=True, - default=False, - help="Add the flag to show progress bar.", -) -@click.option( - "--output_type", - type=str, - default="json", - show_default=True, - help="Takes in either `txt` or `json` as output_type.", -) -def measure_text_extraction_accuracy_command( - output_dir: str, - source_dir: str, - export_dir: str, - weights: Tuple[int, int, int], - visualize: bool, - output_type: str, - output_list: Optional[List[str]] = None, - source_list: Optional[List[str]] = None, - group_by: Optional[str] = None, -): - return ( - TextExtractionMetricsCalculator( - documents_dir=output_dir, - ground_truths_dir=source_dir, - group_by=group_by, - weights=weights, - document_type=output_type, - ) - .on_files(document_paths=output_list, ground_truth_paths=source_list) - .calculate(export_dir=export_dir, visualize_progress=visualize, display_agg_df=True) - ) - - -@main.command() -@click.option("--output_dir", type=str, help="Directory to structured output.") -@click.option("--source_dir", type=str, help="Directory to structured source.") -@click.option( - "--output_list", - type=str, - multiple=True, - help="Optional: list of selected structured output file names under the \ - directory to be evaluate. If none, all files under directory will be used.", -) -@click.option( - "--source_list", - type=str, - multiple=True, - help="Optional: list of selected source file names under the directory \ - to be evaluate. If none, all files under directory will be used.", -) -@click.option( - "--export_dir", - type=str, - default="metrics", - help="Directory to save the output evaluation metrics to. Default to \ - your/working/dir/metrics/", -) -@click.option( - "--visualize", - is_flag=True, - show_default=True, - default=False, - help="Add the flag to show progress bar.", -) -def measure_element_type_accuracy_command( - output_dir: str, - source_dir: str, - export_dir: str, - visualize: bool, - output_list: Optional[List[str]] = None, - source_list: Optional[List[str]] = None, -): - return ( - ElementTypeMetricsCalculator( - documents_dir=output_dir, - ground_truths_dir=source_dir, - ) - .on_files(document_paths=output_list, ground_truth_paths=source_list) - .calculate(export_dir=export_dir, visualize_progress=visualize, display_agg_df=True) - ) - - -@main.command() -@click.option( - "--group_by", - type=str, - required=True, - help="The category to group by; valid values are 'doctype' and 'connector'.", -) -@click.option( - "--data_input", - type=str, - required=True, - help="A datafram or path to the CSV/TSV file containing the data", -) -@click.option( - "--export_dir", - type=str, - default="metrics", - help="Directory to save the output evaluation metrics to. Default to \ - your/working/dir/metrics/", -) -@click.option( - "--eval_name", - type=str, - help="Evaluated metric. Expecting one of 'text_extraction' or 'element_type'", -) -@click.option( - "--agg_name", - type=str, - help="String to use with export filename. Default is `cct` for `text_extraction` \ - and `element-type` for `element_type`", -) -@click.option( - "--export_filename", type=str, help="Optional. Define your file name for the output here." 
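The same evaluation can be driven from Python rather than this CLI wrapper, since the command simply delegates to `TextExtractionMetricsCalculator`. A sketch, assuming `unstructured` is installed and the two directories exist (the paths are placeholders; passing `None` to `on_files` evaluates every file, as the help text above states):

```python
from unstructured.metrics.evaluate import TextExtractionMetricsCalculator

result = (
    TextExtractionMetricsCalculator(
        documents_dir="structured-output/",  # placeholder path
        ground_truths_dir="gold-standard/",  # placeholder path
        group_by="doctype",
        weights=(2, 1, 1),
        document_type="json",
    )
    .on_files(document_paths=None, ground_truth_paths=None)
    .calculate(export_dir="metrics", visualize_progress=False, display_agg_df=True)
)
```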
-) -def get_mean_grouping_command( - group_by: str, - data_input: str, - export_dir: str, - eval_name: str, - agg_name: Optional[str] = None, - export_filename: Optional[str] = None, -): - return get_mean_grouping( - group_by=group_by, - data_input=data_input, - export_dir=export_dir, - eval_name=eval_name, - agg_name=agg_name, - export_filename=export_filename, - ) - - -@main.command() -@click.option("--output_dir", type=str, help="Directory to structured output.") -@click.option("--source_dir", type=str, help="Directory to structured source.") -@click.option( - "--output_list", - type=str, - multiple=True, - help="Optional: list of selected structured output file names under the \ - directory to be evaluate. If none, all files under directory will be used.", -) -@click.option( - "--source_list", - type=str, - multiple=True, - help="Optional: list of selected source file names under the directory \ - to be evaluate. If none, all files under directory will be used.", -) -@click.option( - "--export_dir", - type=str, - default="metrics", - help="Directory to save the output evaluation metrics to. Default to \ - your/working/dir/metrics/", -) -@click.option( - "--visualize", - is_flag=True, - show_default=True, - default=False, - help="Add the flag to show progress bar.", -) -@click.option( - "--cutoff", - type=float, - show_default=True, - default=0.8, - help="The cutoff value for the element level alignment. \ - If not set, a default value is used", -) -def measure_table_structure_accuracy_command( - output_dir: str, - source_dir: str, - export_dir: str, - visualize: bool, - output_list: Optional[List[str]] = None, - source_list: Optional[List[str]] = None, - cutoff: Optional[float] = None, -): - return ( - TableStructureMetricsCalculator( - documents_dir=output_dir, - ground_truths_dir=source_dir, - cutoff=cutoff, - ) - .on_files(document_paths=output_list, ground_truth_paths=source_list) - .calculate(export_dir=export_dir, visualize_progress=visualize, display_agg_df=True) - ) - - -@main.command() -@click.option("--output_dir", type=str, help="Directory to structured output.") -@click.option("--source_dir", type=str, help="Directory to structured source.") -@click.option( - "--output_list", - type=str, - multiple=True, - help=( - "Optional: list of selected structured output file names under the " - "directory to be evaluated. If none, all files under directory will be used." - ), -) -@click.option( - "--source_list", - type=str, - multiple=True, - help="Optional: list of selected source file names under the directory \ - to be evaluate. If none, all files under directory will be used.", -) -@click.option( - "--export_dir", - type=str, - default="metrics", - help="Directory to save the output evaluation metrics to. 
Default to \ - your/working/dir/metrics/", -) -@click.option( - "--visualize", - is_flag=True, - show_default=True, - default=False, - help="Add the flag to show progress bar.", -) -def measure_object_detection_metrics_command( - output_dir: str, - source_dir: str, - export_dir: str, - visualize: bool, - output_list: Optional[List[str]] = None, - source_list: Optional[List[str]] = None, -): - aggregated_df = ( - ObjectDetectionAggregatedMetricsCalculator( - documents_dir=output_dir, - ground_truths_dir=source_dir, - ) - .on_files(document_paths=output_list, ground_truth_paths=source_list) - .calculate(export_dir=export_dir, visualize_progress=visualize, display_agg_df=True) - ) - per_class_df = ( - ObjectDetectionPerClassMetricsCalculator( - documents_dir=output_dir, - ground_truths_dir=source_dir, - ) - .on_files(document_paths=output_list, ground_truth_paths=source_list) - .calculate(export_dir=export_dir, visualize_progress=visualize, display_agg_df=True) - ) - return aggregated_df, per_class_df - - -@main.command() -@click.option( - "--data_input", type=str, required=True, help="Takes in path to data file as .tsv .csv .txt" -) -@click.option( - "--filter_list", - type=str, - required=True, - help="Takes in list of string to filter the data_input.", -) -@click.option( - "--filter_by", - type=str, - required=True, - help="Field from data_input to match with filter_list. Default is `filename`.", -) -@click.option( - "--export_filename", type=str, help="Export filename. Required when return_type is `file`" -) -@click.option("--export_dir", type=str, help="Export directory.") -@click.option("--return_type", type=str, help="`dataframe` or `file`. Default is `file`.") -def filter_metrics_command( - data_input: str, - filter_list: Union[str, List[str]], - filter_by: str = "filename", - export_filename: Optional[str] = None, - export_dir: str = "metrics", - return_type: str = "file", -): - return filter_metrics( - data_input, filter_list, filter_by, export_filename, export_dir, return_type - ) - - -if __name__ == "__main__": - main() diff --git a/unstructured/ingest/img/unstructured_ingest_cli_pipeline_diagram.png b/unstructured/ingest/img/unstructured_ingest_cli_pipeline_diagram.png deleted file mode 100644 index cf2c94f47..000000000 Binary files a/unstructured/ingest/img/unstructured_ingest_cli_pipeline_diagram.png and /dev/null differ diff --git a/unstructured/ingest/ingest_backoff/__init__.py b/unstructured/ingest/ingest_backoff/__init__.py deleted file mode 100644 index 81d08bf36..000000000 --- a/unstructured/ingest/ingest_backoff/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from ._wrapper import RetryHandler - -__all__ = ["RetryHandler"] diff --git a/unstructured/ingest/ingest_backoff/_common.py b/unstructured/ingest/ingest_backoff/_common.py deleted file mode 100644 index 5b1f87759..000000000 --- a/unstructured/ingest/ingest_backoff/_common.py +++ /dev/null @@ -1,102 +0,0 @@ -import logging -import sys -import traceback - - -# Default startup handler -def _log_start(details, logger, log_level): - max_tried = details.get("max_tries") - max_time = details.get("max_time") - if max_tried is not None and max_time is not None: - s = "%.1fs or %d tries" - s_args = [max_time, max_tried] - elif max_tried is not None: - s = "%d tries" - s_args = [max_tried] - else: - s = "%.1fs" - s_args = [max_time] - exception = details.get("exception") - if isinstance(exception, tuple): - exception = list(exception) - elif not isinstance(exception, list): - exception = [exception] - exception_s = ", 
".join([e.__name__ for e in exception]) - if log_level >= logging.INFO: - msg = f"Attempting %s(...), will retry for {s} given these issues: %s" - log_args = [details["target"].__name__] + s_args + [exception_s] - else: - msg = f"Attempting %s(%s), will retry for {s} given these issues: %s" - target_input_list = [] - if args := details.get("args"): - target_input_list.extend([str(d) for d in args]) - if kwargs := details.get("kwargs"): - target_input_list.extend([f"{k}={str(v)}" for k, v in kwargs.items()]) - target_input = ", ".join(target_input_list) if target_input_list else "" - log_args = ( - [ - details["target"].__name__, - target_input, - ] - + s_args - + [exception_s] - ) - logger.log(log_level, msg, *log_args) - - -# Default backoff handler -def _log_backoff(details, logger, log_level): - if log_level >= logging.INFO: - msg = "Backing off %s(...) for %.1fs (%s)" - log_args = [details["target"].__name__, details["tries"]] - else: - msg = "Backing off %.1fs seconds after %d tries calling function %s(%s) -> %s" - target_input_list = [] - if args := details.get("args"): - target_input_list.extend([str(d) for d in args]) - if kwargs := details.get("kwargs"): - target_input_list.extend([f"{k}={str(v)}" for k, v in kwargs.items()]) - target_input = ", ".join(target_input_list) if target_input_list else "" - log_args = [ - details["wait"], - details["tries"], - details["target"].__name__, - target_input, - ] - exc_typ, exc, _ = sys.exc_info() - if exc is not None: - exc_fmt = traceback.format_exception_only(exc_typ, exc)[-1] - log_args.append(exc_fmt.rstrip("\n")) - else: - log_args.append(str(details["value"])) - logger.log(log_level, msg, *log_args) - - -# Default giveup handler -def _log_giveup(details, logger, log_level): - if log_level >= logging.INFO: - msg = "Giving up %s(...) after %.1fs (%s)" - log_args = [details["target"].__name__, details["tries"]] - else: - msg = "Giving up after %d tries (%.1fs) calling function %s(%s) -> %s" - target_input_list = [] - if args := details.get("args"): - target_input_list.extend([str(d) for d in args]) - if kwargs := details.get("kwargs"): - target_input_list.extend([f"{k}={str(v)}" for k, v in kwargs.items()]) - target_input = ", ".join(target_input_list) if target_input_list else "..." 
- log_args = [ - details["tries"], - details["wait"], - details["target"].__name__, - target_input, - ] - - exc_typ, exc, _ = sys.exc_info() - if exc is not None: - exc_fmt = traceback.format_exception_only(exc_typ, exc)[-1] - log_args.append(exc_fmt.rstrip("\n")) - else: - log_args.append(details["value"]) - - logger.log(log_level, msg, *log_args) diff --git a/unstructured/ingest/ingest_backoff/_wrapper.py b/unstructured/ingest/ingest_backoff/_wrapper.py deleted file mode 100644 index 66e9d193a..000000000 --- a/unstructured/ingest/ingest_backoff/_wrapper.py +++ /dev/null @@ -1,122 +0,0 @@ -# coding:utf-8 -import logging -from collections.abc import Iterable as IterableType -from typing import Any, Iterable, Optional, Type, Union - -from backoff import _sync -from backoff._common import _config_handlers, _prepare_logger -from backoff._jitter import full_jitter -from backoff._typing import ( - _Handler, - _Jitterer, - _MaybeCallable, - _MaybeLogger, - _MaybeSequence, - _Predicate, - _WaitGenerator, -) - -from unstructured.ingest.ingest_backoff._common import _log_backoff, _log_giveup, _log_start - - -class RetryHandler: - def __init__( - self, - wait_gen: _WaitGenerator, - exception: _MaybeSequence[Type[Exception]], - *, - max_tries: Optional[_MaybeCallable[int]] = None, - max_time: Optional[_MaybeCallable[float]] = None, - jitter: Union[_Jitterer, None] = full_jitter, - giveup: _Predicate[Exception] = lambda e: False, - on_start: Union[_Handler, Iterable[_Handler], None] = None, - on_success: Union[_Handler, Iterable[_Handler], None] = None, - on_backoff: Union[_Handler, Iterable[_Handler], None] = None, - on_giveup: Union[_Handler, Iterable[_Handler], None] = None, - raise_on_giveup: bool = True, - logger: _MaybeLogger = "backoff", - start_log_level: int = logging.INFO, - backoff_log_level: int = logging.INFO, - giveup_log_level: int = logging.ERROR, - **wait_gen_kwargs: Any, - ): - prepared_logger = _prepare_logger(logger) - on_success = _config_handlers(on_success) - on_start = _config_handlers( - on_start, - default_handler=_log_start, - logger=prepared_logger, - log_level=start_log_level, - ) - on_backoff = _config_handlers( - on_backoff, - default_handler=_log_backoff, - logger=prepared_logger, - log_level=backoff_log_level, - ) - on_giveup = _config_handlers( - on_giveup, - default_handler=_log_giveup, - logger=prepared_logger, - log_level=giveup_log_level, - ) - prepared_logger.debug( - "Initiating retry handler with " - "max_tries={}, " - "max_time={}, " - "exception={}, " - "start_log_level={}, " - "backoff_log_level={}, " - "giveup_log_level={}".format( - max_tries, - max_time, - ( - ", ".join([e.__name__ for e in exception]) - if isinstance(exception, IterableType) - else exception.__name__ - ), - logging.getLevelName(start_log_level), - logging.getLevelName(backoff_log_level), - logging.getLevelName(giveup_log_level), - ), - ) - self.on_start = on_start - self.on_success = on_success - self.on_backoff = on_backoff - self.on_giveup = on_giveup - self.jitter = jitter - self.giveup = giveup - self.raise_on_giveup = raise_on_giveup - self.wait_gen_kwargs = wait_gen_kwargs - self.wait_gen = wait_gen - self.exception = exception - self.max_tries = max_tries - self.max_time = max_time - - def __call__(self, target, *args, **kwargs): - _sync._call_handlers( - self.on_start, - target=target, - args=args, - kwargs=kwargs, - tries=None, - elapsed=None, - max_tries=self.max_tries, - max_time=self.max_time, - exception=self.exception, - ) - wrapped_func = _sync.retry_exception( - target, - 
self.wait_gen, - self.exception, - max_tries=self.max_tries, - max_time=self.max_time, - jitter=self.jitter, - giveup=self.giveup, - on_success=self.on_success, - on_backoff=self.on_backoff, - on_giveup=self.on_giveup, - raise_on_giveup=self.raise_on_giveup, - wait_gen_kwargs=self.wait_gen_kwargs, - ) - return wrapped_func(*args, **kwargs) diff --git a/unstructured/ingest/interfaces.py b/unstructured/ingest/interfaces.py deleted file mode 100644 index 95edd13b1..000000000 --- a/unstructured/ingest/interfaces.py +++ /dev/null @@ -1,845 +0,0 @@ -"""Defines Abstract Base Classes (ABC's) core to batch processing documents -through Unstructured.""" - -from __future__ import annotations - -import functools -import json -import os -import re -from abc import ABC, abstractmethod -from dataclasses import InitVar, dataclass, field -from datetime import datetime -from pathlib import Path -from typing import Any, Optional, Type, TypeVar - -from dataclasses_json import DataClassJsonMixin -from dataclasses_json.core import Json, _decode_dataclass - -from unstructured.documents.elements import DataSourceMetadata -from unstructured.embed.interfaces import BaseEmbeddingEncoder, Element -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin, enhanced_field -from unstructured.ingest.enhanced_dataclass.core import _asdict -from unstructured.ingest.error import PartitionError, SourceConnectionError -from unstructured.ingest.logger import logger -from unstructured.partition.api import partition_via_api -from unstructured.staging.base import elements_to_dicts, flatten_dict - -A = TypeVar("A", bound="DataClassJsonMixin") - -# -- Needed to resolve TypeError raised by using InitVar and __future__.annotations -# -- See more here: https://stackoverflow.com/questions/70400639/ -InitVar.__call__ = lambda *args: None # type: ignore - -SUPPORTED_REMOTE_FSSPEC_PROTOCOLS = [ - "s3", - "s3a", - "abfs", - "az", - "gs", - "gcs", - "box", - "dropbox", - "sftp", -] - - -@dataclass -class BaseSessionHandle(ABC): - """Abstract Base Class for sharing resources that are local to an individual process. - e.g., a connection for making a request for fetching documents.""" - - -@dataclass -class BaseConfig(EnhancedDataClassJsonMixin, ABC): - pass - - -@dataclass -class AccessConfig(BaseConfig): - """Meant to designate holding any sensitive information associated with other configs - and also for access specific configs.""" - - -@dataclass -class RetryStrategyConfig(BaseConfig): - """ - Contains all info needed for decorator to pull from `self` for backoff - and retry triggered by exception. - - Args: - max_retries: The maximum number of attempts to make before giving - up. Once exhausted, the exception will be allowed to escape. - The default value of None means there is no limit to the - number of tries. If a callable is passed, it will be - evaluated at runtime and its return value used. - max_retry_time: The maximum total amount of time to try for before - giving up. Once expired, the exception will be allowed to - escape. If a callable is passed, it will be - evaluated at runtime and its return value used. 
- """ - - max_retries: Optional[int] = None - max_retry_time: Optional[float] = None - - -@dataclass -class PartitionConfig(BaseConfig): - # where to write structured data outputs - pdf_infer_table_structure: bool = False - strategy: str = "auto" - ocr_languages: Optional[list[str]] = None - encoding: Optional[str] = None - additional_partition_args: dict[str, Any] = field(default_factory=dict) - skip_infer_table_types: Optional[list[str]] = None - fields_include: list[str] = field( - default_factory=lambda: ["element_id", "text", "type", "metadata", "embeddings"], - ) - flatten_metadata: bool = False - metadata_exclude: list[str] = field(default_factory=list) - metadata_include: list[str] = field(default_factory=list) - partition_endpoint: Optional[str] = "https://api.unstructured.io/general/v0/general" - partition_by_api: bool = False - api_key: Optional[str] = str(enhanced_field(default=None, sensitive=True)) or None - hi_res_model_name: Optional[str] = None - - -@dataclass -class ProcessorConfig(BaseConfig): - reprocess: bool = False - verbose: bool = False - work_dir: str = str((Path.home() / ".cache" / "unstructured" / "ingest" / "pipeline").resolve()) - output_dir: str = "structured-output" - num_processes: int = 2 - raise_on_error: bool = False - - -@dataclass -class FileStorageConfig(BaseConfig): - remote_url: str - uncompress: bool = False - recursive: bool = False - file_glob: Optional[list[str]] = None - - -@dataclass -class FsspecConfig(FileStorageConfig): - access_config: Optional[AccessConfig] = None - protocol: str = field(init=False) - path_without_protocol: str = field(init=False) - dir_path: str = field(init=False) - file_path: str = field(init=False) - - def get_access_config(self) -> dict[str, Any]: - if self.access_config: - return self.access_config.to_dict(apply_name_overload=False) - else: - return {} - - def __post_init__(self): - self.protocol, self.path_without_protocol = self.remote_url.split("://") - if self.protocol not in SUPPORTED_REMOTE_FSSPEC_PROTOCOLS: - raise ValueError( - f"Protocol {self.protocol} not supported yet, only " - f"{SUPPORTED_REMOTE_FSSPEC_PROTOCOLS} are supported.", - ) - - # dropbox root is an empty string - match = re.match(rf"{self.protocol}://([\s])/", self.remote_url) - if match and self.protocol == "dropbox": - self.dir_path = " " - self.file_path = "" - return - - # dropbox paths can start with slash - match = re.match(rf"{self.protocol}:///([^/\s]+?)/([^\s]*)", self.remote_url) - if match and self.protocol == "dropbox": - self.dir_path = match.group(1) - self.file_path = match.group(2) or "" - return - - # just a path with no trailing prefix - match = re.match(rf"{self.protocol}://([^/\s]+?)(/*)$", self.remote_url) - if match: - self.dir_path = match.group(1) - self.file_path = "" - return - - # valid path with a dir and/or file - match = re.match(rf"{self.protocol}://([^/\s]+?)/([^\s]*)", self.remote_url) - if not match: - raise ValueError( - f"Invalid path {self.remote_url}. 
" - f"Expected :///.", - ) - self.dir_path = match.group(1) - self.file_path = match.group(2) or "" - - -@dataclass -class ReadConfig(BaseConfig): - # where raw documents are stored for processing, and then removed if not preserve_downloads - download_dir: Optional[str] = "" - re_download: bool = False - preserve_downloads: bool = False - download_only: bool = False - max_docs: Optional[int] = None - - -@dataclass -class EmbeddingConfig(BaseConfig): - provider: str - api_key: Optional[str] = str(enhanced_field(default=None, sensitive=True)) or None - model_name: Optional[str] = None - aws_access_key_id: Optional[str] = None - aws_secret_access_key: Optional[str] = None - aws_region: Optional[str] = None - - def get_embedder(self) -> BaseEmbeddingEncoder: - kwargs: dict[str, Any] = {} - if self.api_key: - kwargs["api_key"] = self.api_key - if self.model_name: - kwargs["model_name"] = self.model_name - # TODO make this more dynamic to map to encoder configs - if self.provider == "langchain-openai": - from unstructured.embed.openai import OpenAIEmbeddingConfig, OpenAIEmbeddingEncoder - - return OpenAIEmbeddingEncoder(config=OpenAIEmbeddingConfig(**kwargs)) - elif self.provider == "langchain-huggingface": - from unstructured.embed.huggingface import ( - HuggingFaceEmbeddingConfig, - HuggingFaceEmbeddingEncoder, - ) - - return HuggingFaceEmbeddingEncoder(config=HuggingFaceEmbeddingConfig(**kwargs)) - elif self.provider == "octoai": - from unstructured.embed.octoai import OctoAiEmbeddingConfig, OctoAIEmbeddingEncoder - - return OctoAIEmbeddingEncoder(config=OctoAiEmbeddingConfig(**kwargs)) - elif self.provider == "langchain-aws-bedrock": - from unstructured.embed.bedrock import BedrockEmbeddingConfig, BedrockEmbeddingEncoder - - return BedrockEmbeddingEncoder( - config=BedrockEmbeddingConfig( - aws_access_key_id=self.aws_access_key_id, - aws_secret_access_key=self.aws_secret_access_key, - region_name=self.aws_region, - ) - ) - elif self.provider == "langchain-vertexai": - from unstructured.embed.vertexai import ( - VertexAIEmbeddingConfig, - VertexAIEmbeddingEncoder, - ) - - return VertexAIEmbeddingEncoder(config=VertexAIEmbeddingConfig(**kwargs)) - elif self.provider == "langchain-voyageai": - from unstructured.embed.voyageai import ( - VoyageAIEmbeddingConfig, - VoyageAIEmbeddingEncoder, - ) - - return VoyageAIEmbeddingEncoder(config=VoyageAIEmbeddingConfig(**kwargs)) - elif self.provider == "mixedbread-ai": - from unstructured.embed.mixedbreadai import ( - MixedbreadAIEmbeddingConfig, - MixedbreadAIEmbeddingEncoder, - ) - - return MixedbreadAIEmbeddingEncoder(config=MixedbreadAIEmbeddingConfig(**kwargs)) - else: - raise ValueError(f"{self.provider} not a recognized encoder") - - -@dataclass -class ChunkingConfig(BaseConfig): - chunk_elements: InitVar[bool] = False - chunking_strategy: Optional[str] = None - combine_text_under_n_chars: Optional[int] = None - include_orig_elements: Optional[bool] = None - max_characters: Optional[int] = None - multipage_sections: Optional[bool] = None - new_after_n_chars: Optional[int] = None - overlap: Optional[int] = None - overlap_all: Optional[bool] = None - - def __post_init__(self, chunk_elements: bool) -> None: - """Resolve chunking_strategy if chunk_elements is True. - - If chunk_elements is True and chunking_strategy is None, default to 'by_title'. Otherwise, - do nothing and keep the defined value of chunking_strategy." 
- """ - if chunk_elements and self.chunking_strategy is None: - self.chunking_strategy = "by_title" - - -@dataclass -class PermissionsConfig(BaseConfig): - application_id: Optional[str] = enhanced_field(overload_name="permissions_application_id") - tenant: Optional[str] = enhanced_field(overload_name="permissions_tenant") - client_cred: Optional[str] = enhanced_field( - default=None, sensitive=True, overload_name="permissions_client_cred" - ) - - -# module-level variable to store session handle -global_write_session_handle: Optional[BaseSessionHandle] = None - - -@dataclass -class WriteConfig(BaseConfig): - pass - - -@dataclass -class BaseConnectorConfig(BaseConfig, ABC): - """Abstract definition on which to define connector-specific attributes.""" - - -@dataclass -class SourceMetadata(EnhancedDataClassJsonMixin, ABC): - date_created: Optional[str] = None - date_modified: Optional[str] = None - version: Optional[str] = None - source_url: Optional[str] = None - exists: Optional[bool] = None - permissions_data: Optional[list[dict[str, Any]]] = None - - -class IngestDocJsonMixin(EnhancedDataClassJsonMixin): - """ - Inherently, DataClassJsonMixin does not add in any @property fields to the json/dict - created from the dataclass. This explicitly sets properties to look for on the IngestDoc - class when creating the json/dict for serialization purposes. - """ - - metadata_properties = [ - "date_created", - "date_modified", - "date_processed", - "exists", - "permissions_data", - "version", - "source_url", - ] - properties_to_serialize = [ - "base_filename", - "filename", - "_output_filename", - "record_locator", - "_source_metadata", - "unique_id", - ] - - def add_props(self, as_dict: dict[str, Any], props: list[str]): - for prop in props: - val = getattr(self, prop) - if isinstance(val, Path): - val = str(val) - if isinstance(val, DataClassJsonMixin): - val = val.to_dict(encode_json=False) - as_dict[prop] = val - - def to_dict(self, **kwargs) -> dict[str, Json]: - as_dict = _asdict(self, **kwargs) - if "_session_handle" in as_dict: - as_dict.pop("_session_handle", None) - self.add_props(as_dict=as_dict, props=self.properties_to_serialize) - if getattr(self, "_source_metadata") is not None: - self.add_props(as_dict=as_dict, props=self.metadata_properties) - return as_dict - - @classmethod - def from_dict( - cls: Type[A], kvs: Json, *, infer_missing=False, apply_name_overload: bool = True - ) -> A: - doc = super().from_dict( - kvs=kvs, infer_missing=infer_missing, apply_name_overload=apply_name_overload - ) - if meta := kvs.get("_source_metadata"): - setattr(doc, "_source_metadata", SourceMetadata.from_dict(meta)) - if date_processed := kvs.get("_date_processed"): - setattr(doc, "_date_processed", date_processed) - return doc - - -class BatchIngestDocJsonMixin(EnhancedDataClassJsonMixin): - """ - Inherently, DataClassJsonMixin does not add in any @property fields to the json/dict - created from the dataclass. This explicitly sets properties to look for on the IngestDoc - class when creating the json/dict for serialization purposes. 
- """ - - properties_to_serialize = ["unique_id"] - - def add_props(self, as_dict: dict[str, Any], props: list[str]): - for prop in props: - val = getattr(self, prop) - if isinstance(val, Path): - val = str(val) - if isinstance(val, DataClassJsonMixin): - val = val.to_dict(encode_json=False) - as_dict[prop] = val - - def to_dict(self, encode_json=False) -> dict[str, Json]: - as_dict = _asdict(self, encode_json=encode_json) - self.add_props(as_dict=as_dict, props=self.properties_to_serialize) - return as_dict - - @classmethod - def from_dict(cls: Type[A], kvs: Json, *, infer_missing=False) -> A: - doc = _decode_dataclass(cls, kvs, infer_missing) - return doc - - -@dataclass -class BaseIngestDoc(ABC): - processor_config: ProcessorConfig - read_config: ReadConfig - connector_config: BaseConnectorConfig - - @property - @abstractmethod - def unique_id(self) -> str: - pass - - -@dataclass -class BaseSingleIngestDoc(BaseIngestDoc, IngestDocJsonMixin, ABC): - """An "ingest document" is specific to a connector, and provides - methods to fetch a single raw document, store it locally for processing, any cleanup - needed after successful processing of the doc, and the ability to write the doc's - structured outputs once processed. - - Crucially, it is not responsible for the actual processing of the raw document. - """ - - _source_metadata: Optional[SourceMetadata] = field(init=False, default=None) - _date_processed: Optional[str] = field(init=False, default=None) - - @property - def source_metadata(self) -> SourceMetadata: - if self._source_metadata is None: - self.update_source_metadata() - # Provide guarantee that the field was set by update_source_metadata() - if self._source_metadata is None: - raise ValueError("failed to set source metadata") - return self._source_metadata - - @source_metadata.setter - def source_metadata(self, value: SourceMetadata): - self._source_metadata = value - - @property - def date_created(self) -> Optional[str]: - """The date the document was created on the source system.""" - return self.source_metadata.date_created - - @property - def date_modified(self) -> Optional[str]: - """The date the document was last modified on the source system.""" - return self.source_metadata.date_modified - - @property - def date_processed(self) -> Optional[str]: - """The date the document was last processed by Unstructured. 
- self._date_processed is assigned internally in self.partition_file()""" - return self._date_processed - - @property - def exists(self) -> Optional[bool]: - """Whether the document exists on the remote source.""" - return self.source_metadata.exists - - @property - @abstractmethod - def filename(self): - """The local filename of the document after fetching from remote source.""" - - @property - def base_filename(self) -> Optional[str]: - if self.read_config.download_dir and self.filename: - download_path = str(Path(self.read_config.download_dir).resolve()) - full_path = str(self.filename) - base_path = full_path.replace(download_path, "") - return base_path - return None - - @property - def base_output_filename(self) -> Optional[str]: - if self.processor_config.output_dir and self._output_filename: - output_path = str(Path(self.processor_config.output_dir).resolve()) - full_path = str(self._output_filename) - base_path = full_path.replace(output_path, "") - return base_path - return None - - @property - @abstractmethod - def _output_filename(self): - """Filename of the structured output for this doc.""" - - @property - def record_locator(self) -> Optional[dict[str, Any]]: # Values must be JSON-serializable - """A dictionary with any data necessary to uniquely identify the document on - the source system.""" - return None - - @property - def unique_id(self) -> str: - return self.filename - - @property - def source_url(self) -> Optional[str]: - """The url of the source document.""" - return self.source_metadata.source_url # type: ignore - - @property - def version(self) -> Optional[str]: - """The version of the source document, this could be the last modified date, an - explicit version number, or anything else that can be used to uniquely identify - the version of the document.""" - return self.source_metadata.version # type: ignore - - @property - def permissions_data(self) -> Optional[list[dict[str, Any]]]: - """Access control data, aka permissions or sharing, from the source system.""" - if self.source_metadata is None: - self.update_source_metadata() - return self.source_metadata.permissions_data # type: ignore - - @abstractmethod - def cleanup_file(self): - """Removes the local copy the file (or anything else) after successful processing.""" - - @staticmethod - def skip_if_file_exists(func): - """Decorator that checks if a file exists, is not empty, and should not re-download, - if so log a message indicating as much and skip the decorated function.""" - - @functools.wraps(func) - def wrapper(self, *args, **kwargs): - if ( - not self.read_config.re_download - and self.filename.is_file() - and self.filename.stat().st_size - ): - logger.debug(f"File exists: {self.filename}, skipping {func.__name__}") - return None - return func(self, *args, **kwargs) - - return wrapper - - # TODO: set as @abstractmethod and pass or raise NotImplementedError - def update_source_metadata(self, **kwargs) -> None: - """Sets the SourceMetadata and the properties for the doc""" - self._source_metadata = SourceMetadata() - - def update_permissions_data(self): - """Sets the _permissions_data property for the doc. 
- This property is later used to fill the corresponding SourceMetadata.permissions_data field, - and after that carries on to the permissions_data property.""" - self._permissions_data: Optional[list[dict[str, Any]]] = None - - # NOTE(crag): Future BaseIngestDoc classes could define get_file_object() methods - # in addition to or instead of get_file() - @abstractmethod - @SourceConnectionError.wrap - def get_file(self): - """Fetches the "remote" doc and stores it locally on the filesystem.""" - - def has_output(self) -> bool: - """Determine if structured output for this doc already exists.""" - return self._output_filename.is_file() and self._output_filename.stat().st_size - - @PartitionError.wrap - def partition_file( - self, - partition_config: PartitionConfig, - **partition_kwargs, - ) -> list[Element]: - from unstructured.partition.auto import partition - - if not partition_config.partition_by_api: - logger.debug("Using local partition") - elements = partition( - filename=str(self.filename), - data_source_metadata=DataSourceMetadata( - url=self.source_url, - version=self.version, - record_locator=self.record_locator, - date_created=self.date_created, - date_modified=self.date_modified, - date_processed=self.date_processed, - permissions_data=self.permissions_data, - ), - **partition_kwargs, - ) - else: - endpoint = partition_config.partition_endpoint - - logger.debug(f"Using remote partition ({endpoint})") - - elements = partition_via_api( - filename=str(self.filename), - api_key=partition_config.api_key, - api_url=endpoint, - **partition_kwargs, - ) - # TODO: add m_data_source_metadata to unstructured-api pipeline_api and then - # pass the stringified json here - return elements - - def process_file( - self, - partition_config: PartitionConfig, - **partition_kwargs, - ) -> Optional[list[dict[str, Any]]]: - self._date_processed = datetime.utcnow().isoformat() - if self.read_config.download_only: - return None - logger.info(f"Processing {self.filename}") - - elements = self.partition_file(partition_config=partition_config, **partition_kwargs) - element_dicts = elements_to_dicts(elements) - - self.isd_elems_no_filename: list[dict[str, Any]] = [] - for elem in element_dicts: - if partition_config.metadata_exclude and partition_config.metadata_include: - raise ValueError( - "Arguments `--metadata-include` and `--metadata-exclude` are " - "mutually exclusive with each other.", - ) - elif partition_config.metadata_exclude: - ex_list = partition_config.metadata_exclude - for ex in ex_list: - if "." 
in ex: # handle nested fields - nested_fields = ex.split(".") - current_elem = elem - for f in nested_fields[:-1]: - if f in current_elem: - current_elem = current_elem[f] - field_to_exclude = nested_fields[-1] - if field_to_exclude in current_elem: - current_elem.pop(field_to_exclude, None) - else: # handle top-level fields - elem["metadata"].pop(ex, None) # type: ignore[attr-defined] - elif partition_config.metadata_include: - in_list = partition_config.metadata_include - for k in list(elem["metadata"].keys()): # type: ignore[attr-defined] - if k not in in_list: - elem["metadata"].pop(k, None) # type: ignore[attr-defined] - in_list = partition_config.fields_include - elem = {k: v for k, v in elem.items() if k in in_list} - - if partition_config.flatten_metadata and "metadata" in elem: - metadata = elem.pop("metadata") - elem.update(flatten_dict(metadata, keys_to_omit=["data_source_record_locator"])) - - self.isd_elems_no_filename.append(elem) - - return self.isd_elems_no_filename - - -@dataclass -class BaseIngestDocBatch(BaseIngestDoc, BatchIngestDocJsonMixin, ABC): - ingest_docs: list[BaseSingleIngestDoc] = field(default_factory=list) - - @abstractmethod - @SourceConnectionError.wrap - def get_files(self): - """Fetches the "remote" docs and stores it locally on the filesystem.""" - - -@dataclass -class BaseConnector(EnhancedDataClassJsonMixin, ABC): - @abstractmethod - def check_connection(self): - pass - - -@dataclass -class BaseSourceConnector(BaseConnector, ABC): - """Abstract Base Class for a connector to a remote source, e.g. S3 or Google Drive.""" - - processor_config: ProcessorConfig - read_config: ReadConfig - connector_config: BaseConnectorConfig - - @abstractmethod - def cleanup(self, cur_dir=None): - """Any additional cleanup up need after processing is complete. E.g., removing - temporary download dirs that are empty. - - By convention, documents that failed to process are typically not cleaned up.""" - - @abstractmethod - def initialize(self): - """Initializes the connector. Should also validate the connector is properly - configured: e.g., list a single a document from the source.""" - - @abstractmethod - def get_ingest_docs(self): - """Returns all ingest docs (derived from BaseIngestDoc). - This does not imply downloading all the raw documents themselves, - rather each IngestDoc is capable of fetching its content (in another process) - with IngestDoc.get_file().""" - - -@dataclass -class BaseDestinationConnector(BaseConnector, ABC): - write_config: WriteConfig - connector_config: BaseConnectorConfig - - def __init__(self, write_config: WriteConfig, connector_config: BaseConnectorConfig): - self.write_config = write_config - self.connector_config = connector_config - - def conform_dict(self, data: dict[str, Any]) -> None: - """ - When the original dictionary needs to be modified in place - """ - return - - def normalize_dict(self, element_dict: dict[str, Any]) -> dict[str, Any]: - """ - When the original dictionary needs to be mapped to a new one - """ - return element_dict - - @abstractmethod - def initialize(self): - """Initializes the connector. 
Should also validate the connector is properly - configured.""" - - def write(self, docs: list[BaseSingleIngestDoc]) -> None: - elements_dict = self.get_elements_dict(docs=docs) - self.modify_and_write_dict(elements_dict=elements_dict) - - def get_elements_dict(self, docs: list[BaseSingleIngestDoc]) -> list[dict[str, Any]]: - dict_list: list[dict[str, Any]] = [] - for doc in docs: - local_path = doc._output_filename - with open(local_path) as json_file: - dict_content = json.load(json_file) - logger.info( - f"Extending {len(dict_content)} json elements from content in {local_path}", - ) - dict_list.extend(dict_content) - return dict_list - - @abstractmethod - def write_dict(self, *args, elements_dict: list[dict[str, Any]], **kwargs) -> None: - pass - - def modify_and_write_dict(self, *args, elements_dict: list[dict[str, Any]], **kwargs) -> None: - """ - Modify in this instance means this method wraps calls to conform_dict() and - normalize() before actually processing the content via write_dict() - """ - for d in elements_dict: - self.conform_dict(data=d) - elements_dict_normalized = [self.normalize_dict(element_dict=d) for d in elements_dict] - return self.write_dict(*args, elements_dict=elements_dict_normalized, **kwargs) - - def write_elements(self, elements: list[Element], *args, **kwargs) -> None: - elements_dict = [e.to_dict() for e in elements] - self.modify_and_write_dict(*args, elements_dict=elements_dict, **kwargs) - - -class SourceConnectorCleanupMixin: - read_config: ReadConfig - - def cleanup(self, cur_dir=None): - """Recursively clean up downloaded files and directories.""" - if self.read_config.preserve_downloads or self.read_config.download_only: - return - if cur_dir is None: - cur_dir = self.read_config.download_dir - if cur_dir is None or not Path(cur_dir).is_dir(): - return - sub_dirs = os.listdir(cur_dir) - os.chdir(cur_dir) - for sub_dir in sub_dirs: - # don't traverse symlinks, not that there every should be any - if os.path.isdir(sub_dir) and not os.path.islink(sub_dir): - self.cleanup(sub_dir) - os.chdir("..") - if len(os.listdir(cur_dir)) == 0: - os.rmdir(cur_dir) - - -class PermissionsCleanupMixin: - processor_config: ProcessorConfig - - def cleanup_permissions(self, cur_dir=None): - def has_no_folders(folder_path): - folders = [ - item - for item in os.listdir(folder_path) - if os.path.isdir(os.path.join(folder_path, item)) - ] - return len(folders) == 0 - - """Recursively clean up downloaded files and directories.""" - if cur_dir is None: - cur_dir = Path(self.processor_config.output_dir, "permissions_data") - if not Path(cur_dir).exists(): - return - if Path(cur_dir).is_file(): - cur_file = cur_dir - os.remove(cur_file) - return - sub_dirs = os.listdir(cur_dir) - os.chdir(cur_dir) - for sub_dir in sub_dirs: - # don't traverse symlinks, not that there every should be any - if not os.path.islink(sub_dir): - self.cleanup_permissions(sub_dir) - os.chdir("..") - if has_no_folders(cur_dir): - os.rmdir(cur_dir) - - -class IngestDocCleanupMixin: - read_config: ReadConfig - - @property - @abstractmethod - def filename(self): - """The local filename of the document after fetching from remote source.""" - - def cleanup_file(self): - """Removes the local copy of the file after successful processing.""" - if ( - not self.read_config.preserve_downloads - and self.filename.is_file() - and not self.read_config.download_only - ): - logger.debug(f"Cleaning up {self}") - os.unlink(self.filename) - - -class ConfigSessionHandleMixin: - @abstractmethod - def 
create_session_handle(self) -> BaseSessionHandle: - """Creates a session handle that will be assigned on each IngestDoc to share - session related resources across all document handling for a given subprocess.""" - - -@dataclass -class IngestDocSessionHandleMixin: - connector_config: ConfigSessionHandleMixin - _session_handle: Optional[BaseSessionHandle] = field(default=None, init=False) - - @property - def session_handle(self): - """If a session handle is not assigned, creates a new one and assigns it.""" - if self._session_handle is None: - self._session_handle = self.connector_config.create_session_handle() - return self._session_handle - - @session_handle.setter - def session_handle(self, session_handle: BaseSessionHandle): - self._session_handle = session_handle diff --git a/unstructured/ingest/logger.py b/unstructured/ingest/logger.py deleted file mode 100644 index ed4e7180e..000000000 --- a/unstructured/ingest/logger.py +++ /dev/null @@ -1,130 +0,0 @@ -import ast -import json -import logging -import typing as t - -logger = logging.getLogger("unstructured.ingest") - - -def default_is_data_sensitive(k: str, v: t.Any) -> bool: - sensitive_fields = [ - "account_name", - "client_id", - ] - sensitive_triggers = ["key", "cred", "token", "password", "oauth", "secret"] - return ( - v - and any([s in k.lower() for s in sensitive_triggers]) # noqa: C419 - or k.lower() in sensitive_fields - ) - - -def hide_sensitive_fields( - data: dict, is_sensitive_fn: t.Callable[[str, t.Any], bool] = default_is_data_sensitive -) -> dict: - """ - Will recursively look through every k, v pair in this dict and any nested ones and run - is_sensitive_fn to dynamically redact the value of the k, v pair. Will also check if - any string value can be parsed as valid json and process that dict as well and replace - the original string with the json.dumps() version of the redacted dict. - """ - new_data = data.copy() - for k, v in new_data.items(): - if is_sensitive_fn(k, v): - new_data[k] = "*******" - if isinstance(v, dict): - new_data[k] = hide_sensitive_fields(v) - if isinstance(v, str): - # Need to take into account strings generated via json.dumps() or simply printing a dict - try: - json_data = json.loads(v) - if isinstance(json_data, dict): - updated_data = hide_sensitive_fields(json_data) - new_data[k] = json.dumps(updated_data) - except json.JSONDecodeError: - pass - - return new_data - - -def redact_jsons(s: str) -> str: - """ - Takes in a generic string and pulls out all valid json content. Leverages - hide_sensitive_fields() to redact any sensitive information and replaces the - original json with the new redacted format. There can be any number of valid - jsons in a generic string and this will work. Having extra '{' without a - closing '}' will cause this to break though. i.e '{ text, {"a": 3}'. 
- - """ - chars = list(s) - if "{" not in chars: - return s - i = 0 - jsons = [] - i = 0 - while i < len(chars): - char = chars[i] - if char == "{": - stack = [char] - current = [char] - while len(stack) != 0 and i < len(chars): - i += 1 - char = chars[i] - current.append(char) - if char == "{": - stack.append(char) - if char == "}": - stack.pop(-1) - jsons.append("".join(current)) - continue - i += 1 - for j in jsons: - try: - formatted_j = json.dumps(json.loads(j)) - except json.JSONDecodeError: - formatted_j = json.dumps(ast.literal_eval(j)) - hidden_j = json.dumps(hide_sensitive_fields(json.loads(formatted_j))) - s = s.replace(j, hidden_j) - return s - - -class SensitiveFormatter(logging.Formatter): - def format(self, record): - s = super().format(record=record) - return redact_jsons(s) - - -def remove_root_handlers(logger: logging.Logger) -> None: - # NOTE(robinson) - in some environments such as Google Colab, there is a root handler - # that doesn't not mask secrets, meaning sensitive info such as api keys appear in logs. - # Removing these when they exist prevents this behavior - if logger.root.hasHandlers(): - for handler in logger.root.handlers: - logger.root.removeHandler(handler) - - -def ingest_log_streaming_init(level: int) -> None: - handler = logging.StreamHandler() - handler.name = "ingest_log_handler" - formatter = SensitiveFormatter("%(asctime)s %(processName)-10s %(levelname)-8s %(message)s") - handler.setFormatter(formatter) - - # Only want to add the handler once - if "ingest_log_handler" not in [h.name for h in logger.handlers]: - logger.addHandler(handler) - - remove_root_handlers(logger) - logger.setLevel(level) - - -def make_default_logger(level: int) -> logging.Logger: - """Return a custom logger.""" - logger = logging.getLogger("unstructured.ingest") - handler = logging.StreamHandler() - handler.name = "ingest_log_handler" - formatter = SensitiveFormatter("%(asctime)s %(processName)-10s %(levelname)-8s %(message)s") - handler.setFormatter(formatter) - logger.addHandler(handler) - logger.setLevel(level) - remove_root_handlers(logger) - return logger diff --git a/unstructured/ingest/main.py b/unstructured/ingest/main.py deleted file mode 100755 index ead616f40..000000000 --- a/unstructured/ingest/main.py +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env python3 -from unstructured.ingest.cli.cli import get_cmd - - -def main(): - ingest_cmd = get_cmd() - ingest_cmd() - - -if __name__ == "__main__": - main() diff --git a/unstructured/ingest/pipeline/__init__.py b/unstructured/ingest/pipeline/__init__.py deleted file mode 100644 index 439647b60..000000000 --- a/unstructured/ingest/pipeline/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -from .doc_factory import DocFactory -from .interfaces import PipelineContext, ReformatNode -from .partition import Partitioner -from .permissions import PermissionsDataCleaner -from .pipeline import Pipeline -from .reformat.chunking import Chunker -from .reformat.embedding import Embedder -from .source import Reader -from .write import Writer - -__all__ = [ - "DocFactory", - "Partitioner", - "Reader", - "Embedder", - "PipelineContext", - "Pipeline", - "Writer", - "Chunker", - "ReformatNode", - "PermissionsDataCleaner", -] diff --git a/unstructured/ingest/pipeline/copy.py b/unstructured/ingest/pipeline/copy.py deleted file mode 100644 index 5ec195265..000000000 --- a/unstructured/ingest/pipeline/copy.py +++ /dev/null @@ -1,19 +0,0 @@ -import os -import shutil -from pathlib import Path - -from unstructured.ingest.connector.registry import 
create_ingest_doc_from_dict -from unstructured.ingest.logger import logger -from unstructured.ingest.pipeline.interfaces import CopyNode - - -class Copier(CopyNode): - def run(self, json_path: str): - filename = os.path.basename(json_path) - doc_hash = os.path.splitext(filename)[0] - ingest_doc_dict = self.pipeline_context.ingest_docs_map[doc_hash] - ingest_doc = create_ingest_doc_from_dict(ingest_doc_dict) - desired_output = ingest_doc._output_filename - Path(desired_output).parent.mkdir(parents=True, exist_ok=True) - logger.info(f"Copying {json_path} -> {desired_output}") - shutil.copy(json_path, desired_output) diff --git a/unstructured/ingest/pipeline/doc_factory.py b/unstructured/ingest/pipeline/doc_factory.py deleted file mode 100644 index 38feca4e4..000000000 --- a/unstructured/ingest/pipeline/doc_factory.py +++ /dev/null @@ -1,12 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.pipeline.interfaces import DocFactoryNode - - -@dataclass -class DocFactory(DocFactoryNode): - def run(self, *args, **kwargs) -> t.Iterable[dict]: - docs = self.source_doc_connector.get_ingest_docs() - json_docs = [doc.to_dict() for doc in docs] - return json_docs diff --git a/unstructured/ingest/pipeline/interfaces.py b/unstructured/ingest/pipeline/interfaces.py deleted file mode 100644 index 8db9e536c..000000000 --- a/unstructured/ingest/pipeline/interfaces.py +++ /dev/null @@ -1,265 +0,0 @@ -import hashlib -import json -import logging -import multiprocessing as mp -import typing as t -from abc import ABC, abstractmethod -from dataclasses import dataclass, field -from multiprocessing.managers import DictProxy -from pathlib import Path - -import backoff -from dataclasses_json import DataClassJsonMixin - -from unstructured.ingest.error import SourceConnectionNetworkError -from unstructured.ingest.ingest_backoff import RetryHandler -from unstructured.ingest.interfaces import ( - BaseDestinationConnector, - BaseSourceConnector, - PartitionConfig, - ProcessorConfig, - ReadConfig, - RetryStrategyConfig, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger - - -@dataclass -class PipelineContext(ProcessorConfig): - """ - Data that gets shared across each pipeline node - """ - - def __post_init__(self): - self._ingest_docs_map: t.Optional[DictProxy] = None - - @property - def ingest_docs_map(self) -> DictProxy: - if self._ingest_docs_map is None: - raise ValueError("ingest_docs_map never initialized") - return self._ingest_docs_map - - @ingest_docs_map.setter - def ingest_docs_map(self, value: DictProxy): - self._ingest_docs_map = value - - -@dataclass -class PipelineNode(DataClassJsonMixin, ABC): - """ - Class that encapsulates logic to run during a single pipeline step - """ - - pipeline_context: PipelineContext - - def __call__(self, iterable: t.Optional[t.Iterable[t.Any]] = None) -> t.Any: - iterable = iterable if iterable else [] - if iterable: - logger.info( - f"Calling {self.__class__.__name__} " f"with {len(iterable)} docs", # type: ignore - ) - - self.initialize() - if not self.supported_multiprocessing(): - if iterable: - self.result = self.run(iterable) - else: - self.result = self.run() - elif self.pipeline_context.num_processes == 1: - if iterable: - self.result = [self.run(it) for it in iterable] - else: - self.result = self.run() - else: - with mp.Pool( - processes=self.pipeline_context.num_processes, - initializer=ingest_log_streaming_init, - initargs=(logging.DEBUG if self.pipeline_context.verbose else logging.INFO,), - ) as pool: - 
self.result = pool.map(self.run, iterable) - # Remove None which may be caused by failed docs that didn't raise an error - if isinstance(self.result, t.Iterable): - self.result = [r for r in self.result if r is not None] - return self.result - - def supported_multiprocessing(self) -> bool: - return True - - @abstractmethod - def run(self, *args, **kwargs) -> t.Optional[t.Any]: - pass - - def initialize(self): - if path := self.get_path(): - logger.info(f"Creating {path}") - path.mkdir(parents=True, exist_ok=True) - ingest_log_streaming_init(logging.DEBUG if self.pipeline_context.verbose else logging.INFO) - - def get_path(self) -> t.Optional[Path]: - return None - - -@dataclass -class DocFactoryNode(PipelineNode): - """ - Encapsulated logic to generate a list of ingest docs - """ - - source_doc_connector: BaseSourceConnector - - def initialize(self): - logger.info( - f"Running doc factory to generate ingest docs. " - f"Source connector: {self.source_doc_connector.to_json()}", - ) - super().initialize() - self.source_doc_connector.initialize() - - @abstractmethod - def run(self, *args, **kwargs) -> t.Iterable[dict]: - pass - - def supported_multiprocessing(self) -> bool: - return False - - -@dataclass -class SourceNode(PipelineNode): - """A pipeline node representing logic to pull data from a source using base ingest documents. - - This class encapsulates the logic for pulling data from a specified source using base ingest - documents. The output of this logic is expected to be in JSON format representing the data - itself. - - Attributes: - read_config: A configuration object specifying how to read data from the source. - retry_strategy_config: Optional configuration specifying the strategy for network errors. - - Properties: - retry_strategy: A retry handler configured based on the retry strategy configuration. - - Methods: - initialize: Initializes the source node and logs the process. - run: Abstract method for downloading data associated with ingest documents. - """ - - read_config: ReadConfig - retry_strategy_config: t.Optional[RetryStrategyConfig] = None - - @property - def retry_strategy(self) -> t.Optional[RetryHandler]: - if retry_strategy_config := self.retry_strategy_config: - return RetryHandler( - backoff.expo, - SourceConnectionNetworkError, - max_time=retry_strategy_config.max_retry_time, - max_tries=retry_strategy_config.max_retries, - logger=logger, - start_log_level=logger.level, - backoff_log_level=logger.level, - ) - return None - - def initialize(self): - logger.info("Running source node to download data associated with ingest docs") - super().initialize() - - @abstractmethod - def run(self, ingest_doc_json: str) -> t.Optional[str]: - pass - - -@dataclass -class PartitionNode(PipelineNode): - """ - Encapsulates logic to run partition on the json files as the output of the source node - """ - - partition_config: PartitionConfig - partition_kwargs: dict = field(default_factory=dict) - - def initialize(self): - logger.info( - f"Running partition node to extract content from json files. 
" - f"Config: {self.partition_config.to_json()}, " - f"partition kwargs: {json.dumps(self.partition_kwargs)}]", - ) - super().initialize() - - def create_hash(self) -> str: - hash_dict = self.partition_config.to_dict() - hash_dict["partition_kwargs"] = self.partition_kwargs - return hashlib.sha256(json.dumps(hash_dict, sort_keys=True).encode()).hexdigest()[:32] - - @abstractmethod - def run(self, json_path: str) -> t.Optional[str]: - pass - - def get_path(self) -> Path: - return (Path(self.pipeline_context.work_dir) / "partitioned").resolve() - - -@dataclass -class ReformatNode(PipelineNode, ABC): - """ - Encapsulated any logic to reformat the output List[Element] - content from partition before writing it - """ - - @abstractmethod - def run(self, elements_json: str) -> t.Optional[str]: - pass - - -@dataclass -class WriteNode(PipelineNode): - """ - Encapsulated logic to write the final result to a downstream data connection - """ - - dest_doc_connector: BaseDestinationConnector - - @abstractmethod - def run(self, json_paths: t.List[str]): - pass - - def initialize(self): - logger.info( - f"Running write node to upload content. " - f"Destination connector: {self.dest_doc_connector.to_json(redact_sensitive=True)}]", - ) - super().initialize() - self.dest_doc_connector.initialize() - - def supported_multiprocessing(self) -> bool: - return False - - -@dataclass -class CopyNode(PipelineNode): - """ - Encapsulated logic to copy the final result of the pipeline to the designated output location. - """ - - def initialize(self): - logger.info("Running copy node to move content to desired output location") - super().initialize() - - @abstractmethod - def run(self, json_path: str): - pass - - -@dataclass -class PermissionsNode(PipelineNode): - """ - Encapsulated logic to do operations on permissions related data. 
- """ - - def initialize(self): - logger.info("Running permissions node to cleanup the permissions folder") - super().initialize() - - @abstractmethod - def run(self): - pass diff --git a/unstructured/ingest/pipeline/partition.py b/unstructured/ingest/pipeline/partition.py deleted file mode 100644 index 4aa2ccc86..000000000 --- a/unstructured/ingest/pipeline/partition.py +++ /dev/null @@ -1,60 +0,0 @@ -import hashlib -import json -import typing as t -from dataclasses import dataclass -from pathlib import Path -from typing import Optional - -from unstructured.ingest.connector.registry import create_ingest_doc_from_dict -from unstructured.ingest.error import PartitionError -from unstructured.ingest.logger import logger -from unstructured.ingest.pipeline.interfaces import PartitionNode -from unstructured.ingest.pipeline.utils import get_ingest_doc_hash - - -@dataclass -class Partitioner(PartitionNode): - @PartitionError.wrap - def run(self, ingest_doc_dict) -> Optional[str]: - try: - doc = create_ingest_doc_from_dict(ingest_doc_dict) - doc_filename_hash = get_ingest_doc_hash(ingest_doc_dict) - hashed_filename = hashlib.sha256( - f"{self.create_hash()}{doc_filename_hash}".encode(), - ).hexdigest()[:32] - self.pipeline_context.ingest_docs_map[hashed_filename] = ingest_doc_dict - doc_filename = f"{hashed_filename}.json" - json_path = (Path(self.get_path()) / doc_filename).resolve() - if ( - not self.pipeline_context.reprocess - and json_path.is_file() - and json_path.stat().st_size - ): - logger.info(f"File exists: {json_path}, skipping partition") - return str(json_path) - partition_kwargs: t.Dict[str, t.Any] = { - "strategy": self.partition_config.strategy, - "encoding": self.partition_config.encoding, - "pdf_infer_table_structure": self.partition_config.pdf_infer_table_structure, - "languages": self.partition_config.ocr_languages, - "hi_res_model_name": self.partition_config.hi_res_model_name, - } - if self.partition_config.skip_infer_table_types: - partition_kwargs["skip_infer_table_types"] = ( - self.partition_config.skip_infer_table_types - ) - if self.partition_config.additional_partition_args: - partition_kwargs.update(self.partition_config.additional_partition_args) - elements = doc.process_file( - partition_config=self.partition_config, - **partition_kwargs, - ) - with open(json_path, "w", encoding="utf8") as output_f: - logger.info(f"writing partitioned content to {json_path}") - json.dump(elements, output_f, ensure_ascii=False, indent=2, sort_keys=True) - return str(json_path) - except Exception as e: - if self.pipeline_context.raise_on_error: - raise - logger.error(f"failed to partition doc: {ingest_doc_dict}, {e}", exc_info=True) - return None diff --git a/unstructured/ingest/pipeline/permissions.py b/unstructured/ingest/pipeline/permissions.py deleted file mode 100644 index 5a93b3cca..000000000 --- a/unstructured/ingest/pipeline/permissions.py +++ /dev/null @@ -1,12 +0,0 @@ -from dataclasses import dataclass - -from unstructured.ingest.interfaces import PermissionsCleanupMixin, ProcessorConfig -from unstructured.ingest.pipeline.interfaces import PermissionsNode - - -@dataclass -class PermissionsDataCleaner(PermissionsNode, PermissionsCleanupMixin): - processor_config: ProcessorConfig - - def run(self): - self.cleanup_permissions() diff --git a/unstructured/ingest/pipeline/pipeline.py b/unstructured/ingest/pipeline/pipeline.py deleted file mode 100644 index 6c6897885..000000000 --- a/unstructured/ingest/pipeline/pipeline.py +++ /dev/null @@ -1,117 +0,0 @@ -import logging -import 
multiprocessing as mp -from dataclasses import dataclass, field -from typing import Any, Optional - -from dataclasses_json import DataClassJsonMixin - -from unstructured.ingest.connector.registry import create_ingest_doc_from_dict -from unstructured.ingest.interfaces import BaseIngestDocBatch, BaseSingleIngestDoc -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.pipeline.copy import Copier -from unstructured.ingest.pipeline.interfaces import ( - DocFactoryNode, - PartitionNode, - PipelineContext, - ReformatNode, - SourceNode, - WriteNode, -) -from unstructured.ingest.pipeline.permissions import PermissionsDataCleaner -from unstructured.ingest.pipeline.utils import get_ingest_doc_hash - - -@dataclass -class Pipeline(DataClassJsonMixin): - pipeline_context: PipelineContext - doc_factory_node: DocFactoryNode - source_node: SourceNode - partition_node: Optional[PartitionNode] = None - write_node: Optional[WriteNode] = None - reformat_nodes: "list[ReformatNode]" = field(default_factory=list) - permissions_node: Optional[PermissionsDataCleaner] = None - - def initialize(self): - ingest_log_streaming_init(logging.DEBUG if self.pipeline_context.verbose else logging.INFO) - - def get_nodes_str(self): - nodes = [self.doc_factory_node, self.source_node, self.partition_node] - nodes.extend(self.reformat_nodes) - if self.write_node: - nodes.append(self.write_node) - nodes.append(Copier(pipeline_context=self.pipeline_context)) - return " -> ".join([node.__class__.__name__ for node in nodes]) - - def expand_batch_docs(self, dict_docs: "list[dict[str, Any]]") -> "list[dict[str, Any]]": - expanded_docs: list[dict[str, Any]] = [] - for d in dict_docs: - doc = create_ingest_doc_from_dict(d) - if isinstance(doc, BaseSingleIngestDoc): - expanded_docs.append(doc.to_dict()) - elif isinstance(doc, BaseIngestDocBatch): - expanded_docs.extend([single_doc.to_dict() for single_doc in doc.ingest_docs]) - else: - raise ValueError( - f"type of doc ({type(doc)}) is not a recognized type: " - f"BaseSingleIngestDoc or BaseSingleIngestDoc" - ) - return expanded_docs - - def run(self): - logger.info( - f"running pipeline: {self.get_nodes_str()} " - f"with config: {self.pipeline_context.to_json()}", - ) - self.initialize() - manager = mp.Manager() - self.pipeline_context.ingest_docs_map = manager.dict() - # -- Get the documents to be processed -- - dict_docs = self.doc_factory_node() - dict_docs = [manager.dict(d) for d in dict_docs] - if not dict_docs: - logger.info("no docs found to process") - return - logger.info( - f"processing {len(dict_docs)} docs via " - f"{self.pipeline_context.num_processes} processes", - ) - for doc in dict_docs: - self.pipeline_context.ingest_docs_map[get_ingest_doc_hash(doc)] = doc - fetched_filenames = self.source_node(iterable=dict_docs) - if self.source_node.read_config.download_only: - logger.info("stopping pipeline after downloading files") - return - if not fetched_filenames: - logger.info("No files to run partition over") - return - # -- To support batches ingest docs, expand those into the populated single ingest - # -- docs after downloading content - dict_docs = self.expand_batch_docs(dict_docs=dict_docs) - if self.partition_node is None: - raise ValueError("partition node not set") - partitioned_jsons = self.partition_node(iterable=dict_docs) - if not partitioned_jsons: - logger.info("No files to process after partitioning") - return - for reformat_node in self.reformat_nodes: - reformatted_jsons = 
reformat_node(iterable=partitioned_jsons)
-                if not reformatted_jsons:
-                    logger.info(f"No files to process after {reformat_node.__class__.__name__}")
-                    return
-                partitioned_jsons = reformatted_jsons
-
-        # -- Copy the final destination to the desired location --
-        copier = Copier(
-            pipeline_context=self.pipeline_context,
-        )
-        copier(iterable=partitioned_jsons)
-
-        if self.write_node:
-            logger.info(
-                f"uploading elements from {len(partitioned_jsons)} "
-                "document(s) to the destination"
-            )
-            self.write_node(iterable=partitioned_jsons)
-
-        if self.permissions_node:
-            self.permissions_node.cleanup_permissions()
diff --git a/unstructured/ingest/pipeline/reformat/__init__.py b/unstructured/ingest/pipeline/reformat/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/unstructured/ingest/pipeline/reformat/chunking.py b/unstructured/ingest/pipeline/reformat/chunking.py
deleted file mode 100644
index b061cfa1c..000000000
--- a/unstructured/ingest/pipeline/reformat/chunking.py
+++ /dev/null
@@ -1,129 +0,0 @@
-from __future__ import annotations
-
-import hashlib
-import json
-import os.path
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Optional
-
-from unstructured.chunking import dispatch
-from unstructured.documents.elements import Element, assign_and_map_hash_ids
-from unstructured.ingest.interfaces import ChunkingConfig, PartitionConfig
-from unstructured.ingest.logger import logger
-from unstructured.ingest.pipeline.interfaces import ReformatNode
-from unstructured.partition.api import partition_via_api
-from unstructured.staging.base import elements_from_json, elements_to_dicts
-
-
-@dataclass
-class Chunker(ReformatNode):
-    """Implementation for the chunking node in the ingest pipeline.
-
-    Parameters
-    ----------
-    pipeline_context: PipelineContext (inherited from parent class)
-    chunking_config: ChunkingConfig
-    partition_config: PartitionConfig
-    """
-
-    chunking_config: ChunkingConfig
-    partition_config: PartitionConfig
-
-    def initialize(self):
-        logger.info(
-            f"Running chunking node. Chunking config: {self.chunking_config.to_json()}]",
-        )
-        super().initialize()
-
-    def create_hash(self) -> str:
-        hash_dict = self.chunking_config.to_dict()
-        return hashlib.sha256(json.dumps(hash_dict, sort_keys=True).encode()).hexdigest()[:32]
-
-    def run(self, elements_json: str) -> Optional[str]:
-        try:
-            elements_json_filename = os.path.basename(elements_json)
-            filename_ext = os.path.basename(elements_json_filename)
-            filename = os.path.splitext(filename_ext)[0]
-            hashed_filename = hashlib.sha256(
-                f"{self.create_hash()}{filename}".encode(),
-            ).hexdigest()[:32]
-            json_filename = f"{hashed_filename}.json"
-            json_path = (Path(self.get_path()) / json_filename).resolve()
-            self.pipeline_context.ingest_docs_map[hashed_filename] = (
-                self.pipeline_context.ingest_docs_map[filename]
-            )
-            if (
-                not self.pipeline_context.reprocess
-                and json_path.is_file()
-                and json_path.stat().st_size
-            ):
-                logger.debug(f"File exists: {json_path}, skipping chunking")
-                return str(json_path)
-
-            chunked_elements = self.chunk(elements_json)
-
-            # -- return if chunking_strategy is None --
-            if chunked_elements is None:
-                logger.info(f"chunking_strategy is None, skipping chunking for {filename_ext}")
-                return
-
-            assign_and_map_hash_ids(chunked_elements)
-
-            element_dicts = elements_to_dicts(chunked_elements)
-            with open(json_path, "w", encoding="utf8") as output_f:
-                logger.info(f"writing chunking content to {json_path}")
-                json.dump(element_dicts, output_f, ensure_ascii=False, indent=2)
-            return str(json_path)
-
-        except Exception as e:
-            if self.pipeline_context.raise_on_error:
-                raise
-            logger.error(f"failed to run chunking on file {elements_json}, {e}", exc_info=True)
-            return None
-
-    def get_path(self) -> Path:
-        return (Path(self.pipeline_context.work_dir) / "chunked").resolve()
-
-    def chunk(self, elements_json_file: str) -> Optional[list[Element]]:
-        """Called by Chunker.run() to properly execute the defined chunking_strategy."""
-        # -- No chunking_strategy means no chunking --
-        if self.chunking_config.chunking_strategy is None:
-            return
-        # -- Chunk locally for open-source chunking strategies, even when partitioning remotely --
-        if self.chunking_config.chunking_strategy in ("basic", "by_title"):
-            return dispatch.chunk(
-                elements=elements_from_json(filename=elements_json_file),
-                chunking_strategy=self.chunking_config.chunking_strategy,
-                combine_text_under_n_chars=self.chunking_config.combine_text_under_n_chars,
-                include_orig_elements=self.chunking_config.include_orig_elements,
-                max_characters=self.chunking_config.max_characters,
-                multipage_sections=self.chunking_config.multipage_sections,
-                new_after_n_chars=self.chunking_config.new_after_n_chars,
-                overlap=self.chunking_config.overlap,
-                overlap_all=self.chunking_config.overlap_all,
-            )
-        # -- Chunk remotely --
-        if self.partition_config.partition_by_api:
-            return partition_via_api(
-                filename=elements_json_file,
-                # -- (jennings) If api_key or api_url are None, partition_via_api will raise an
-                # -- error, which will be caught and logged by Chunker.run()
-                api_key=self.partition_config.api_key, # type: ignore
-                api_url=self.partition_config.partition_endpoint, # type: ignore
-                chunking_strategy=self.chunking_config.chunking_strategy,
-                combine_under_n_chars=self.chunking_config.combine_text_under_n_chars,
-                include_orig_elements=self.chunking_config.include_orig_elements,
-                max_characters=self.chunking_config.max_characters,
-                multipage_sections=self.chunking_config.multipage_sections,
-                new_after_n_chars=self.chunking_config.new_after_n_chars,
-                overlap=self.chunking_config.overlap,
-                overlap_all=self.chunking_config.overlap_all,
-            )
-        # -- Warn that the defined chunking_strategy is not locally available --
-        logger.warning(
-            f"There is no locally available chunking_strategy:"
-            f" {self.chunking_config.chunking_strategy}."
-            f" If trying to partition remotely, check that `partition_by_api`, `api_url`,"
-            f" and `api_key` are correctly defined."
-        )
diff --git a/unstructured/ingest/pipeline/reformat/embedding.py b/unstructured/ingest/pipeline/reformat/embedding.py
deleted file mode 100644
index 58d47b429..000000000
--- a/unstructured/ingest/pipeline/reformat/embedding.py
+++ /dev/null
@@ -1,65 +0,0 @@
-import hashlib
-import json
-import os.path
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Optional
-
-from unstructured.ingest.interfaces import (
-    EmbeddingConfig,
-)
-from unstructured.ingest.logger import logger
-from unstructured.ingest.pipeline.interfaces import ReformatNode
-from unstructured.staging.base import elements_from_json, elements_to_dicts
-
-
-@dataclass
-class Embedder(ReformatNode):
-    embedder_config: EmbeddingConfig
-
-    def initialize(self):
-        logger.info(
-            f"Running embedding node. Embedding config: {self.embedder_config.to_json()}]",
-        )
-        super().initialize()
-
-    def create_hash(self) -> str:
-        hash_dict = self.embedder_config.to_dict()
-        return hashlib.sha256(json.dumps(hash_dict, sort_keys=True).encode()).hexdigest()[:32]
-
-    def run(self, elements_json: str) -> Optional[str]:
-        try:
-            elements_json_filename = os.path.basename(elements_json)
-            filename_ext = os.path.basename(elements_json_filename)
-            filename = os.path.splitext(filename_ext)[0]
-            hashed_filename = hashlib.sha256(
-                f"{self.create_hash()}{filename}".encode(),
-            ).hexdigest()[:32]
-            json_filename = f"{hashed_filename}.json"
-            json_path = (Path(self.get_path()) / json_filename).resolve()
-            self.pipeline_context.ingest_docs_map[hashed_filename] = (
-                self.pipeline_context.ingest_docs_map[filename]
-            )
-            if (
-                not self.pipeline_context.reprocess
-                and json_path.is_file()
-                and json_path.stat().st_size
-            ):
-                logger.debug(f"File exists: {json_path}, skipping embedding")
-                return str(json_path)
-            elements = elements_from_json(filename=elements_json)
-            embedder = self.embedder_config.get_embedder()
-            embedded_elements = embedder.embed_documents(elements=elements)
-            element_dicts = elements_to_dicts(embedded_elements)
-            with open(json_path, "w", encoding="utf8") as output_f:
-                logger.info(f"writing embeddings content to {json_path}")
-                json.dump(element_dicts, output_f, ensure_ascii=False, indent=2)
-            return str(json_path)
-        except Exception as e:
-            if self.pipeline_context.raise_on_error:
-                raise
-            logger.error(f"failed to embed content from file {elements_json}, {e}", exc_info=True)
-            return None
-
-    def get_path(self) -> Path:
-        return (Path(self.pipeline_context.work_dir) / "embedded").resolve()
diff --git a/unstructured/ingest/pipeline/source.py b/unstructured/ingest/pipeline/source.py
deleted file mode 100644
index ee1087a07..000000000
--- a/unstructured/ingest/pipeline/source.py
+++ /dev/null
@@ -1,77 +0,0 @@
-import os
-import typing as t
-from dataclasses import dataclass
-
-from unstructured.ingest.connector.registry import create_ingest_doc_from_dict
-from unstructured.ingest.interfaces import (
-    BaseIngestDocBatch,
-    BaseSessionHandle,
-    BaseSingleIngestDoc,
-    IngestDocSessionHandleMixin,
-)
-from unstructured.ingest.logger import logger
-from unstructured.ingest.pipeline.interfaces import SourceNode - -# module-level variable to store session handle -session_handle: t.Optional[BaseSessionHandle] = None - - -@dataclass -class Reader(SourceNode): - def get_single(self, doc: BaseSingleIngestDoc, ingest_doc_dict: dict) -> str: - if ( - not self.read_config.re_download - and doc.filename.is_file() - and doc.filename.stat().st_size - ): - logger.info(f"File exists: {doc.filename}, skipping download") - # Still need to fetch metadata if file exists locally - doc.update_source_metadata() - else: - serialized_doc = doc.to_json(redact_sensitive=True) - logger.debug(f"Fetching {serialized_doc} - PID: {os.getpid()}") - if self.retry_strategy: - self.retry_strategy(doc.get_file) - else: - doc.get_file() - for k, v in doc.to_dict().items(): - ingest_doc_dict[k] = v - return doc.filename - - def get_batch(self, doc_batch: BaseIngestDocBatch, ingest_doc_dict: dict) -> t.List[str]: - if self.retry_strategy: - self.retry_strategy(doc_batch.get_files) - else: - doc_batch.get_files() - for k, v in doc_batch.to_dict().items(): - ingest_doc_dict[k] = v - return [doc.filename for doc in doc_batch.ingest_docs] - - def run(self, ingest_doc_dict: dict) -> t.Optional[t.Union[str, t.List[str]]]: - try: - global session_handle - doc = create_ingest_doc_from_dict(ingest_doc_dict) - if isinstance(doc, IngestDocSessionHandleMixin): - if session_handle is None: - # create via doc.session_handle, which is a property that creates a - # session handle if one is not already defined - session_handle = doc.session_handle - else: - doc._session_handle = session_handle - if isinstance(doc, BaseSingleIngestDoc): - return self.get_single(doc=doc, ingest_doc_dict=ingest_doc_dict) - elif isinstance(doc, BaseIngestDocBatch): - return self.get_batch(doc_batch=doc, ingest_doc_dict=ingest_doc_dict) - else: - raise ValueError( - f"type of doc ({type(doc)}) is not a recognized type: " - f"BaseSingleIngestDoc or BaseSingleIngestDoc" - ) - except Exception as e: - if self.pipeline_context.raise_on_error: - raise - logger.error( - f"failed to get data associated with source doc: {ingest_doc_dict}, {e}", - exc_info=True, - ) - return None diff --git a/unstructured/ingest/pipeline/utils.py b/unstructured/ingest/pipeline/utils.py deleted file mode 100644 index bcd6aa2ab..000000000 --- a/unstructured/ingest/pipeline/utils.py +++ /dev/null @@ -1,6 +0,0 @@ -import hashlib - - -def get_ingest_doc_hash(json_as_dict: dict) -> str: - hashed = hashlib.sha256(json_as_dict["unique_id"].encode()).hexdigest()[:32] - return hashed diff --git a/unstructured/ingest/pipeline/write.py b/unstructured/ingest/pipeline/write.py deleted file mode 100644 index 7a0540983..000000000 --- a/unstructured/ingest/pipeline/write.py +++ /dev/null @@ -1,18 +0,0 @@ -import os.path -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.connector.registry import create_ingest_doc_from_dict -from unstructured.ingest.pipeline.interfaces import WriteNode - - -@dataclass -class Writer(WriteNode): - def run(self, json_paths: t.List[str]): - ingest_docs = [] - for json_path in json_paths: - filename = os.path.basename(json_path) - doc_hash = os.path.splitext(filename)[0] - ingest_doc_dict = self.pipeline_context.ingest_docs_map[doc_hash] - ingest_docs.append(create_ingest_doc_from_dict(ingest_doc_dict)) - self.dest_doc_connector.write(docs=ingest_docs) diff --git a/unstructured/ingest/processor.py b/unstructured/ingest/processor.py deleted file mode 100644 index cf4c775cd..000000000 --- 
a/unstructured/ingest/processor.py +++ /dev/null @@ -1,93 +0,0 @@ -from __future__ import annotations - -import multiprocessing as mp -from contextlib import suppress -from typing import Optional - -from unstructured.ingest.interfaces import ( - BaseDestinationConnector, - BaseSourceConnector, - ChunkingConfig, - EmbeddingConfig, - PartitionConfig, - PermissionsConfig, - ProcessorConfig, - RetryStrategyConfig, -) -from unstructured.ingest.pipeline import ( - Chunker, - DocFactory, - Embedder, - Partitioner, - PermissionsDataCleaner, - Pipeline, - PipelineContext, - Reader, - ReformatNode, - Writer, -) - -with suppress(RuntimeError): - mp.set_start_method("spawn") - - -def process_documents( - processor_config: ProcessorConfig, - source_doc_connector: BaseSourceConnector, - partition_config: PartitionConfig, - dest_doc_connector: Optional[BaseDestinationConnector] = None, - chunking_config: Optional[ChunkingConfig] = None, - embedder_config: Optional[EmbeddingConfig] = None, - permissions_config: Optional[PermissionsConfig] = None, - retry_strategy_config: Optional[RetryStrategyConfig] = None, -) -> None: - pipeline_config = PipelineContext.from_dict(processor_config.to_dict()) - doc_factory = DocFactory( - pipeline_context=pipeline_config, - source_doc_connector=source_doc_connector, - ) - reader = Reader( - pipeline_context=pipeline_config, - retry_strategy_config=retry_strategy_config, - read_config=source_doc_connector.read_config, - ) - partitioner = Partitioner(pipeline_context=pipeline_config, partition_config=partition_config) - reformat_nodes: list[ReformatNode] = [] - if chunking_config: - reformat_nodes.append( - Chunker( - pipeline_context=pipeline_config, - chunking_config=chunking_config, - partition_config=partition_config, - ), - ) - if embedder_config: - reformat_nodes.append( - Embedder( - pipeline_context=pipeline_config, - embedder_config=embedder_config, - ), - ) - writer = ( - Writer( - pipeline_context=pipeline_config, - dest_doc_connector=dest_doc_connector, - ) - if dest_doc_connector - else None - ) - permissions_data_cleaner = ( - PermissionsDataCleaner(pipeline_context=pipeline_config, processor_config=processor_config) - if permissions_config - else None - ) - pipeline = Pipeline( - pipeline_context=pipeline_config, - doc_factory_node=doc_factory, - source_node=reader, - partition_node=partitioner, - reformat_nodes=reformat_nodes, - write_node=writer, - permissions_node=permissions_data_cleaner, - ) - pipeline.run() diff --git a/unstructured/ingest/runner/__init__.py b/unstructured/ingest/runner/__init__.py deleted file mode 100644 index 872ebb10d..000000000 --- a/unstructured/ingest/runner/__init__.py +++ /dev/null @@ -1,104 +0,0 @@ -import typing as t -from typing import Type - -from .airtable import AirtableRunner -from .astradb import AstraDBRunner -from .base_runner import Runner -from .biomed import BiomedRunner -from .confluence import ConfluenceRunner -from .delta_table import DeltaTableRunner -from .discord import DiscordRunner -from .elasticsearch import ElasticSearchRunner -from .fsspec.azure import AzureRunner -from .fsspec.box import BoxRunner -from .fsspec.dropbox import DropboxRunner -from .fsspec.fsspec import FsspecRunner -from .fsspec.gcs import GCSRunner -from .fsspec.s3 import S3Runner -from .fsspec.sftp import SftpRunner -from .github import GithubRunner -from .gitlab import GitlabRunner -from .google_drive import GoogleDriveRunner -from .hubspot import HubSpotRunner -from .jira import JiraRunner -from .kafka import KafkaRunner -from .local 
import LocalRunner -from .mongodb import MongoDBRunner -from .notion import NotionRunner -from .onedrive import OneDriveRunner -from .opensearch import OpenSearchRunner -from .outlook import OutlookRunner -from .reddit import RedditRunner -from .salesforce import SalesforceRunner -from .sharepoint import SharePointRunner -from .slack import SlackRunner -from .wikipedia import WikipediaRunner - -runner_map: t.Dict[str, Type[Runner]] = { - "airtable": AirtableRunner, - "astradb": AstraDBRunner, - "azure": AzureRunner, - "biomed": BiomedRunner, - "box": BoxRunner, - "confluence": ConfluenceRunner, - "delta_table": DeltaTableRunner, - "discord": DiscordRunner, - "dropbox": DropboxRunner, - "elasticsearch": ElasticSearchRunner, - "fsspec": FsspecRunner, - "gcs": GCSRunner, - "github": GithubRunner, - "gitlab": GitlabRunner, - "gdrive": GoogleDriveRunner, - "google_drive": GoogleDriveRunner, - "hubspot": HubSpotRunner, - "jira": JiraRunner, - "kafka": KafkaRunner, - "local": LocalRunner, - "mongodb": MongoDBRunner, - "notion": NotionRunner, - "onedrive": OneDriveRunner, - "opensearch": OpenSearchRunner, - "outlook": OutlookRunner, - "reddit": RedditRunner, - "s3": S3Runner, - "salesforce": SalesforceRunner, - "sftp": SftpRunner, - "sharepoint": SharePointRunner, - "slack": SlackRunner, - "wikipedia": WikipediaRunner, -} - -__all__ = [ - "AirtableRunner", - "AstraRunner", - "AzureRunner", - "BiomedRunner", - "BoxRunner", - "ConfluenceRunner", - "DeltaTableRunner", - "DiscordRunner", - "DropboxRunner", - "ElasticSearchRunner", - "FsspecRunner", - "GCSRunner", - "GoogleDriveRunner", - "GithubRunner", - "GitlabRunner", - "JiraRunner", - "KafkaRunner", - "LocalRunner", - "MongoDBRunner", - "NotionRunner", - "OneDriveRunner", - "OpenSearchRunner", - "OutlookRunner", - "RedditRunner", - "S3Runner", - "SalesforceRunner", - "SharePointRunner", - "SlackRunner", - "WikipediaRunner", - "runner_map", - "Runner", -] diff --git a/unstructured/ingest/runner/airtable.py b/unstructured/ingest/runner/airtable.py deleted file mode 100644 index ec148221c..000000000 --- a/unstructured/ingest/runner/airtable.py +++ /dev/null @@ -1,35 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.airtable import SimpleAirtableConfig - - -@dataclass -class AirtableRunner(Runner): - connector_config: "SimpleAirtableConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - self.connector_config.access_config.personal_access_token.encode("utf-8"), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="airtable", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.airtable import ( - AirtableSourceConnector, - ) - - return AirtableSourceConnector diff --git a/unstructured/ingest/runner/astradb.py b/unstructured/ingest/runner/astradb.py deleted file mode 100644 index a07c66b93..000000000 --- a/unstructured/ingest/runner/astradb.py +++ /dev/null @@ -1,34 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from 
unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.astradb import SimpleAstraDBConfig - - -@dataclass -class AstraDBRunner(Runner): - connector_config: "SimpleAstraDBConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - str(self.connector_config.access_config.api_endpoint).encode("utf-8"), - ) - self.read_config.download_dir = update_download_dir_hash( - connector_name="astradb", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.astradb import ( - AstraDBSourceConnector, - ) - - return AstraDBSourceConnector diff --git a/unstructured/ingest/runner/base_runner.py b/unstructured/ingest/runner/base_runner.py deleted file mode 100644 index dbc9c58d1..000000000 --- a/unstructured/ingest/runner/base_runner.py +++ /dev/null @@ -1,89 +0,0 @@ -import logging -import typing as t -from abc import ABC, abstractmethod -from dataclasses import dataclass - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin -from unstructured.ingest.interfaces import ( - BaseConnectorConfig, - BaseDestinationConnector, - BaseSourceConnector, - ChunkingConfig, - EmbeddingConfig, - PartitionConfig, - PermissionsConfig, - ProcessorConfig, - ReadConfig, - RetryStrategyConfig, -) -from unstructured.ingest.logger import ingest_log_streaming_init -from unstructured.ingest.processor import process_documents -from unstructured.ingest.runner.writers.base_writer import Writer - - -@dataclass -class Runner(EnhancedDataClassJsonMixin, ABC): - connector_config: BaseConnectorConfig - processor_config: ProcessorConfig - read_config: ReadConfig - partition_config: PartitionConfig - writer: t.Optional[Writer] = None - writer_kwargs: t.Optional[dict] = None - embedding_config: t.Optional[EmbeddingConfig] = None - chunking_config: t.Optional[ChunkingConfig] = None - permissions_config: t.Optional[PermissionsConfig] = None - retry_strategy_config: t.Optional[RetryStrategyConfig] = None - - def run(self, *args, **kwargs): - ingest_log_streaming_init(logging.DEBUG if self.processor_config.verbose else logging.INFO) - self.update_read_config() - source_connector = self.get_source_connector() - self.process_documents( - source_doc_connector=source_connector, - ) - - @abstractmethod - def update_read_config(self): - pass - - @abstractmethod - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - pass - - def get_source_connector(self) -> BaseSourceConnector: - source_connector_cls = self.get_source_connector_cls() - return source_connector_cls( - processor_config=self.processor_config, - connector_config=self.connector_config, - read_config=self.read_config, - ) - - def get_dest_doc_connector(self) -> t.Optional[BaseDestinationConnector]: - writer_kwargs = self.writer_kwargs if self.writer_kwargs else {} - if self.writer: - return self.writer.get_connector(**writer_kwargs) - return None - - def get_permissions_config(self) -> t.Optional[PermissionsConfig]: - if self.permissions_config is None: - return None - - permissions_config_filled = bool( - self.permissions_config.application_id - and self.permissions_config.client_cred - and self.permissions_config.tenant, - ) - - return self.permissions_config if permissions_config_filled else None - - def 
process_documents(self, source_doc_connector: BaseSourceConnector): - process_documents( - processor_config=self.processor_config, - source_doc_connector=source_doc_connector, - partition_config=self.partition_config, - dest_doc_connector=self.get_dest_doc_connector(), - embedder_config=self.embedding_config, - chunking_config=self.chunking_config, - permissions_config=self.get_permissions_config(), - retry_strategy_config=self.retry_strategy_config, - ) diff --git a/unstructured/ingest/runner/biomed.py b/unstructured/ingest/runner/biomed.py deleted file mode 100644 index 045d4486c..000000000 --- a/unstructured/ingest/runner/biomed.py +++ /dev/null @@ -1,45 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.biomed import SimpleBiomedConfig - - -@dataclass -class BiomedRunner(Runner): - connector_config: "SimpleBiomedConfig" - - def update_read_config(self): - base_path = ( - self.connector_config.path - if self.connector_config.path - else "{}-{}-{}".format( - self.connector_config.api_id if self.connector_config.api_id else "", - self.connector_config.api_from if self.connector_config.api_from else "", - self.connector_config.api_until if self.connector_config.api_until else "", - ) - ) - - hashed_dir_name = hashlib.sha256( - base_path.encode("utf-8"), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="biomed", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.biomed import ( - BiomedSourceConnector, - ) - - return BiomedSourceConnector diff --git a/unstructured/ingest/runner/confluence.py b/unstructured/ingest/runner/confluence.py deleted file mode 100644 index 3f6057512..000000000 --- a/unstructured/ingest/runner/confluence.py +++ /dev/null @@ -1,35 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.confluence import SimpleConfluenceConfig - - -@dataclass -class ConfluenceRunner(Runner): - connector_config: "SimpleConfluenceConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - self.connector_config.url.encode("utf-8"), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="confluence", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.confluence import ( - ConfluenceSourceConnector, - ) - - return ConfluenceSourceConnector diff --git a/unstructured/ingest/runner/delta_table.py b/unstructured/ingest/runner/delta_table.py deleted file mode 100644 index 5dc418710..000000000 --- a/unstructured/ingest/runner/delta_table.py +++ /dev/null @@ -1,34 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import 
BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.delta_table import SimpleDeltaTableConfig - - -@dataclass -class DeltaTableRunner(Runner): - connector_config: "SimpleDeltaTableConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - str(self.connector_config.table_uri).encode("utf-8"), - ) - self.read_config.download_dir = update_download_dir_hash( - connector_name="delta_table", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.delta_table import ( - DeltaTableSourceConnector, - ) - - return DeltaTableSourceConnector diff --git a/unstructured/ingest/runner/discord.py b/unstructured/ingest/runner/discord.py deleted file mode 100644 index 28f11a9be..000000000 --- a/unstructured/ingest/runner/discord.py +++ /dev/null @@ -1,35 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.discord import SimpleDiscordConfig - - -@dataclass -class DiscordRunner(Runner): - connector_config: "SimpleDiscordConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - ",".join(self.connector_config.channels).encode("utf-8"), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="discord", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.discord import ( - DiscordSourceConnector, - ) - - return DiscordSourceConnector diff --git a/unstructured/ingest/runner/elasticsearch.py b/unstructured/ingest/runner/elasticsearch.py deleted file mode 100644 index a1cb75b84..000000000 --- a/unstructured/ingest/runner/elasticsearch.py +++ /dev/null @@ -1,40 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.elasticsearch import SimpleElasticsearchConfig - - -@dataclass -class ElasticSearchRunner(Runner): - connector_config: "SimpleElasticsearchConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - "{}_{}".format( - ",".join(self.connector_config.access_config.hosts), - self.connector_config.index_name, - ).encode( - "utf-8", - ), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="elasticsearch", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.elasticsearch import ( - ElasticsearchSourceConnector, - ) - - return ElasticsearchSourceConnector diff --git a/unstructured/ingest/runner/fsspec/__init__.py 
b/unstructured/ingest/runner/fsspec/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstructured/ingest/runner/fsspec/azure.py b/unstructured/ingest/runner/fsspec/azure.py deleted file mode 100644 index e92f4502f..000000000 --- a/unstructured/ingest/runner/fsspec/azure.py +++ /dev/null @@ -1,30 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_remote_url - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.fsspec.azure import SimpleAzureBlobStorageConfig - - -@dataclass -class AzureRunner(Runner): - connector_config: "SimpleAzureBlobStorageConfig" - - def update_read_config(self): - self.read_config.download_dir = update_download_dir_remote_url( - connector_name="azure", - read_config=self.read_config, - remote_url=self.connector_config.remote_url, # type: ignore - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.fsspec.azure import ( - AzureBlobStorageSourceConnector, - ) - - return AzureBlobStorageSourceConnector diff --git a/unstructured/ingest/runner/fsspec/box.py b/unstructured/ingest/runner/fsspec/box.py deleted file mode 100644 index c219576f5..000000000 --- a/unstructured/ingest/runner/fsspec/box.py +++ /dev/null @@ -1,28 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_remote_url - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.fsspec.box import SimpleBoxConfig - - -@dataclass -class BoxRunner(Runner): - connector_config: "SimpleBoxConfig" - - def update_read_config(self): - self.read_config.download_dir = update_download_dir_remote_url( - connector_name="box", - read_config=self.read_config, - remote_url=self.connector_config.remote_url, # type: ignore - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.fsspec.box import BoxSourceConnector - - return BoxSourceConnector diff --git a/unstructured/ingest/runner/fsspec/dropbox.py b/unstructured/ingest/runner/fsspec/dropbox.py deleted file mode 100644 index ef408918c..000000000 --- a/unstructured/ingest/runner/fsspec/dropbox.py +++ /dev/null @@ -1,30 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_remote_url - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.fsspec.dropbox import SimpleDropboxConfig - - -@dataclass -class DropboxRunner(Runner): - connector_config: "SimpleDropboxConfig" - - def update_read_config(self): - self.read_config.download_dir = update_download_dir_remote_url( - connector_name="dropbox", - read_config=self.read_config, - remote_url=self.connector_config.remote_url, # type: ignore - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.fsspec.dropbox import ( - DropboxSourceConnector, - ) - - 
return DropboxSourceConnector diff --git a/unstructured/ingest/runner/fsspec/fsspec.py b/unstructured/ingest/runner/fsspec/fsspec.py deleted file mode 100644 index e98251a81..000000000 --- a/unstructured/ingest/runner/fsspec/fsspec.py +++ /dev/null @@ -1,40 +0,0 @@ -import typing as t -import warnings -from dataclasses import dataclass -from urllib.parse import urlparse - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_remote_url - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.fsspec.fsspec import SimpleFsspecConfig - - -@dataclass -class FsspecRunner(Runner): - connector_config: "SimpleFsspecConfig" - - def update_read_config(self): - self.read_config.download_dir = update_download_dir_remote_url( - connector_name="fsspec", - read_config=self.read_config, - remote_url=self.fsspec_config.remote_url, # type: ignore - logger=logger, - ) - - protocol = urlparse(self.fsspec_config.remote_url).scheme # type: ignore - warnings.warn( - f"`fsspec` protocol {protocol} is not directly supported by `unstructured`," - " so use it at your own risk. Supported protocols are `gcs`, `gs`, `s3`, `s3a`," - "`dropbox`, `abfs`, `az` and `sftp`.", - UserWarning, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.fsspec.fsspec import ( - FsspecSourceConnector, - ) - - return FsspecSourceConnector diff --git a/unstructured/ingest/runner/fsspec/gcs.py b/unstructured/ingest/runner/fsspec/gcs.py deleted file mode 100644 index 1c3e043e3..000000000 --- a/unstructured/ingest/runner/fsspec/gcs.py +++ /dev/null @@ -1,28 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_remote_url - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.fsspec.gcs import SimpleGcsConfig - - -@dataclass -class GCSRunner(Runner): - connector_config: "SimpleGcsConfig" - - def update_read_config(self): - self.read_config.download_dir = update_download_dir_remote_url( - connector_name="gcs", - read_config=self.read_config, - remote_url=self.connector_config.remote_url, # type: ignore - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.fsspec.gcs import GcsSourceConnector - - return GcsSourceConnector diff --git a/unstructured/ingest/runner/fsspec/s3.py b/unstructured/ingest/runner/fsspec/s3.py deleted file mode 100644 index 086e2a58d..000000000 --- a/unstructured/ingest/runner/fsspec/s3.py +++ /dev/null @@ -1,28 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_remote_url - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.fsspec.s3 import SimpleS3Config - - -@dataclass -class S3Runner(Runner): - connector_config: "SimpleS3Config" - - def update_read_config(self): - self.read_config.download_dir = update_download_dir_remote_url( - connector_name="s3", - read_config=self.read_config, - 
remote_url=self.connector_config.remote_url, # type: ignore - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.fsspec.s3 import S3SourceConnector - - return S3SourceConnector diff --git a/unstructured/ingest/runner/fsspec/sftp.py b/unstructured/ingest/runner/fsspec/sftp.py deleted file mode 100644 index db73ad7e1..000000000 --- a/unstructured/ingest/runner/fsspec/sftp.py +++ /dev/null @@ -1,28 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_remote_url - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.fsspec.sftp import SimpleSftpConfig - - -@dataclass -class SftpRunner(Runner): - connector_config: "SimpleSftpConfig" - - def update_read_config(self): - self.read_config.download_dir = update_download_dir_remote_url( - connector_name="sftp", - read_config=self.read_config, - remote_url=self.connector_config.remote_url, # type: ignore - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.fsspec.sftp import SftpSourceConnector - - return SftpSourceConnector diff --git a/unstructured/ingest/runner/github.py b/unstructured/ingest/runner/github.py deleted file mode 100644 index 86cf191be..000000000 --- a/unstructured/ingest/runner/github.py +++ /dev/null @@ -1,37 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.github import SimpleGitHubConfig - - -@dataclass -class GithubRunner(Runner): - connector_config: "SimpleGitHubConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - f"{self.connector_config.url}_{self.connector_config.branch}".encode( - "utf-8", - ), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="github", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.github import ( - GitHubSourceConnector, - ) - - return GitHubSourceConnector diff --git a/unstructured/ingest/runner/gitlab.py b/unstructured/ingest/runner/gitlab.py deleted file mode 100644 index c6b8e5c3a..000000000 --- a/unstructured/ingest/runner/gitlab.py +++ /dev/null @@ -1,37 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.gitlab import SimpleGitlabConfig - - -@dataclass -class GitlabRunner(Runner): - connector_config: "SimpleGitlabConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - f"{self.connector_config.url}_{self.connector_config.branch}".encode( - "utf-8", - ), - ) - - self.read_config.download_dir = update_download_dir_hash( - 
connector_name="gitlab", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.gitlab import ( - GitLabSourceConnector, - ) - - return GitLabSourceConnector diff --git a/unstructured/ingest/runner/google_drive.py b/unstructured/ingest/runner/google_drive.py deleted file mode 100644 index 8972c9a15..000000000 --- a/unstructured/ingest/runner/google_drive.py +++ /dev/null @@ -1,35 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.google_drive import SimpleGoogleDriveConfig - - -@dataclass -class GoogleDriveRunner(Runner): - connector_config: "SimpleGoogleDriveConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - self.connector_config.drive_id.encode("utf-8"), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="google_drive", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.google_drive import ( - GoogleDriveSourceConnector, - ) - - return GoogleDriveSourceConnector diff --git a/unstructured/ingest/runner/hubspot.py b/unstructured/ingest/runner/hubspot.py deleted file mode 100644 index 2e988e759..000000000 --- a/unstructured/ingest/runner/hubspot.py +++ /dev/null @@ -1,35 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.hubspot import SimpleHubSpotConfig - - -@dataclass -class HubSpotRunner(Runner): - connector_config: "SimpleHubSpotConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - self.connector_config.access_config.api_token.encode("utf-8"), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="hubspot", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.hubspot import ( - HubSpotSourceConnector, - ) - - return HubSpotSourceConnector diff --git a/unstructured/ingest/runner/jira.py b/unstructured/ingest/runner/jira.py deleted file mode 100644 index d632de9d8..000000000 --- a/unstructured/ingest/runner/jira.py +++ /dev/null @@ -1,35 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.jira import SimpleJiraConfig - - -@dataclass -class JiraRunner(Runner): - connector_config: "SimpleJiraConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - 
self.connector_config.url.encode("utf-8"), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="jira", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.jira import ( - JiraSourceConnector, - ) - - return JiraSourceConnector diff --git a/unstructured/ingest/runner/kafka.py b/unstructured/ingest/runner/kafka.py deleted file mode 100644 index ba8a75094..000000000 --- a/unstructured/ingest/runner/kafka.py +++ /dev/null @@ -1,34 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.kafka import SimpleKafkaConfig - - -@dataclass -class KafkaRunner(Runner): - connector_config: "SimpleKafkaConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - str(self.connector_config.bootstrap_server).encode("utf-8"), - ) - self.read_config.download_dir = update_download_dir_hash( - connector_name="kafka", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.kafka import ( - KafkaSourceConnector, - ) - - return KafkaSourceConnector diff --git a/unstructured/ingest/runner/local.py b/unstructured/ingest/runner/local.py deleted file mode 100644 index a8c4ab19c..000000000 --- a/unstructured/ingest/runner/local.py +++ /dev/null @@ -1,23 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.runner.base_runner import Runner - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.local import SimpleLocalConfig - - -@dataclass -class LocalRunner(Runner): - connector_config: "SimpleLocalConfig" - - def update_read_config(self): - pass - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.local import ( - LocalSourceConnector, - ) - - return LocalSourceConnector diff --git a/unstructured/ingest/runner/mongodb.py b/unstructured/ingest/runner/mongodb.py deleted file mode 100644 index bdde249cd..000000000 --- a/unstructured/ingest/runner/mongodb.py +++ /dev/null @@ -1,34 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.mongodb import SimpleMongoDBConfig - - -@dataclass -class MongoDBRunner(Runner): - connector_config: "SimpleMongoDBConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - str(self.connector_config.access_config.uri).encode("utf-8"), - ) - self.read_config.download_dir = update_download_dir_hash( - connector_name="mongodb", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.mongodb import ( - 
MongoDBSourceConnector, - ) - - return MongoDBSourceConnector diff --git a/unstructured/ingest/runner/notion.py b/unstructured/ingest/runner/notion.py deleted file mode 100644 index ee7fd9c5e..000000000 --- a/unstructured/ingest/runner/notion.py +++ /dev/null @@ -1,61 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.notion.connector import SimpleNotionConfig - - -@dataclass -class NotionRunner(Runner): - connector_config: "SimpleNotionConfig" - - def update_read_config(self): - if not self.connector_config.page_ids and not self.connector_config.database_ids: - raise ValueError("no page ids nor database ids provided") - - if self.connector_config.page_ids and self.connector_config.database_ids: - hashed_dir_name = hashlib.sha256( - "{},{}".format( - ",".join(self.connector_config.page_ids), - ",".join(self.connector_config.database_ids), - ).encode("utf-8"), - ) - elif self.connector_config.page_ids: - hashed_dir_name = hashlib.sha256( - ",".join(self.connector_config.page_ids).encode("utf-8"), - ) - elif self.connector_config.database_ids: - hashed_dir_name = hashlib.sha256( - ",".join(self.connector_config.database_ids).encode("utf-8"), - ) - else: - raise ValueError("could not create local cache directory name") - - self.read_config.download_dir = update_download_dir_hash( - connector_name="notion", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.notion.connector import ( - NotionSourceConnector, - ) - - return NotionSourceConnector - - def get_source_connector(self) -> BaseSourceConnector: - source_connector_cls = self.get_source_connector_cls() - return source_connector_cls( - processor_config=self.processor_config, - connector_config=self.connector_config, - read_config=self.read_config, - retry_strategy_config=self.retry_strategy_config, - ) diff --git a/unstructured/ingest/runner/onedrive.py b/unstructured/ingest/runner/onedrive.py deleted file mode 100644 index 6c2312614..000000000 --- a/unstructured/ingest/runner/onedrive.py +++ /dev/null @@ -1,35 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.onedrive import SimpleOneDriveConfig - - -@dataclass -class OneDriveRunner(Runner): - connector_config: "SimpleOneDriveConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - f"{self.connector_config.tenant}_{self.connector_config.user_pname}".encode("utf-8"), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="onedrive", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.onedrive import ( - OneDriveSourceConnector, - ) - - return OneDriveSourceConnector diff --git 
a/unstructured/ingest/runner/opensearch.py b/unstructured/ingest/runner/opensearch.py deleted file mode 100644 index e3ce03a71..000000000 --- a/unstructured/ingest/runner/opensearch.py +++ /dev/null @@ -1,40 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.opensearch import SimpleOpenSearchConfig - - -@dataclass -class OpenSearchRunner(Runner): - connector_config: "SimpleOpenSearchConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - "{}_{}".format( - ",".join(self.connector_config.access_config.hosts), - self.connector_config.index_name, - ).encode( - "utf-8", - ), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="opensearch", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.opensearch import ( - OpenSearchSourceConnector, - ) - - return OpenSearchSourceConnector diff --git a/unstructured/ingest/runner/outlook.py b/unstructured/ingest/runner/outlook.py deleted file mode 100644 index 3672dacef..000000000 --- a/unstructured/ingest/runner/outlook.py +++ /dev/null @@ -1,33 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.outlook import SimpleOutlookConfig - - -@dataclass -class OutlookRunner(Runner): - connector_config: "SimpleOutlookConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256(self.connector_config.user_email.encode("utf-8")) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="outlook", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.outlook import ( - OutlookSourceConnector, - ) - - return OutlookSourceConnector diff --git a/unstructured/ingest/runner/reddit.py b/unstructured/ingest/runner/reddit.py deleted file mode 100644 index 0d59acd74..000000000 --- a/unstructured/ingest/runner/reddit.py +++ /dev/null @@ -1,35 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.reddit import SimpleRedditConfig - - -@dataclass -class RedditRunner(Runner): - connector_config: "SimpleRedditConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - self.connector_config.subreddit_name.encode("utf-8"), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="reddit", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def 
get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.reddit import ( - RedditSourceConnector, - ) - - return RedditSourceConnector diff --git a/unstructured/ingest/runner/salesforce.py b/unstructured/ingest/runner/salesforce.py deleted file mode 100644 index 06326e556..000000000 --- a/unstructured/ingest/runner/salesforce.py +++ /dev/null @@ -1,33 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.salesforce import SimpleSalesforceConfig - - -@dataclass -class SalesforceRunner(Runner): - connector_config: "SimpleSalesforceConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256(self.connector_config.username.encode("utf-8")) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="salesforce", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.salesforce import ( - SalesforceSourceConnector, - ) - - return SalesforceSourceConnector diff --git a/unstructured/ingest/runner/sharepoint.py b/unstructured/ingest/runner/sharepoint.py deleted file mode 100644 index f5e0dd36b..000000000 --- a/unstructured/ingest/runner/sharepoint.py +++ /dev/null @@ -1,35 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.sharepoint import SimpleSharepointConfig - - -@dataclass -class SharePointRunner(Runner): - connector_config: "SimpleSharepointConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - f"{self.connector_config.site}_{self.connector_config.path}".encode("utf-8"), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="sharepoint", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.sharepoint import ( - SharepointSourceConnector, - ) - - return SharepointSourceConnector diff --git a/unstructured/ingest/runner/slack.py b/unstructured/ingest/runner/slack.py deleted file mode 100644 index 2d4231473..000000000 --- a/unstructured/ingest/runner/slack.py +++ /dev/null @@ -1,33 +0,0 @@ -import hashlib -import typing as t - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.slack import SimpleSlackConfig - - -class SlackRunner(Runner): - connector_config: "SimpleSlackConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - ",".join(self.connector_config.channels).encode("utf-8"), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="slack", 
- read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.slack import ( - SlackSourceConnector, - ) - - return SlackSourceConnector diff --git a/unstructured/ingest/runner/utils.py b/unstructured/ingest/runner/utils.py deleted file mode 100644 index 0816923ed..000000000 --- a/unstructured/ingest/runner/utils.py +++ /dev/null @@ -1,47 +0,0 @@ -from __future__ import annotations - -import hashlib -import logging -from pathlib import Path - -from unstructured.ingest.interfaces import ( - ReadConfig, -) - - -def update_download_dir_remote_url( - connector_name: str, - read_config: ReadConfig, - remote_url: str, - logger: logging.Logger, -) -> str: - hashed_dir_name = hashlib.sha256(remote_url.encode("utf-8")) - return update_download_dir_hash( - connector_name=connector_name, - read_config=read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - -def update_download_dir_hash( - connector_name: str, - read_config: ReadConfig, - hashed_dir_name: hashlib._Hash, - logger: logging.Logger, -) -> str: - if not read_config.download_dir: - cache_path = Path.home() / ".cache" / "unstructured" / "ingest" - if not cache_path.exists(): - cache_path.mkdir(parents=True, exist_ok=True) - download_dir = cache_path / connector_name / hashed_dir_name.hexdigest()[:10] - if read_config.preserve_downloads: - logger.warning( - f"Preserving downloaded files but download_dir is not specified," - f" using {download_dir}", - ) - new_download_dir = str(download_dir) - logger.debug(f"updating download directory to: {new_download_dir}") - else: - new_download_dir = read_config.download_dir - return new_download_dir diff --git a/unstructured/ingest/runner/wikipedia.py b/unstructured/ingest/runner/wikipedia.py deleted file mode 100644 index 7a67dcd43..000000000 --- a/unstructured/ingest/runner/wikipedia.py +++ /dev/null @@ -1,35 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.wikipedia import SimpleWikipediaConfig - - -@dataclass -class WikipediaRunner(Runner): - connector_config: "SimpleWikipediaConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - self.connector_config.page_title.encode("utf-8"), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="wikipedia", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.wikipedia import ( - WikipediaSourceConnector, - ) - - return WikipediaSourceConnector diff --git a/unstructured/ingest/runner/writers/__init__.py b/unstructured/ingest/runner/writers/__init__.py deleted file mode 100644 index 8b07adb9e..000000000 --- a/unstructured/ingest/runner/writers/__init__.py +++ /dev/null @@ -1,48 +0,0 @@ -import typing as t - -from .astradb import AstraDBWriter -from .azure_cognitive_search import AzureCognitiveSearchWriter -from .base_writer import Writer -from .chroma import ChromaWriter -from .clarifai import ClarifaiWriter -from .databricks_volumes import DatabricksVolumesWriter -from .delta_table import 
DeltaTableWriter -from .elasticsearch import ElasticsearchWriter -from .fsspec.azure import AzureWriter -from .fsspec.box import BoxWriter -from .fsspec.dropbox import DropboxWriter -from .fsspec.gcs import GcsWriter -from .fsspec.s3 import S3Writer -from .kafka import KafkaWriter -from .mongodb import MongodbWriter -from .opensearch import OpenSearchWriter -from .pinecone import PineconeWriter -from .qdrant import QdrantWriter -from .sql import SqlWriter -from .vectara import VectaraWriter -from .weaviate import WeaviateWriter - -writer_map: t.Dict[str, t.Type[Writer]] = { - "astradb": AstraDBWriter, - "azure": AzureWriter, - "azure_cognitive_search": AzureCognitiveSearchWriter, - "box": BoxWriter, - "chroma": ChromaWriter, - "clarifai": ClarifaiWriter, - "databricks_volumes": DatabricksVolumesWriter, - "delta_table": DeltaTableWriter, - "dropbox": DropboxWriter, - "elasticsearch": ElasticsearchWriter, - "gcs": GcsWriter, - "kafka": KafkaWriter, - "mongodb": MongodbWriter, - "opensearch": OpenSearchWriter, - "pinecone": PineconeWriter, - "qdrant": QdrantWriter, - "s3": S3Writer, - "sql": SqlWriter, - "vectara": VectaraWriter, - "weaviate": WeaviateWriter, -} - -__all__ = ["writer_map"] diff --git a/unstructured/ingest/runner/writers/astradb.py b/unstructured/ingest/runner/writers/astradb.py deleted file mode 100644 index b12ee7234..000000000 --- a/unstructured/ingest/runner/writers/astradb.py +++ /dev/null @@ -1,22 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.astradb import AstraDBWriteConfig, SimpleAstraDBConfig - - -@dataclass -class AstraDBWriter(Writer, EnhancedDataClassJsonMixin): - write_config: "AstraDBWriteConfig" - connector_config: "SimpleAstraDBConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.astradb import ( - AstraDBDestinationConnector, - ) - - return AstraDBDestinationConnector diff --git a/unstructured/ingest/runner/writers/azure_cognitive_search.py b/unstructured/ingest/runner/writers/azure_cognitive_search.py deleted file mode 100644 index 69204e3f3..000000000 --- a/unstructured/ingest/runner/writers/azure_cognitive_search.py +++ /dev/null @@ -1,24 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.azure_cognitive_search import ( - AzureCognitiveSearchWriteConfig, - SimpleAzureCognitiveSearchStorageConfig, - ) - - -@dataclass -class AzureCognitiveSearchWriter(Writer): - connector_config: "SimpleAzureCognitiveSearchStorageConfig" - write_config: "AzureCognitiveSearchWriteConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.azure_cognitive_search import ( - AzureCognitiveSearchDestinationConnector, - ) - - return AzureCognitiveSearchDestinationConnector diff --git a/unstructured/ingest/runner/writers/base_writer.py b/unstructured/ingest/runner/writers/base_writer.py deleted file mode 100644 index e28d11b07..000000000 --- a/unstructured/ingest/runner/writers/base_writer.py +++ /dev/null @@ -1,26 +0,0 @@ -import typing as t -from abc import ABC, 
abstractmethod -from dataclasses import dataclass - -from unstructured.ingest.interfaces import ( - BaseConnectorConfig, - BaseDestinationConnector, - WriteConfig, -) - - -@dataclass -class Writer(ABC): - connector_config: BaseConnectorConfig - write_config: WriteConfig - - @abstractmethod - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - pass - - def get_connector(self, **kwargs) -> BaseDestinationConnector: - connector_cls = self.get_connector_cls() - return connector_cls( - write_config=self.write_config, - connector_config=self.connector_config, - ) diff --git a/unstructured/ingest/runner/writers/chroma.py b/unstructured/ingest/runner/writers/chroma.py deleted file mode 100644 index e41753d01..000000000 --- a/unstructured/ingest/runner/writers/chroma.py +++ /dev/null @@ -1,22 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.chroma import ChromaWriteConfig, SimpleChromaConfig - - -@dataclass -class ChromaWriter(Writer, EnhancedDataClassJsonMixin): - write_config: "ChromaWriteConfig" - connector_config: "SimpleChromaConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.chroma import ( - ChromaDestinationConnector, - ) - - return ChromaDestinationConnector diff --git a/unstructured/ingest/runner/writers/clarifai.py b/unstructured/ingest/runner/writers/clarifai.py deleted file mode 100644 index 9742e1eee..000000000 --- a/unstructured/ingest/runner/writers/clarifai.py +++ /dev/null @@ -1,19 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.clarifai import ClarifaiWriteConfig, SimpleClarifaiConfig - - -@dataclass -class ClarifaiWriter(Writer): - write_config: "ClarifaiWriteConfig" - connector_config: "SimpleClarifaiConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.clarifai import ClarifaiDestinationConnector - - return ClarifaiDestinationConnector diff --git a/unstructured/ingest/runner/writers/databricks_volumes.py b/unstructured/ingest/runner/writers/databricks_volumes.py deleted file mode 100644 index 74703f850..000000000 --- a/unstructured/ingest/runner/writers/databricks_volumes.py +++ /dev/null @@ -1,25 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.databricks_volumes import ( - DatabricksVolumesWriteConfig, - SimpleDatabricksVolumesConfig, - ) - - -@dataclass -class DatabricksVolumesWriter(Writer, EnhancedDataClassJsonMixin): - write_config: "DatabricksVolumesWriteConfig" - connector_config: "SimpleDatabricksVolumesConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.databricks_volumes import ( - DatabricksVolumesDestinationConnector, - ) - - return DatabricksVolumesDestinationConnector diff --git 
a/unstructured/ingest/runner/writers/delta_table.py b/unstructured/ingest/runner/writers/delta_table.py deleted file mode 100644 index 6337e03d9..000000000 --- a/unstructured/ingest/runner/writers/delta_table.py +++ /dev/null @@ -1,24 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.delta_table import ( - DeltaTableWriteConfig, - SimpleDeltaTableConfig, - ) - - -@dataclass -class DeltaTableWriter(Writer): - write_config: "DeltaTableWriteConfig" - connector_config: "SimpleDeltaTableConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.delta_table import ( - DeltaTableDestinationConnector, - ) - - return DeltaTableDestinationConnector diff --git a/unstructured/ingest/runner/writers/elasticsearch.py b/unstructured/ingest/runner/writers/elasticsearch.py deleted file mode 100644 index 7ce8b451f..000000000 --- a/unstructured/ingest/runner/writers/elasticsearch.py +++ /dev/null @@ -1,24 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.elasticsearch import ( - ElasticsearchWriteConfig, - SimpleElasticsearchConfig, - ) - - -@dataclass -class ElasticsearchWriter(Writer): - connector_config: "SimpleElasticsearchConfig" - write_config: "ElasticsearchWriteConfig" - - def get_connector_cls(self) -> BaseDestinationConnector: - from unstructured.ingest.connector.elasticsearch import ( - ElasticsearchDestinationConnector, - ) - - return ElasticsearchDestinationConnector diff --git a/unstructured/ingest/runner/writers/fsspec/__init__.py b/unstructured/ingest/runner/writers/fsspec/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstructured/ingest/runner/writers/fsspec/azure.py b/unstructured/ingest/runner/writers/fsspec/azure.py deleted file mode 100644 index 66835898e..000000000 --- a/unstructured/ingest/runner/writers/fsspec/azure.py +++ /dev/null @@ -1,24 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.fsspec.azure import ( - AzureWriteConfig, - SimpleAzureBlobStorageConfig, - ) - - -@dataclass -class AzureWriter(Writer): - connector_config: "SimpleAzureBlobStorageConfig" - write_config: "AzureWriteConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.fsspec.azure import ( - AzureBlobStorageDestinationConnector, - ) - - return AzureBlobStorageDestinationConnector diff --git a/unstructured/ingest/runner/writers/fsspec/box.py b/unstructured/ingest/runner/writers/fsspec/box.py deleted file mode 100644 index 5f4599a40..000000000 --- a/unstructured/ingest/runner/writers/fsspec/box.py +++ /dev/null @@ -1,21 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.fsspec.box import BoxWriteConfig, SimpleBoxConfig - - -@dataclass -class 
BoxWriter(Writer): - connector_config: "SimpleBoxConfig" - write_config: "BoxWriteConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.fsspec.box import ( - BoxDestinationConnector, - ) - - return BoxDestinationConnector diff --git a/unstructured/ingest/runner/writers/fsspec/dropbox.py b/unstructured/ingest/runner/writers/fsspec/dropbox.py deleted file mode 100644 index 0c9389079..000000000 --- a/unstructured/ingest/runner/writers/fsspec/dropbox.py +++ /dev/null @@ -1,21 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.fsspec.dropbox import DropboxWriteConfig, SimpleDropboxConfig - - -@dataclass -class DropboxWriter(Writer): - connector_config: "SimpleDropboxConfig" - write_config: "DropboxWriteConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.fsspec.dropbox import ( - DropboxDestinationConnector, - ) - - return DropboxDestinationConnector diff --git a/unstructured/ingest/runner/writers/fsspec/gcs.py b/unstructured/ingest/runner/writers/fsspec/gcs.py deleted file mode 100644 index 728a109d2..000000000 --- a/unstructured/ingest/runner/writers/fsspec/gcs.py +++ /dev/null @@ -1,19 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.fsspec.gcs import GcsWriteConfig, SimpleGcsConfig - - -@dataclass -class GcsWriter(Writer): - connector_config: "SimpleGcsConfig" - write_config: "GcsWriteConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.fsspec.gcs import GcsDestinationConnector - - return GcsDestinationConnector diff --git a/unstructured/ingest/runner/writers/fsspec/s3.py b/unstructured/ingest/runner/writers/fsspec/s3.py deleted file mode 100644 index 64d2b3131..000000000 --- a/unstructured/ingest/runner/writers/fsspec/s3.py +++ /dev/null @@ -1,21 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.fsspec.s3 import S3WriteConfig, SimpleS3Config - - -@dataclass -class S3Writer(Writer): - connector_config: "SimpleS3Config" - write_config: "S3WriteConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.fsspec.s3 import ( - S3DestinationConnector, - ) - - return S3DestinationConnector diff --git a/unstructured/ingest/runner/writers/kafka.py b/unstructured/ingest/runner/writers/kafka.py deleted file mode 100644 index f8e5a3e3d..000000000 --- a/unstructured/ingest/runner/writers/kafka.py +++ /dev/null @@ -1,21 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.kafka import KafkaWriteConfig, SimpleKafkaConfig - - -@dataclass -class KafkaWriter(Writer): - write_config: "KafkaWriteConfig" - connector_config: "SimpleKafkaConfig" - - def 
get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.kafka import ( - KafkaDestinationConnector, - ) - - return KafkaDestinationConnector diff --git a/unstructured/ingest/runner/writers/mongodb.py b/unstructured/ingest/runner/writers/mongodb.py deleted file mode 100644 index 5798a0161..000000000 --- a/unstructured/ingest/runner/writers/mongodb.py +++ /dev/null @@ -1,21 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.mongodb import MongoDBWriteConfig, SimpleMongoDBConfig - - -@dataclass -class MongodbWriter(Writer): - write_config: "MongoDBWriteConfig" - connector_config: "SimpleMongoDBConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.mongodb import ( - MongoDBDestinationConnector, - ) - - return MongoDBDestinationConnector diff --git a/unstructured/ingest/runner/writers/opensearch.py b/unstructured/ingest/runner/writers/opensearch.py deleted file mode 100644 index f0c62b578..000000000 --- a/unstructured/ingest/runner/writers/opensearch.py +++ /dev/null @@ -1,26 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.elasticsearch import ( - ElasticsearchWriteConfig, - ) - from unstructured.ingest.connector.opensearch import ( - SimpleOpenSearchConfig, - ) - - -@dataclass -class OpenSearchWriter(Writer): - connector_config: "SimpleOpenSearchConfig" - write_config: "ElasticsearchWriteConfig" - - def get_connector_cls(self) -> BaseDestinationConnector: - from unstructured.ingest.connector.opensearch import ( - OpenSearchDestinationConnector, - ) - - return OpenSearchDestinationConnector diff --git a/unstructured/ingest/runner/writers/pinecone.py b/unstructured/ingest/runner/writers/pinecone.py deleted file mode 100644 index 86fd9580a..000000000 --- a/unstructured/ingest/runner/writers/pinecone.py +++ /dev/null @@ -1,21 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.pinecone import PineconeWriteConfig, SimplePineconeConfig - - -@dataclass -class PineconeWriter(Writer): - write_config: "PineconeWriteConfig" - connector_config: "SimplePineconeConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.pinecone import ( - PineconeDestinationConnector, - ) - - return PineconeDestinationConnector diff --git a/unstructured/ingest/runner/writers/qdrant.py b/unstructured/ingest/runner/writers/qdrant.py deleted file mode 100644 index e7e632405..000000000 --- a/unstructured/ingest/runner/writers/qdrant.py +++ /dev/null @@ -1,19 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.qdrant import QdrantWriteConfig, SimpleQdrantConfig - - -@dataclass -class QdrantWriter(Writer): - write_config: "QdrantWriteConfig" - 
connector_config: "SimpleQdrantConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.qdrant import QdrantDestinationConnector - - return QdrantDestinationConnector diff --git a/unstructured/ingest/runner/writers/sql.py b/unstructured/ingest/runner/writers/sql.py deleted file mode 100644 index 70c710a1f..000000000 --- a/unstructured/ingest/runner/writers/sql.py +++ /dev/null @@ -1,22 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.sql import SimpleSqlConfig - from unstructured.ingest.interfaces import WriteConfig - - -@dataclass -class SqlWriter(Writer): - write_config: "WriteConfig" - connector_config: "SimpleSqlConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.sql import ( - SqlDestinationConnector, - ) - - return SqlDestinationConnector diff --git a/unstructured/ingest/runner/writers/vectara.py b/unstructured/ingest/runner/writers/vectara.py deleted file mode 100644 index f29128022..000000000 --- a/unstructured/ingest/runner/writers/vectara.py +++ /dev/null @@ -1,22 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.vectara import SimpleVectaraConfig, VectaraWriteConfig - - -@dataclass -class VectaraWriter(Writer, EnhancedDataClassJsonMixin): - write_config: "VectaraWriteConfig" - connector_config: "SimpleVectaraConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.vectara import ( - VectaraDestinationConnector, - ) - - return VectaraDestinationConnector diff --git a/unstructured/ingest/runner/writers/weaviate.py b/unstructured/ingest/runner/writers/weaviate.py deleted file mode 100644 index 96c7b0071..000000000 --- a/unstructured/ingest/runner/writers/weaviate.py +++ /dev/null @@ -1,21 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.weaviate import SimpleWeaviateConfig, WeaviateWriteConfig - - -@dataclass -class WeaviateWriter(Writer): - write_config: "WeaviateWriteConfig" - connector_config: "SimpleWeaviateConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.weaviate import ( - WeaviateDestinationConnector, - ) - - return WeaviateDestinationConnector diff --git a/unstructured/ingest/utils/__init__.py b/unstructured/ingest/utils/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstructured/ingest/utils/compression.py b/unstructured/ingest/utils/compression.py deleted file mode 100644 index 41f4b3240..000000000 --- a/unstructured/ingest/utils/compression.py +++ /dev/null @@ -1,117 +0,0 @@ -import copy -import os -import sys -import tarfile -import zipfile -from dataclasses import dataclass -from pathlib import Path -from typing import List, Optional - -from unstructured.ingest.connector.local import 
LocalSourceConnector, SimpleLocalConfig -from unstructured.ingest.interfaces import ( - BaseConnectorConfig, - BaseSingleIngestDoc, - ProcessorConfig, - ReadConfig, -) -from unstructured.ingest.logger import logger - -ZIP_FILE_EXT = [".zip"] -TAR_FILE_EXT = [".tar", ".tar.gz", ".tgz"] - - -def uncompress_file(filename: str, path: Optional[str] = None) -> str: - """ - Takes in a compressed zip or tar file and uncompresses it - """ - # Create path if it doesn't already exist - if path: - Path(path).mkdir(parents=True, exist_ok=True) - - if any(filename.endswith(ext) for ext in ZIP_FILE_EXT): - return uncompress_zip_file(zip_filename=filename, path=path) - elif any(filename.endswith(ext) for ext in TAR_FILE_EXT): - return uncompress_tar_file(tar_filename=filename, path=path) - else: - raise ValueError( - "filename {} not a recognized compressed extension: {}".format( - filename, - ", ".join(ZIP_FILE_EXT + TAR_FILE_EXT), - ), - ) - - -def uncompress_zip_file(zip_filename: str, path: Optional[str] = None) -> str: - head, tail = os.path.split(zip_filename) - for ext in ZIP_FILE_EXT: - if tail.endswith(ext): - tail = tail[: -(len(ext))] - break - path = path if path else os.path.join(head, f"{tail}-zip-uncompressed") - logger.info(f"extracting zip {zip_filename} -> {path}") - with zipfile.ZipFile(zip_filename) as zfile: - zfile.extractall(path=path) - return path - - -def uncompress_tar_file(tar_filename: str, path: Optional[str] = None) -> str: - head, tail = os.path.split(tar_filename) - for ext in TAR_FILE_EXT: - if tail.endswith(ext): - tail = tail[: -(len(ext))] - break - - path = path if path else os.path.join(head, f"{tail}-tar-uncompressed") - logger.info(f"extracting tar {tar_filename} -> {path}") - with tarfile.open(tar_filename, "r:gz") as tfile: - # NOTE(robinson: Mitigate against malicious content being extracted from the tar file. - # This was added in Python 3.12 - # Ref: https://docs.python.org/3/library/tarfile.html#extraction-filters - if sys.version_info >= (3, 12): - tfile.extraction_filter = tarfile.tar_filter - else: - logger.warning( - "Extraction filtering for tar files is available for Python 3.12 and above. " - "Consider upgrading your Python version to improve security. " - "See https://docs.python.org/3/library/tarfile.html#extraction-filters" - ) - tfile.extractall(path=path) - return path - - -@dataclass -class CompressionSourceConnectorMixin: - processor_config: ProcessorConfig - read_config: ReadConfig - connector_config: BaseConnectorConfig - - def process_compressed_doc(self, doc: BaseSingleIngestDoc) -> List[BaseSingleIngestDoc]: - """ - Utility function which helps process compressed files. 
Extracts the contents and returns - generated ingest docs via local source connector - """ - # Download the raw file to local - doc.get_file() - path = uncompress_file(filename=str(doc.filename)) - new_read_configs = copy.copy(self.read_config) - new_process_configs = copy.copy(self.processor_config) - relative_path = path.replace(self.read_config.download_dir, "") - - if self.processor_config.output_dir.endswith(os.sep): - new_process_configs.output_dir = f"{self.processor_config.output_dir}{relative_path}" - else: - new_process_configs.output_dir = ( - f"{self.processor_config.output_dir}{os.sep}{relative_path}" - ) - - local_connector = LocalSourceConnector( - connector_config=SimpleLocalConfig( - input_path=path, - recursive=True, - ), - read_config=new_read_configs, - processor_config=new_process_configs, - ) - logger.info(f"Created local source connector: {local_connector.to_json()}") - local_connector.initialize() - return local_connector.get_ingest_docs() diff --git a/unstructured/ingest/utils/data_prep.py b/unstructured/ingest/utils/data_prep.py deleted file mode 100644 index 722de16e4..000000000 --- a/unstructured/ingest/utils/data_prep.py +++ /dev/null @@ -1,29 +0,0 @@ -import itertools -import json - - -def batch_generator(iterable, batch_size=100): - """A helper function to break an iterable into batches of size batch_size.""" - it = iter(iterable) - chunk = tuple(itertools.islice(it, batch_size)) - while chunk: - yield chunk - chunk = tuple(itertools.islice(it, batch_size)) - - -def generator_batching_wbytes(iterable, batch_size_limit_bytes=15_000_000): - """A helper function to break an iterable into chunks of specified bytes.""" - current_batch, current_batch_size = [], 0 - - for item in iterable: - item_size_bytes = len(json.dumps(item).encode("utf-8")) - - if current_batch_size + item_size_bytes <= batch_size_limit_bytes: - current_batch.append(item) - current_batch_size += item_size_bytes - else: - yield current_batch - current_batch, current_batch_size = [item], item_size_bytes - - if current_batch: - yield current_batch diff --git a/unstructured/ingest/utils/string_and_date_utils.py b/unstructured/ingest/utils/string_and_date_utils.py deleted file mode 100644 index 89f1ca84d..000000000 --- a/unstructured/ingest/utils/string_and_date_utils.py +++ /dev/null @@ -1,39 +0,0 @@ -import json -import typing as t -from datetime import datetime - -from dateutil import parser - - -def json_to_dict(json_string: str) -> t.Union[str, t.Dict[str, t.Any]]: - """Helper function attempts to deserialize json string to a dictionary.""" - try: - return json.loads(json_string) - except json.JSONDecodeError: - # Not neccessary an error if it is a path or malformed json - pass - try: - # This is common when single quotes are used instead of double quotes - return json.loads(json_string.replace("'", '"')) - except json.JSONDecodeError: - # Not neccessary an error if it is a path - pass - return json_string - - -def ensure_isoformat_datetime(timestamp: t.Union[datetime, str]) -> str: - """ - Ensures that the input value is converted to an ISO format datetime string. - Handles both datetime objects and strings. 
- """ - if isinstance(timestamp, datetime): - return timestamp.isoformat() - elif isinstance(timestamp, str): - try: - # Parse the datetime string in various formats - dt = parser.parse(timestamp) - return dt.isoformat() - except ValueError as e: - raise ValueError(f"String '{timestamp}' could not be parsed as a datetime.") from e - else: - raise TypeError(f"Expected input type datetime or str, but got {type(timestamp)}.") diff --git a/unstructured/ingest/utils/table.py b/unstructured/ingest/utils/table.py deleted file mode 100644 index 65fd7b92f..000000000 --- a/unstructured/ingest/utils/table.py +++ /dev/null @@ -1,24 +0,0 @@ -import typing as t - -import pandas as pd - -from unstructured.staging.base import flatten_dict, get_default_pandas_dtypes - - -def convert_to_pandas_dataframe( - elements_dict: t.List[t.Dict[str, t.Any]], - drop_empty_cols: bool = False, -) -> pd.DataFrame: - # Flatten metadata if it hasn't already been flattened - for d in elements_dict: - if metadata := d.pop("metadata", None): - d.update(flatten_dict(metadata, keys_to_omit=["data_source_record_locator"])) - - df = pd.DataFrame.from_dict( - elements_dict, - ) - dt = {k: v for k, v in get_default_pandas_dtypes().items() if k in df.columns} - df = df.astype(dt) - if drop_empty_cols: - df.dropna(axis=1, how="all", inplace=True) - return df diff --git a/unstructured/ingest/v2/README.md b/unstructured/ingest/v2/README.md deleted file mode 100644 index f7291aa5a..000000000 --- a/unstructured/ingest/v2/README.md +++ /dev/null @@ -1,6 +0,0 @@ -# Ingest -![Project unmaintained](https://img.shields.io/badge/project-unmaintained-red.svg) - -Project has been moved to: [Unstructured Ingest](https://github.com/Unstructured-IO/unstructured-ingest) - -This python module will be removed from this repo in the near future. 
diff --git a/unstructured/ingest/v2/__init__.py b/unstructured/ingest/v2/__init__.py deleted file mode 100644 index 9d48db4f9..000000000 --- a/unstructured/ingest/v2/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from __future__ import annotations diff --git a/unstructured/ingest/v2/assets/pipeline.excalidraw b/unstructured/ingest/v2/assets/pipeline.excalidraw deleted file mode 100644 index d59bc99dd..000000000 --- a/unstructured/ingest/v2/assets/pipeline.excalidraw +++ /dev/null @@ -1,1417 +0,0 @@ -{ - "type": "excalidraw", - "version": 2, - "source": "https://excalidraw.com", - "elements": [ - { - "id": "Y3a1yUDvwFK9AB6KmSl9a", - "type": "rectangle", - "x": 637.48046875, - "y": 239.11328125, - "width": 322.44921875, - "height": 97.015625, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "#ffec99", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b1a", - "roundness": { - "type": 3 - }, - "seed": 2131406971, - "version": 139, - "versionNonce": 1482689781, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "7paHS6cDsoMgh1vsOhizN" - }, - { - "id": "e6DNVpQ-gH7v6WNDWWSPD", - "type": "arrow" - } - ], - "updated": 1715951675553, - "link": null, - "locked": false - }, - { - "id": "7paHS6cDsoMgh1vsOhizN", - "type": "text", - "x": 759.9351119995117, - "y": 275.12109375, - "width": 77.53993225097656, - "height": 25, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b1b", - "roundness": null, - "seed": 860081397, - "version": 12, - "versionNonce": 1588840341, - "isDeleted": false, - "boundElements": null, - "updated": 1715951674833, - "link": null, - "locked": false, - "text": "Indexing", - "fontSize": 20, - "fontFamily": 1, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "Y3a1yUDvwFK9AB6KmSl9a", - "originalText": "Indexing", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "type": "rectangle", - "version": 205, - "versionNonce": 1999066491, - "index": "b1c", - "isDeleted": false, - "id": "LZrKOvKX6nGWVOrEpPaPS", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "angle": 0, - "x": 637.244140625, - "y": 406.7421875, - "strokeColor": "#1e1e1e", - "backgroundColor": "#ffec99", - "width": 322.44921875, - "height": 97.015625, - "seed": 882087163, - "groupIds": [], - "frameId": null, - "roundness": { - "type": 3 - }, - "boundElements": [ - { - "type": "text", - "id": "SjYgGO3cAHPreH7mJVBdm" - }, - { - "id": "e6DNVpQ-gH7v6WNDWWSPD", - "type": "arrow" - }, - { - "id": "Dn6kngn7QXyxmlCbzgO2R", - "type": "arrow" - } - ], - "updated": 1715951678396, - "link": null, - "locked": false - }, - { - "type": "text", - "version": 88, - "versionNonce": 1992691451, - "index": "b1d", - "isDeleted": false, - "id": "SjYgGO3cAHPreH7mJVBdm", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "angle": 0, - "x": 741.9687957763672, - "y": 442.75, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "width": 112.99990844726562, - "height": 25, - "seed": 820854171, - "groupIds": [], - "frameId": null, - "roundness": null, - "boundElements": [], - "updated": 1715951530614, - "link": null, - "locked": false, - "fontSize": 20, - "fontFamily": 1, - "text": 
"Downloading", - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "LZrKOvKX6nGWVOrEpPaPS", - "originalText": "Downloading", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "type": "rectangle", - "version": 252, - "versionNonce": 1617745173, - "index": "b1e", - "isDeleted": false, - "id": "62UjU0YjVR7TvLe7hLQCV", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "dotted", - "roughness": 1, - "opacity": 100, - "angle": 0, - "x": 644.884765625, - "y": 586.75390625, - "strokeColor": "#1e1e1e", - "backgroundColor": "#ffec99", - "width": 322.44921875, - "height": 97.015625, - "seed": 1549110491, - "groupIds": [], - "frameId": null, - "roundness": { - "type": 3 - }, - "boundElements": [ - { - "type": "text", - "id": "vRabBFX0KOEkJ6d4rZF5D" - }, - { - "id": "Dn6kngn7QXyxmlCbzgO2R", - "type": "arrow" - }, - { - "id": "0Q1io01It2PX9ESFiW49G", - "type": "arrow" - } - ], - "updated": 1715951680142, - "link": null, - "locked": false - }, - { - "type": "text", - "version": 146, - "versionNonce": 1440901275, - "index": "b1f", - "isDeleted": false, - "id": "vRabBFX0KOEkJ6d4rZF5D", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "angle": 0, - "x": 739.9794387817383, - "y": 622.76171875, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "width": 132.25987243652344, - "height": 25, - "seed": 560281979, - "groupIds": [], - "frameId": null, - "roundness": null, - "boundElements": [], - "updated": 1715951539363, - "link": null, - "locked": false, - "fontSize": 20, - "fontFamily": 1, - "text": "Uncompressing", - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "62UjU0YjVR7TvLe7hLQCV", - "originalText": "Uncompressing", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "type": "rectangle", - "version": 329, - "versionNonce": 1236647227, - "index": "b1g", - "isDeleted": false, - "id": "GZLTgdXXsgXo-4rDdd7BN", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "angle": 0, - "x": 642.740234375, - "y": 752.87109375, - "strokeColor": "#1e1e1e", - "backgroundColor": "#a5d8ff", - "width": 322.44921875, - "height": 97.015625, - "seed": 857787003, - "groupIds": [], - "frameId": null, - "roundness": { - "type": 3 - }, - "boundElements": [ - { - "type": "text", - "id": "3nbrNuxDWK3BIkJVVUKYs" - }, - { - "id": "0Q1io01It2PX9ESFiW49G", - "type": "arrow" - }, - { - "id": "5rxlnALV4R8RNKSSzjawZ", - "type": "arrow" - } - ], - "updated": 1715951692576, - "link": null, - "locked": false - }, - { - "type": "text", - "version": 237, - "versionNonce": 1218981717, - "index": "b1h", - "isDeleted": false, - "id": "3nbrNuxDWK3BIkJVVUKYs", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "angle": 0, - "x": 748.6249008178711, - "y": 788.87890625, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "width": 110.67988586425781, - "height": 25, - "seed": 590856987, - "groupIds": [], - "frameId": null, - "roundness": null, - "boundElements": [], - "updated": 1715951571504, - "link": null, - "locked": false, - "fontSize": 20, - "fontFamily": 1, - "text": "Partitioning", - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "GZLTgdXXsgXo-4rDdd7BN", - "originalText": "Partitioning", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "type": "rectangle", - "version": 425, - "versionNonce": 1862353237, - "index": "b1i", - "isDeleted": false, - "id": 
"JGKFyGpX1KS2mJhIpFiBT", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "dotted", - "roughness": 1, - "opacity": 100, - "angle": 0, - "x": 642.431640625, - "y": 916.02734375, - "strokeColor": "#1e1e1e", - "backgroundColor": "#eebefa", - "width": 322.44921875, - "height": 97.015625, - "seed": 1945073307, - "groupIds": [], - "frameId": null, - "roundness": { - "type": 3 - }, - "boundElements": [ - { - "type": "text", - "id": "mPevqaKIOyvM1_XLXsPLZ" - }, - { - "id": "5rxlnALV4R8RNKSSzjawZ", - "type": "arrow" - }, - { - "id": "xsN-wlmdU5K7UGi95CYsI", - "type": "arrow" - } - ], - "updated": 1715951696070, - "link": null, - "locked": false - }, - { - "type": "text", - "version": 340, - "versionNonce": 937753339, - "index": "b1j", - "isDeleted": false, - "id": "mPevqaKIOyvM1_XLXsPLZ", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "angle": 0, - "x": 765.1862869262695, - "y": 952.03515625, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "width": 76.93992614746094, - "height": 25, - "seed": 161213243, - "groupIds": [], - "frameId": null, - "roundness": null, - "boundElements": [], - "updated": 1715951559401, - "link": null, - "locked": false, - "fontSize": 20, - "fontFamily": 1, - "text": "Chunking", - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "JGKFyGpX1KS2mJhIpFiBT", - "originalText": "Chunking", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "type": "rectangle", - "version": 527, - "versionNonce": 1327555355, - "index": "b1k", - "isDeleted": false, - "id": "7SOrKIkV23-VpsfKkBWnF", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "dotted", - "roughness": 1, - "opacity": 100, - "angle": 0, - "x": 641.716796875, - "y": 1079.15234375, - "strokeColor": "#1e1e1e", - "backgroundColor": "#eebefa", - "width": 322.44921875, - "height": 97.015625, - "seed": 1437476219, - "groupIds": [], - "frameId": null, - "roundness": { - "type": 3 - }, - "boundElements": [ - { - "type": "text", - "id": "-UFDNMIXOpAYsEf9ubpNz" - }, - { - "id": "xsN-wlmdU5K7UGi95CYsI", - "type": "arrow" - }, - { - "id": "foUafDsehtG66kl3x246k", - "type": "arrow" - } - ], - "updated": 1715951698569, - "link": null, - "locked": false - }, - { - "type": "text", - "version": 451, - "versionNonce": 1228878331, - "index": "b1l", - "isDeleted": false, - "id": "-UFDNMIXOpAYsEf9ubpNz", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "angle": 0, - "x": 756.0714492797852, - "y": 1115.16015625, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "width": 93.73991394042969, - "height": 25, - "seed": 1633795611, - "groupIds": [], - "frameId": null, - "roundness": null, - "boundElements": [], - "updated": 1715951569483, - "link": null, - "locked": false, - "fontSize": 20, - "fontFamily": 1, - "text": "Embedding", - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "7SOrKIkV23-VpsfKkBWnF", - "originalText": "Embedding", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "type": "rectangle", - "version": 421, - "versionNonce": 1862165339, - "index": "b1m", - "isDeleted": false, - "id": "JncRqJ0FdwNeHFO0WQj7j", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "dotted", - "roughness": 1, - "opacity": 100, - "angle": 0, - "x": 641.271484375, - "y": 1250.0859375, - "strokeColor": "#1e1e1e", - "backgroundColor": "#ffc9c9", - "width": 322.44921875, - "height": 97.015625, - "seed": 207501755, - "groupIds": [], - 
"frameId": null, - "roundness": { - "type": 3 - }, - "boundElements": [ - { - "type": "text", - "id": "4aD6_9mkOZYxvLuujjZJ3" - }, - { - "id": "foUafDsehtG66kl3x246k", - "type": "arrow" - }, - { - "id": "bZvxt2MfEmkgYplJGYvAF", - "type": "arrow" - } - ], - "updated": 1715951685444, - "link": null, - "locked": false - }, - { - "type": "text", - "version": 335, - "versionNonce": 1654728507, - "index": "b1n", - "isDeleted": false, - "id": "4aD6_9mkOZYxvLuujjZJ3", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "angle": 0, - "x": 767.2161254882812, - "y": 1286.09375, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "width": 70.5599365234375, - "height": 25, - "seed": 696601179, - "groupIds": [], - "frameId": null, - "roundness": null, - "boundElements": [], - "updated": 1715951578801, - "link": null, - "locked": false, - "fontSize": 20, - "fontFamily": 1, - "text": "Staging", - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "JncRqJ0FdwNeHFO0WQj7j", - "originalText": "Staging", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "type": "rectangle", - "version": 405, - "versionNonce": 2565851, - "index": "b1o", - "isDeleted": false, - "id": "YZqdS6HqxV0eCvZhb-1TG", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "angle": 0, - "x": 637.533203125, - "y": 1406.921875, - "strokeColor": "#1e1e1e", - "backgroundColor": "#ffc9c9", - "width": 322.44921875, - "height": 97.015625, - "seed": 586095477, - "groupIds": [], - "frameId": null, - "roundness": { - "type": 3 - }, - "boundElements": [ - { - "type": "text", - "id": "X0wnY-7I3y5NxPAIay-cU" - }, - { - "id": "bZvxt2MfEmkgYplJGYvAF", - "type": "arrow" - } - ], - "updated": 1715952782049, - "link": null, - "locked": false - }, - { - "type": "text", - "version": 327, - "versionNonce": 236892981, - "index": "b1p", - "isDeleted": false, - "id": "X0wnY-7I3y5NxPAIay-cU", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "angle": 0, - "x": 754.2878494262695, - "y": 1442.9296875, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "width": 88.93992614746094, - "height": 25, - "seed": 1170597077, - "groupIds": [], - "frameId": null, - "roundness": null, - "boundElements": [], - "updated": 1715952784484, - "link": null, - "locked": false, - "fontSize": 20, - "fontFamily": 1, - "text": "Uploading", - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "YZqdS6HqxV0eCvZhb-1TG", - "originalText": "Uploading", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "e6DNVpQ-gH7v6WNDWWSPD", - "type": "arrow", - "x": 792.36328125, - "y": 344.94140625, - "width": 0, - "height": 56.38671875, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b1r", - "roundness": { - "type": 2 - }, - "seed": 1826370165, - "version": 50, - "versionNonce": 1269906229, - "isDeleted": false, - "boundElements": null, - "updated": 1715951643784, - "link": null, - "locked": false, - "points": [ - [ - 0, - 0 - ], - [ - 0, - 56.38671875 - ] - ], - "lastCommittedPoint": null, - "startBinding": { - "elementId": "Y3a1yUDvwFK9AB6KmSl9a", - "focus": 0.03933516663234279, - "gap": 8.8125 - }, - "endBinding": { - "elementId": "LZrKOvKX6nGWVOrEpPaPS", - "focus": 
-0.037869335045489234, - "gap": 5.4140625 - }, - "startArrowhead": null, - "endArrowhead": "arrow" - }, - { - "id": "Dn6kngn7QXyxmlCbzgO2R", - "type": "arrow", - "x": 796.0859375, - "y": 512.30078125, - "width": 0, - "height": 62.3828125, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b1s", - "roundness": { - "type": 2 - }, - "seed": 414059669, - "version": 60, - "versionNonce": 138024373, - "isDeleted": false, - "boundElements": null, - "updated": 1715951647788, - "link": null, - "locked": false, - "points": [ - [ - 0, - 0 - ], - [ - 0, - 62.3828125 - ] - ], - "lastCommittedPoint": null, - "startBinding": { - "elementId": "LZrKOvKX6nGWVOrEpPaPS", - "focus": 0.014779458974887034, - "gap": 8.54296875 - }, - "endBinding": { - "elementId": "62UjU0YjVR7TvLe7hLQCV", - "focus": -0.06217064217960677, - "gap": 12.0703125 - }, - "startArrowhead": null, - "endArrowhead": "arrow" - }, - { - "id": "0Q1io01It2PX9ESFiW49G", - "type": "arrow", - "x": 796.01953125, - "y": 695.125, - "width": 0, - "height": 47.18359375, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b1t", - "roundness": { - "type": 2 - }, - "seed": 2076044405, - "version": 53, - "versionNonce": 518155253, - "isDeleted": false, - "boundElements": null, - "updated": 1715951652693, - "link": null, - "locked": false, - "points": [ - [ - 0, - 0 - ], - [ - 0, - 47.18359375 - ] - ], - "lastCommittedPoint": null, - "startBinding": { - "elementId": "62UjU0YjVR7TvLe7hLQCV", - "focus": 0.06258252874120199, - "gap": 11.35546875 - }, - "endBinding": { - "elementId": "GZLTgdXXsgXo-4rDdd7BN", - "focus": -0.049281015663803655, - "gap": 10.5625 - }, - "startArrowhead": null, - "endArrowhead": "arrow" - }, - { - "id": "5rxlnALV4R8RNKSSzjawZ", - "type": "arrow", - "x": 796.625, - "y": 862.3984375, - "width": 0, - "height": 40.19921875, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b1u", - "roundness": { - "type": 2 - }, - "seed": 343257781, - "version": 31, - "versionNonce": 60053493, - "isDeleted": false, - "boundElements": null, - "updated": 1715951657891, - "link": null, - "locked": false, - "points": [ - [ - 0, - 0 - ], - [ - 0, - 40.19921875 - ] - ], - "lastCommittedPoint": null, - "startBinding": { - "elementId": "GZLTgdXXsgXo-4rDdd7BN", - "focus": 0.04552557936690613, - "gap": 12.51171875 - }, - "endBinding": { - "elementId": "JGKFyGpX1KS2mJhIpFiBT", - "focus": -0.0436115182865519, - "gap": 13.4296875 - }, - "startArrowhead": null, - "endArrowhead": "arrow" - }, - { - "id": "xsN-wlmdU5K7UGi95CYsI", - "type": "arrow", - "x": 795.421875, - "y": 1024.8828125, - "width": 0, - "height": 39.421875, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b1v", - "roundness": { - "type": 2 - }, - "seed": 1318887093, - "version": 38, - "versionNonce": 303905173, - "isDeleted": false, - "boundElements": null, - "updated": 1715951661064, - 
"link": null, - "locked": false, - "points": [ - [ - 0, - 0 - ], - [ - 0, - 39.421875 - ] - ], - "lastCommittedPoint": null, - "startBinding": { - "elementId": "JGKFyGpX1KS2mJhIpFiBT", - "focus": 0.05107393363780634, - "gap": 11.83984375 - }, - "endBinding": { - "elementId": "7SOrKIkV23-VpsfKkBWnF", - "focus": -0.04664009594534023, - "gap": 14.84765625 - }, - "startArrowhead": null, - "endArrowhead": "arrow" - }, - { - "id": "foUafDsehtG66kl3x246k", - "type": "arrow", - "x": 792.3203125, - "y": 1187.8671875, - "width": 0, - "height": 44.78515625, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b1w", - "roundness": { - "type": 2 - }, - "seed": 1280415829, - "version": 34, - "versionNonce": 1235268021, - "isDeleted": false, - "boundElements": null, - "updated": 1715951664610, - "link": null, - "locked": false, - "points": [ - [ - 0, - 0 - ], - [ - 0, - 44.78515625 - ] - ], - "lastCommittedPoint": null, - "startBinding": { - "elementId": "7SOrKIkV23-VpsfKkBWnF", - "focus": 0.06587762123396368, - "gap": 11.69921875 - }, - "endBinding": { - "elementId": "JncRqJ0FdwNeHFO0WQj7j", - "focus": -0.06311555840914873, - "gap": 17.43359375 - }, - "startArrowhead": null, - "endArrowhead": "arrow" - }, - { - "id": "bZvxt2MfEmkgYplJGYvAF", - "type": "arrow", - "x": 789.81640625, - "y": 1358.8125, - "width": 0.08602962445024787, - "height": 35.25, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b1x", - "roundness": { - "type": 2 - }, - "seed": 288196725, - "version": 41, - "versionNonce": 714813627, - "isDeleted": false, - "boundElements": null, - "updated": 1715952782050, - "link": null, - "locked": false, - "points": [ - [ - 0, - 0 - ], - [ - 0.08602962445024787, - 35.25 - ] - ], - "lastCommittedPoint": null, - "startBinding": { - "elementId": "JncRqJ0FdwNeHFO0WQj7j", - "focus": 0.07864610464341526, - "gap": 11.7109375 - }, - "endBinding": { - "elementId": "YZqdS6HqxV0eCvZhb-1TG", - "focus": -0.05395713956897283, - "gap": 12.859375 - }, - "startArrowhead": null, - "endArrowhead": "arrow" - }, - { - "id": "u-6rLKVGZ91K-do_X6_7h", - "type": "rectangle", - "x": 1014.77734375, - "y": 243.0625, - "width": 22.22265625, - "height": 22.22265625, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "#ffec99", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b1y", - "roundness": { - "type": 3 - }, - "seed": 643949941, - "version": 184, - "versionNonce": 115789461, - "isDeleted": false, - "boundElements": null, - "updated": 1715951856984, - "link": null, - "locked": false - }, - { - "id": "i8TMmsB--w6DYXWYRe_qm", - "type": "text", - "x": 1059.00390625, - "y": 242.80859375, - "width": 758.3992919921875, - "height": 25, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "#ffec99", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b20", - "roundness": null, - "seed": 2000384187, - "version": 169, - "versionNonce": 848966645, - "isDeleted": false, - "boundElements": null, - "updated": 1715951856984, - "link": null, - "locked": 
false, - "text": "Steps associated with getting data from a source and ready for processing", - "fontSize": 20, - "fontFamily": 1, - "textAlign": "left", - "verticalAlign": "top", - "containerId": null, - "originalText": "Steps associated with getting data from a source and ready for processing", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "type": "rectangle", - "version": 271, - "versionNonce": 1366945109, - "index": "b21", - "isDeleted": false, - "id": "UMttgjHgvnZXjUlDiqbaB", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "angle": 0, - "x": 1015.4722290039062, - "y": 297.1875, - "strokeColor": "#1e1e1e", - "backgroundColor": "#a5d8ff", - "width": 22.22265625, - "height": 22.22265625, - "seed": 2058850293, - "groupIds": [], - "frameId": null, - "roundness": { - "type": 3 - }, - "boundElements": [], - "updated": 1715951856984, - "link": null, - "locked": false - }, - { - "type": "text", - "version": 298, - "versionNonce": 1658550965, - "index": "b22", - "isDeleted": false, - "id": "hf4pKQ55184WTVhdPC92w", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "angle": 0, - "x": 1059.6987915039062, - "y": 296.93359375, - "strokeColor": "#1e1e1e", - "backgroundColor": "#ffec99", - "width": 365.3796691894531, - "height": 25, - "seed": 1703659861, - "groupIds": [], - "frameId": null, - "roundness": null, - "boundElements": [], - "updated": 1715951856984, - "link": null, - "locked": false, - "fontSize": 20, - "fontFamily": 1, - "text": "Creating structured/enriched content", - "textAlign": "left", - "verticalAlign": "top", - "containerId": null, - "originalText": "Creating structured/enriched content", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "type": "rectangle", - "version": 269, - "versionNonce": 1600412693, - "index": "b23", - "isDeleted": false, - "id": "N4kjMAQ-BqLtvUxn3gpN_", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "angle": 0, - "x": 1017.2026977539062, - "y": 354.03125, - "strokeColor": "#1e1e1e", - "backgroundColor": "#eebefa", - "width": 22.22265625, - "height": 22.22265625, - "seed": 548622613, - "groupIds": [], - "frameId": null, - "roundness": { - "type": 3 - }, - "boundElements": [], - "updated": 1715951856984, - "link": null, - "locked": false - }, - { - "type": "text", - "version": 292, - "versionNonce": 252318069, - "index": "b24", - "isDeleted": false, - "id": "VZCSNlIntRGixA1659IRA", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "angle": 0, - "x": 1061.4292602539062, - "y": 353.77734375, - "strokeColor": "#1e1e1e", - "backgroundColor": "#ffec99", - "width": 367.4396667480469, - "height": 25, - "seed": 347235957, - "groupIds": [], - "frameId": null, - "roundness": null, - "boundElements": [], - "updated": 1715951856984, - "link": null, - "locked": false, - "fontSize": 20, - "fontFamily": 1, - "text": "Reformatting the structured content", - "textAlign": "left", - "verticalAlign": "top", - "containerId": null, - "originalText": "Reformatting the structured content", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "type": "rectangle", - "version": 249, - "versionNonce": 521280213, - "index": "b25", - "isDeleted": false, - "id": "-mFRWLXO9Tam2O1loV1l8", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "angle": 0, - "x": 1017.7183227539062, - "y": 
410.453125, - "strokeColor": "#1e1e1e", - "backgroundColor": "#ffc9c9", - "width": 22.22265625, - "height": 22.22265625, - "seed": 1321641467, - "groupIds": [], - "frameId": null, - "roundness": { - "type": 3 - }, - "boundElements": [], - "updated": 1715951856984, - "link": null, - "locked": false - }, - { - "type": "text", - "version": 299, - "versionNonce": 2014443573, - "index": "b26", - "isDeleted": false, - "id": "l8FTa1uhh3FXC4DdeCjJX", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "angle": 0, - "x": 1061.9448852539062, - "y": 410.19921875, - "strokeColor": "#1e1e1e", - "backgroundColor": "#ffc9c9", - "width": 652.2393798828125, - "height": 25, - "seed": 345386651, - "groupIds": [], - "frameId": null, - "roundness": null, - "boundElements": [], - "updated": 1715951856984, - "link": null, - "locked": false, - "fontSize": 20, - "fontFamily": 1, - "text": "Steps associated with uploading the final result to a destination", - "textAlign": "left", - "verticalAlign": "top", - "containerId": null, - "originalText": "Steps associated with uploading the final result to a destination", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "type": "rectangle", - "version": 358, - "versionNonce": 998367509, - "index": "b27", - "isDeleted": false, - "id": "3uQWJDRthA7AWVdHSokLt", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "angle": 0, - "x": 1018.3490600585938, - "y": 538.45703125, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "width": 22.22265625, - "height": 22.22265625, - "seed": 1078125621, - "groupIds": [], - "frameId": null, - "roundness": { - "type": 3 - }, - "boundElements": [], - "updated": 1715952831362, - "link": null, - "locked": false - }, - { - "type": "text", - "version": 418, - "versionNonce": 2035692411, - "index": "b28", - "isDeleted": false, - "id": "4iycrxYTvkePRrwE9d55_", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "angle": 0, - "x": 1062.5756225585938, - "y": 538.203125, - "strokeColor": "#1e1e1e", - "backgroundColor": "#ffc9c9", - "width": 135.0398712158203, - "height": 25, - "seed": 1059231125, - "groupIds": [], - "frameId": null, - "roundness": null, - "boundElements": [], - "updated": 1715952836177, - "link": null, - "locked": false, - "fontSize": 20, - "fontFamily": 1, - "text": "Required step", - "textAlign": "left", - "verticalAlign": "top", - "containerId": null, - "originalText": "Required step", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "type": "rectangle", - "version": 409, - "versionNonce": 1303811067, - "index": "b2B", - "isDeleted": false, - "id": "Jr-S8g5xKeXX4hA1S9VNt", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "dotted", - "roughness": 1, - "opacity": 100, - "angle": 0, - "x": 1019.7730331420898, - "y": 589.04296875, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "width": 22.22265625, - "height": 22.22265625, - "seed": 832846773, - "groupIds": [], - "frameId": null, - "roundness": { - "type": 3 - }, - "boundElements": [], - "updated": 1715952853068, - "link": null, - "locked": false - }, - { - "type": "text", - "version": 481, - "versionNonce": 989351029, - "index": "b2C", - "isDeleted": false, - "id": "23iPs-E6gExYad4eWTKFP", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "angle": 0, - "x": 1063.9995956420898, - "y": 588.7890625, - 
"strokeColor": "#1e1e1e", - "backgroundColor": "#ffc9c9", - "width": 133.33987426757812, - "height": 25, - "seed": 963443989, - "groupIds": [], - "frameId": null, - "roundness": null, - "boundElements": [], - "updated": 1715952857188, - "link": null, - "locked": false, - "fontSize": 20, - "fontFamily": 1, - "text": "Optional Step", - "textAlign": "left", - "verticalAlign": "top", - "containerId": null, - "originalText": "Optional Step", - "autoResize": true, - "lineHeight": 1.25 - } - ], - "appState": { - "gridSize": null, - "viewBackgroundColor": "#ffffff" - }, - "files": {} -} \ No newline at end of file diff --git a/unstructured/ingest/v2/assets/pipeline.png b/unstructured/ingest/v2/assets/pipeline.png deleted file mode 100644 index 9cfcf64e8..000000000 Binary files a/unstructured/ingest/v2/assets/pipeline.png and /dev/null differ diff --git a/unstructured/ingest/v2/assets/sequence.png b/unstructured/ingest/v2/assets/sequence.png deleted file mode 100644 index 6b79db305..000000000 Binary files a/unstructured/ingest/v2/assets/sequence.png and /dev/null differ diff --git a/unstructured/ingest/v2/assets/sequence.txt b/unstructured/ingest/v2/assets/sequence.txt deleted file mode 100644 index 618859a6a..000000000 --- a/unstructured/ingest/v2/assets/sequence.txt +++ /dev/null @@ -1,38 +0,0 @@ -title Ingest Flow - - -Pipeline->Index: Pipeline.indexer_step.run() -Index->Data Provider:fetch list of docs with metadata -Data Provider->Index: -Index->Local Filesystem:for each record, save the metadata as a json file -Index->Pipeline: pipeline records a list of files -Pipeline->Download: Pipeline.downloader_step(records) -Download->Local Filesystem: Fetch the associated metadata -Local Filesystem->Download: -Download->Data Provider: Get raw data from data provider -Download->Local Filesystem: Persist the data as raw files -Download->Pipeline: Send back a reference to the local file to process -Pipeline-->Uncompress: Optionally run if flag set to True -Uncompress->Local Filesystem: Extract tar and zip files -Uncompress->Local Filesystem: New metadata records are created for new extracted files -Uncompress->Pipeline: Send back list of pointers to new metadata files -Pipeline->Partition: Pipeline.partitioner_step(downloaded_data) -Partition-->Unstructured Api: If credentials passed in,\npassed file data to API for partitioning -Unstructured Api->Partition: -Partition->Local Filesystem: Persist results -Partition->Pipeline: Pointers to persisted results -Pipeline-->Chunk: Optionally Pipeline.chunker_step.run(records) -Chunk-->Unstructured Api: If credentials passed in,\npassed file data to API for chunking -Unstructured Api->Chunk: -Chunk->Local Filesystem: Persist results -Chunk->Pipeline: Pointers to persisted results -Pipeline-->Embed: Optionally Pipeline.embed_step.run(records) -Embed-->Embedder Api: Depending on which embedder\nis chosen, make API calls to provider -Embed->Local Filesystem: Persist results -Embed->Pipeline: Pointers to persisted results -Pipeline->Stage: Optionally Pipeline.stager_step.run(records) -Stage->Local Filesystem: manipulate the records to better upload -Stage->Pipeline: Pointers to persisted results -Pipeline->Upload: Pipeline.upload_step.run() -Upload->Data Destination: -Pipeline->Local Filesystem: Cleanup diff --git a/unstructured/ingest/v2/cli/README.md b/unstructured/ingest/v2/cli/README.md deleted file mode 100644 index 4d60d4ccf..000000000 --- a/unstructured/ingest/v2/cli/README.md +++ /dev/null @@ -1,28 +0,0 @@ -# Ingest CLI -This package helps map user input via 
a cli to the underlying ingest code to run a small ETL pipeline. - -## Design Reference -[cli.py](./cli.py) is the main entrypoint to run the cli itself. The key point here is the interaction between all -source and destination connectors. - -To manually run the cli: -```shell -PYTHONPATH=. python unstructured/ingest/v2/main.py --help -``` - -The `main.py` file simply wraps the generated Click command created in `cli.py`. - -### Source Commands -All source commands are added as subcommands to the parent ingest Click group. This allows each command to map to -different connectors with shared and unique parameters. - -### Destination Commands -All destination commands are added as subcommands to each parent source command. This allows each invocation of the source -subcommand to display all possible destination subcommands. The code in [utils.py](./utils.py) helps structure the -generated text from the Click library to be more intuitive for this approach (i.e. listing subcommands as `Destinations`). - -### Configs -The configs in [configs/](./configs) and connector-specific ones in [cmds/](./cmds) help surface all user parameters that -are needed to marshal the input dictionary from Click into all the respective configs needed to create a full pipeline run. -Because click returns a flat dictionary of user inputs, the `extract_config` method in `utils.py` helps deserialize this dictionary -into dataclasses that have nested fields (such as access configs). diff --git a/unstructured/ingest/v2/cli/__init__.py b/unstructured/ingest/v2/cli/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstructured/ingest/v2/cli/base/__init__.py b/unstructured/ingest/v2/cli/base/__init__.py deleted file mode 100644 index ed07a1684..000000000 --- a/unstructured/ingest/v2/cli/base/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .dest import DestCmd -from .src import SrcCmd - -__all__ = ["SrcCmd", "DestCmd"] diff --git a/unstructured/ingest/v2/cli/base/cmd.py b/unstructured/ingest/v2/cli/base/cmd.py deleted file mode 100644 index 0a5d5c138..000000000 --- a/unstructured/ingest/v2/cli/base/cmd.py +++ /dev/null @@ -1,215 +0,0 @@ -import inspect -from abc import ABC, abstractmethod -from dataclasses import dataclass, field, fields -from typing import Any, Optional, Type, TypeVar - -import click - -from unstructured.ingest.v2.cli.base.importer import import_from_string -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import extract_config -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import Chunker, ChunkerConfig -from unstructured.ingest.v2.processes.connector_registry import ( - DownloaderT, - IndexerT, - UploaderT, - UploadStager, - UploadStagerConfig, - UploadStagerT, - destination_registry, - source_registry, -) -from unstructured.ingest.v2.processes.connectors.local import LocalUploader, LocalUploaderConfig -from unstructured.ingest.v2.processes.embedder import Embedder, EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import Partitioner, PartitionerConfig - -CommandT = TypeVar("CommandT", bound=click.Command) - - -@dataclass -class BaseCmd(ABC): - cmd_name: str - default_configs: list[Type[CliConfig]] = field(default_factory=list) - - @property - def cmd_name_key(self): - return self.cmd_name.replace("-", "_") - - @property - def
cli_cmd_name(self): - return self.cmd_name.replace("_", "-") - - @abstractmethod - def cmd(self, ctx: click.Context, **options) -> None: - pass - - def add_options(self, cmd: CommandT, extras: list[Type[CliConfig]]) -> CommandT: - configs = self.default_configs - # make sure what's unique to this cmd appears first - extras.extend(configs) - for config in extras: - try: - config.add_cli_options(cmd=cmd) - except ValueError as e: - raise ValueError(f"failed to set configs from {config.__name__}: {e}") - return cmd - - def get_pipeline( - self, - src: str, - source_options: dict[str, Any], - dest: Optional[str] = None, - destination_options: Optional[dict[str, Any]] = None, - ) -> Pipeline: - logger.debug( - f"creating pipeline from cli using source {src} with options: {source_options}" - ) - pipeline_kwargs: dict[str, Any] = { - "context": self.get_processor_config(options=source_options), - "downloader": self.get_downloader(src=src, options=source_options), - "indexer": self.get_indexer(src=src, options=source_options), - "partitioner": self.get_partitioner(options=source_options), - } - if chunker := self.get_chunker(options=source_options): - pipeline_kwargs["chunker"] = chunker - if embedder := self.get_embeder(options=source_options): - pipeline_kwargs["embedder"] = embedder - if dest: - logger.debug( - f"setting destination on pipeline {dest} with options: {destination_options}" - ) - if uploader_stager := self.get_upload_stager(dest=dest, options=destination_options): - pipeline_kwargs["stager"] = uploader_stager - pipeline_kwargs["uploader"] = self.get_uploader(dest=dest, options=destination_options) - else: - # Default to local uploader - # TODO remove after v1 no longer supported - destination_options = destination_options or {} - if "output_dir" not in destination_options: - destination_options["output_dir"] = source_options["output_dir"] - pipeline_kwargs["uploader"] = self.get_default_uploader(options=destination_options) - return Pipeline(**pipeline_kwargs) - - @staticmethod - def get_default_uploader(options: dict[str, Any]) -> UploaderT: - uploader_config = extract_config(flat_data=options, config=LocalUploaderConfig) - return LocalUploader(upload_config=uploader_config) - - @staticmethod - def get_chunker(options: dict[str, Any]) -> Optional[Chunker]: - chunker_config = extract_config(flat_data=options, config=ChunkerConfig) - if not chunker_config.chunking_strategy: - return None - return Chunker(config=chunker_config) - - @staticmethod - def get_embeder(options: dict[str, Any]) -> Optional[Embedder]: - embedder_config = extract_config(flat_data=options, config=EmbedderConfig) - if not embedder_config.embedding_provider: - return None - return Embedder(config=embedder_config) - - @staticmethod - def get_partitioner(options: dict[str, Any]) -> Partitioner: - partitioner_config = extract_config(flat_data=options, config=PartitionerConfig) - return Partitioner(config=partitioner_config) - - @staticmethod - def get_processor_config(options: dict[str, Any]) -> ProcessorConfig: - return extract_config(flat_data=options, config=ProcessorConfig) - - @staticmethod - def get_indexer(src: str, options: dict[str, Any]) -> IndexerT: - source_entry = source_registry[src] - indexer_kwargs: dict[str, Any] = {} - if indexer_config_cls := source_entry.indexer_config: - indexer_kwargs["index_config"] = extract_config( - flat_data=options, config=indexer_config_cls - ) - if connection_config_cls := source_entry.connection_config: - indexer_kwargs["connection_config"] = extract_config( - 
flat_data=options, config=connection_config_cls - ) - indexer_cls = source_entry.indexer - return indexer_cls(**indexer_kwargs) - - @staticmethod - def get_downloader(src: str, options: dict[str, Any]) -> DownloaderT: - source_entry = source_registry[src] - downloader_kwargs: dict[str, Any] = {} - if downloader_config_cls := source_entry.downloader_config: - downloader_kwargs["download_config"] = extract_config( - flat_data=options, config=downloader_config_cls - ) - if connection_config_cls := source_entry.connection_config: - downloader_kwargs["connection_config"] = extract_config( - flat_data=options, config=connection_config_cls - ) - downloader_cls = source_entry.downloader - return downloader_cls(**downloader_kwargs) - - @staticmethod - def get_custom_stager( - stager_reference: str, stager_config_kwargs: Optional[dict] = None - ) -> Optional[UploadStagerT]: - uploader_cls = import_from_string(stager_reference) - if not inspect.isclass(uploader_cls): - raise ValueError( - f"custom stager must be a reference to a python class, got: {type(uploader_cls)}" - ) - if not issubclass(uploader_cls, UploadStager): - raise ValueError( - "custom stager must be an implementation of the UploadStager interface" - ) - fields_dict = {f.name: f.type for f in fields(uploader_cls)} - upload_stager_config_cls = fields_dict["upload_stager_config"] - if not inspect.isclass(upload_stager_config_cls): - raise ValueError( - f"custom stager config must be a class, got: {type(upload_stager_config_cls)}" - ) - if not issubclass(upload_stager_config_cls, UploadStagerConfig): - raise ValueError( - "custom stager config must be an implementation " - "of the UploadStagerConfig interface" - ) - upload_stager_kwargs: dict[str, Any] = {} - if stager_config_kwargs: - upload_stager_kwargs["upload_stager_config"] = upload_stager_config_cls( - **stager_config_kwargs - ) - return uploader_cls(**upload_stager_kwargs) - - @staticmethod - def get_upload_stager(dest: str, options: dict[str, Any]) -> Optional[UploadStagerT]: - if custom_stager := options.get("custom_stager"): - return BaseCmd.get_custom_stager( - stager_reference=custom_stager, - stager_config_kwargs=options.get("custom_stager_config_kwargs"), - ) - dest_entry = destination_registry[dest] - upload_stager_kwargs: dict[str, Any] = {} - if upload_stager_config_cls := dest_entry.upload_stager_config: - upload_stager_kwargs["upload_stager_config"] = extract_config( - flat_data=options, config=upload_stager_config_cls - ) - if upload_stager_cls := dest_entry.upload_stager: - return upload_stager_cls(**upload_stager_kwargs) - return None - - @staticmethod - def get_uploader(dest, options: dict[str, Any]) -> UploaderT: - dest_entry = destination_registry[dest] - uploader_kwargs: dict[str, Any] = {} - if uploader_config_cls := dest_entry.uploader_config: - uploader_kwargs["upload_config"] = extract_config( - flat_data=options, config=uploader_config_cls - ) - if connection_config_cls := dest_entry.connection_config: - uploader_kwargs["connection_config"] = extract_config( - flat_data=options, config=connection_config_cls - ) - uploader_cls = dest_entry.uploader - return uploader_cls(**uploader_kwargs) diff --git a/unstructured/ingest/v2/cli/base/dest.py b/unstructured/ingest/v2/cli/base/dest.py deleted file mode 100644 index b1703dcc8..000000000 --- a/unstructured/ingest/v2/cli/base/dest.py +++ /dev/null @@ -1,76 +0,0 @@ -import logging -from dataclasses import dataclass -from typing import Optional, Type - -import click - -from
unstructured.ingest.v2.cli.base.cmd import BaseCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import Dict, conform_click_options -from unstructured.ingest.v2.logger import logger - - -@dataclass -class DestCmd(BaseCmd): - connection_config: Optional[Type[CliConfig]] = None - uploader_config: Optional[Type[CliConfig]] = None - upload_stager_config: Optional[Type[CliConfig]] = None - - def cmd(self, ctx: click.Context, **options) -> None: - logger.setLevel(logging.DEBUG if options.get("verbose", False) else logging.INFO) - if not ctx.parent: - raise click.ClickException("destination command called without a parent") - if not ctx.parent.info_name: - raise click.ClickException("parent command missing info name") - source_cmd = ctx.parent.info_name.replace("-", "_") - source_options: dict = ctx.parent.params if ctx.parent else {} - conform_click_options(options) - try: - pipeline = self.get_pipeline( - src=source_cmd, - source_options=source_options, - dest=self.cmd_name, - destination_options=options, - ) - pipeline.run() - except Exception as e: - logger.error(f"failed to run destination command {self.cmd_name}: {e}", exc_info=True) - raise click.ClickException(str(e)) from e - - def get_cmd(self) -> click.Command: - # Dynamically create the command without the use of click decorators - fn = self.cmd - fn = click.pass_context(fn) - cmd = click.command(fn) - if not isinstance(cmd, click.core.Command): - raise ValueError(f"generated command was not of expected type Command: {type(cmd)}") - cmd.name = self.cli_cmd_name - cmd.short_help = "v2" - cmd.invoke_without_command = True - extras = [ - x - for x in [self.uploader_config, self.upload_stager_config, self.connection_config] - if x - ] - self.add_options(cmd, extras=extras) - cmd.params.append( - click.Option( - ["--custom-stager"], - required=False, - type=str, - default=None, - help="Pass a pointer to a custom upload stager to use, " - "must be in format 'module_path:class_name'", - ) - ) - cmd.params.append( - click.Option( - ["--custom-stager-config-kwargs"], - required=False, - type=Dict(), - default=None, - help="Any kwargs to instantiate the configuration " - "associated with the custom stager", - ) - ) - return cmd diff --git a/unstructured/ingest/v2/cli/base/importer.py b/unstructured/ingest/v2/cli/base/importer.py deleted file mode 100644 index f77520ee1..000000000 --- a/unstructured/ingest/v2/cli/base/importer.py +++ /dev/null @@ -1,34 +0,0 @@ -import importlib -from typing import Any - - -class ImportFromStringError(Exception): - pass - - -def import_from_string(import_str: Any) -> Any: - if not isinstance(import_str, str): - return import_str - - module_str, _, attrs_str = import_str.partition(":") - if not module_str or not attrs_str: - message = 'Import string "{import_str}" must be in format "module:attribute".' - raise ImportFromStringError(message.format(import_str=import_str)) - - try: - module = importlib.import_module(module_str) - except ModuleNotFoundError as exc: - if exc.name != module_str: - raise exc from None - message = 'Could not import module "{module_str}".' - raise ImportFromStringError(message.format(module_str=module_str)) - - instance = module - try: - for attr_str in attrs_str.split("."): - instance = getattr(instance, attr_str) - except AttributeError: - message = 'Attribute "{attrs_str}" not found in module "{module_str}".'
- raise ImportFromStringError(message.format(attrs_str=attrs_str, module_str=module_str)) - - return instance diff --git a/unstructured/ingest/v2/cli/base/src.py b/unstructured/ingest/v2/cli/base/src.py deleted file mode 100644 index 9ec350cad..000000000 --- a/unstructured/ingest/v2/cli/base/src.py +++ /dev/null @@ -1,70 +0,0 @@ -import logging -from dataclasses import dataclass, field -from typing import Any, Optional, Type - -import click - -from unstructured.ingest.v2.cli.base.cmd import BaseCmd -from unstructured.ingest.v2.cli.configs import ( - ChunkerCliConfig, - EmbedderCliConfig, - PartitionerCliConfig, - ProcessorCliConfig, -) -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import Group, conform_click_options -from unstructured.ingest.v2.logger import logger - - -@dataclass -class SrcCmd(BaseCmd): - indexer_config: Optional[Type[CliConfig]] = None - downloader_config: Optional[Type[CliConfig]] = None - connection_config: Optional[Type[CliConfig]] = None - default_configs: list[CliConfig] = field( - default_factory=lambda: [ - ProcessorCliConfig, - PartitionerCliConfig, - EmbedderCliConfig, - ChunkerCliConfig, - ] - ) - - def cmd(self, ctx: click.Context, **options: dict[str, Any]) -> None: - if ctx.invoked_subcommand: - return - - conform_click_options(options) - logger.setLevel(logging.DEBUG if options.get("verbose", False) else logging.INFO) - try: - pipeline = self.get_pipeline(src=self.cmd_name, source_options=options) - pipeline.run() - except Exception as e: - logger.error(f"failed to run source command {self.cmd_name}: {e}", exc_info=True) - raise click.ClickException(str(e)) from e - - def get_cmd(self) -> click.Group: - # Dynamically create the command without the use of click decorators - fn = self.cmd - fn = click.pass_context(fn) - cmd = click.group(fn, cls=Group) - if not isinstance(cmd, click.core.Group): - raise ValueError(f"generated src command was not of expected type Group: {type(cmd)}") - cmd.name = self.cli_cmd_name - cmd.short_help = "v2" - cmd.invoke_without_command = True - extras = [ - x for x in [self.indexer_config, self.downloader_config, self.connection_config] if x - ] - self.add_options(cmd, extras=extras) - - # TODO remove after v1 no longer supported - cmd.params.append( - click.Option( - ["--output-dir"], - required=False, - type=str, - help="Local path to write partitioned output to", - ) - ) - return cmd diff --git a/unstructured/ingest/v2/cli/cli.py b/unstructured/ingest/v2/cli/cli.py deleted file mode 100644 index a53c43565..000000000 --- a/unstructured/ingest/v2/cli/cli.py +++ /dev/null @@ -1,24 +0,0 @@ -import click - -from unstructured.ingest.v2.cli.cmds import dest, src - - -@click.group() -def ingest(): - pass - - -def get_cmd() -> click.Command: - """Construct and return a Click command object representing the main command for the CLI. - - This function adds all dest_subcommand(s) to each src_subcommand, and adds all of those - to the main command as nested subcommands. 
- """ - cmd = ingest - # Add all subcommands - for src_subcommand in src: - # Add all destination subcommands - for dest_subcommand in dest: - src_subcommand.add_command(dest_subcommand) - cmd.add_command(src_subcommand) - return cmd diff --git a/unstructured/ingest/v2/cli/cmds/__init__.py b/unstructured/ingest/v2/cli/cmds/__init__.py deleted file mode 100644 index 4a4a74c5d..000000000 --- a/unstructured/ingest/v2/cli/cmds/__init__.py +++ /dev/null @@ -1,87 +0,0 @@ -from collections import Counter - -import click - -from .astradb import astradb_dest_cmd -from .azure_cognitive_search import azure_cognitive_search_dest_cmd -from .chroma import chroma_dest_cmd -from .databricks_volumes import databricks_volumes_dest_cmd -from .elasticsearch import elasticsearch_dest_cmd, elasticsearch_src_cmd -from .fsspec.azure import azure_dest_cmd, azure_src_cmd -from .fsspec.box import box_dest_cmd, box_src_cmd -from .fsspec.dropbox import dropbox_dest_cmd, dropbox_src_cmd -from .fsspec.gcs import gcs_dest_cmd, gcs_src_cmd -from .fsspec.s3 import s3_dest_cmd, s3_src_cmd -from .fsspec.sftp import sftp_dest_cmd, sftp_src_cmd -from .google_drive import google_drive_src_cmd -from .local import local_dest_cmd, local_src_cmd -from .mongodb import mongodb_dest_cmd -from .onedrive import onedrive_drive_src_cmd -from .opensearch import opensearch_dest_cmd, opensearch_src_cmd -from .pinecone import pinecone_dest_cmd -from .salesforce import salesforce_src_cmd -from .sharepoint import sharepoint_drive_src_cmd -from .singlestore import singlestore_dest_cmd -from .sql import sql_dest_cmd -from .weaviate import weaviate_dest_cmd - -src_cmds = [ - azure_src_cmd, - box_src_cmd, - dropbox_src_cmd, - elasticsearch_src_cmd, - gcs_src_cmd, - google_drive_src_cmd, - local_src_cmd, - onedrive_drive_src_cmd, - opensearch_src_cmd, - s3_src_cmd, - salesforce_src_cmd, - sharepoint_drive_src_cmd, - sftp_src_cmd, -] -duplicate_src_names = [ - name for name, count in Counter([s.cmd_name for s in src_cmds]).items() if count > 1 -] -if duplicate_src_names: - raise ValueError( - "the following source cmd names were reused, all must be unique: {}".format( - ", ".join(duplicate_src_names) - ) - ) - -dest_cmds = [ - astradb_dest_cmd, - azure_cognitive_search_dest_cmd, - azure_dest_cmd, - box_dest_cmd, - chroma_dest_cmd, - dropbox_dest_cmd, - elasticsearch_dest_cmd, - gcs_dest_cmd, - local_dest_cmd, - opensearch_dest_cmd, - pinecone_dest_cmd, - s3_dest_cmd, - sftp_dest_cmd, - singlestore_dest_cmd, - weaviate_dest_cmd, - mongodb_dest_cmd, - databricks_volumes_dest_cmd, - sql_dest_cmd, -] - -duplicate_dest_names = [ - name for name, count in Counter([d.cmd_name for d in dest_cmds]).items() if count > 1 -] -if duplicate_dest_names: - raise ValueError( - "the following dest cmd names were reused, all must be unique: {}".format( - ", ".join(duplicate_dest_names) - ) - ) - - -src: list[click.Group] = [v.get_cmd() for v in src_cmds] - -dest: list[click.Command] = [v.get_cmd() for v in dest_cmds] diff --git a/unstructured/ingest/v2/cli/cmds/astradb.py b/unstructured/ingest/v2/cli/cmds/astradb.py deleted file mode 100644 index 36de30f70..000000000 --- a/unstructured/ingest/v2/cli/cmds/astradb.py +++ /dev/null @@ -1,85 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import Dict -from unstructured.ingest.v2.processes.connectors.astradb import CONNECTOR_TYPE - - -@dataclass -class 
AstraDBCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--token"], - required=True, - type=str, - help="Astra DB Token with access to the database.", - envvar="ASTRA_DB_APPLICATION_TOKEN", - show_envvar=True, - ), - click.Option( - ["--api-endpoint"], - required=True, - type=str, - help="The API endpoint for the Astra DB.", - envvar="ASTRA_DB_API_ENDPOINT", - show_envvar=True, - ), - ] - return options - - -@dataclass -class AstraDBCliUploaderConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--collection-name"], - required=False, - type=str, - help="The name of the Astra DB collection. " - "Note that the collection name must only include letters, " - "numbers, and underscores.", - ), - click.Option( - ["--embedding-dimension"], - required=True, - default=384, - type=int, - help="The dimensionality of the embeddings", - ), - click.Option( - ["--namespace"], - required=False, - default=None, - type=str, - help="The Astra DB connection namespace.", - ), - click.Option( - ["--requested-indexing-policy"], - required=False, - default=None, - type=Dict(), - help="The indexing policy to use for the collection." - 'example: \'{"deny": ["metadata"]}\' ', - ), - click.Option( - ["--batch-size"], - default=20, - type=int, - help="Number of records per batch", - ), - ] - return options - - -astradb_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=AstraDBCliConnectionConfig, - uploader_config=AstraDBCliUploaderConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/azure_cognitive_search.py b/unstructured/ingest/v2/cli/cmds/azure_cognitive_search.py deleted file mode 100644 index 6097606e5..000000000 --- a/unstructured/ingest/v2/cli/cmds/azure_cognitive_search.py +++ /dev/null @@ -1,72 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.processes.connectors.azure_cognitive_search import CONNECTOR_TYPE - - -@dataclass -class AzureCognitiveSearchCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--index"], - required=True, - type=str, - help="The name of the Azure AI (Cognitive) Search index to connect to.", - envvar="AZURE_SEARCH_INDEX", - show_envvar=True, - ), - click.Option( - ["--endpoint"], - required=True, - type=str, - help="The URL endpoint of an Azure AI (Cognitive) search service." - "In the form of https://{{service_name}}.search.windows.net", - envvar="AZURE_SEARCH_ENDPOINT", - show_envvar=True, - ), - click.Option( - ["--key"], - required=True, - type=str, - help="Credential that is used for authenticating to an Azure service." 
- "(is an AzureKeyCredential)", - envvar="AZURE_SEARCH_API_KEY", - show_envvar=True, - ), - ] - return options - - -@dataclass -class AzureCognitiveSearchCliUploaderConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--batch-size"], - default=100, - type=int, - help="Number of records per batch", - ), - ] - return options - - -@dataclass -class AzureCognitiveSearchCliUploadStagerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - return [] - - -azure_cognitive_search_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=AzureCognitiveSearchCliConnectionConfig, - uploader_config=AzureCognitiveSearchCliUploaderConfig, - upload_stager_config=AzureCognitiveSearchCliUploadStagerConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/chroma.py b/unstructured/ingest/v2/cli/cmds/chroma.py deleted file mode 100644 index c13816351..000000000 --- a/unstructured/ingest/v2/cli/cmds/chroma.py +++ /dev/null @@ -1,108 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import Dict -from unstructured.ingest.v2.processes.connectors.chroma import CONNECTOR_TYPE - - -@dataclass -class ChromaCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--path"], - required=False, - type=str, - help="Location where Chroma is persisted," "if not connecting via http.", - ), - click.Option( - ["--settings"], - required=False, - type=Dict(), - help="A dictionary of settings to communicate with the chroma server." - 'example: \'{"persist_directory":"./chroma-persist"}\' ', - ), - click.Option( - ["--tenant"], - required=False, - default="default_tenant", - type=str, - help="The tenant to use for this client. Chroma defaults to 'default_tenant'.", - ), - click.Option( - ["--database"], - required=False, - default="default_database", - type=str, - help="The database to use for this client." - "Chroma defaults to 'default_database'.", - ), - click.Option( - ["--host"], - required=False, - type=str, - help="The hostname of the Chroma server.", - ), - click.Option( - ["--port"], - required=False, - type=int, - help="The port of the Chroma server.", - ), - click.Option( - ["--ssl"], - required=False, - default=False, - is_flag=True, - type=bool, - help="Whether to use SSL to connect to the Chroma server.", - ), - click.Option( - ["--headers"], - required=False, - type=Dict(), - help="A dictionary of headers to send to the Chroma server." 
- 'example: \'{"Authorization":"Basic()"}\' ', - ), - click.Option( - ["--collection-name"], - required=True, - type=str, - help="The name of the Chroma collection to write into.", - ), - ] - return options - - -@dataclass -class ChromaCliUploaderConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--batch-size"], - default=100, - type=int, - help="Number of records per batch", - ) - ] - return options - - -@dataclass -class ChromaCliUploadStagerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - return [] - - -chroma_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=ChromaCliConnectionConfig, - uploader_config=ChromaCliUploaderConfig, - upload_stager_config=ChromaCliUploadStagerConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/databricks_volumes.py b/unstructured/ingest/v2/cli/cmds/databricks_volumes.py deleted file mode 100644 index e8f8e2486..000000000 --- a/unstructured/ingest/v2/cli/cmds/databricks_volumes.py +++ /dev/null @@ -1,161 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.processes.connectors.databricks_volumes import CONNECTOR_TYPE - - -@dataclass -class DatabricksVolumesCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--host"], - type=str, - default=None, - help="The Databricks host URL for either the " - "Databricks workspace endpoint or the " - "Databricks accounts endpoint.", - ), - click.Option( - ["--account-id"], - type=str, - default=None, - help="The Databricks account ID for the Databricks " - "accounts endpoint. Only has effect when Host is " - "either https://accounts.cloud.databricks.com/ (AWS), " - "https://accounts.azuredatabricks.net/ (Azure), " - "or https://accounts.gcp.databricks.com/ (GCP).", - ), - click.Option( - ["--username"], - type=str, - default=None, - help="The Databricks username part of basic authentication. " - "Only possible when Host is *.cloud.databricks.com (AWS).", - ), - click.Option( - ["--password"], - type=str, - default=None, - help="The Databricks password part of basic authentication. " - "Only possible when Host is *.cloud.databricks.com (AWS).", - ), - click.Option(["--client-id"], type=str, default=None), - click.Option(["--client-secret"], type=str, default=None), - click.Option( - ["--token"], - type=str, - default=None, - help="The Databricks personal access token (PAT) (AWS, Azure, and GCP) or " - "Azure Active Directory (Azure AD) token (Azure).", - ), - click.Option( - ["--azure-workspace-resource-id"], - type=str, - default=None, - help="The Azure Resource Manager ID for the Azure Databricks workspace, " - "which is exchanged for a Databricks host URL.", - ), - click.Option( - ["--azure-client-secret"], - type=str, - default=None, - help="The Azure AD service principal’s client secret.", - ), - click.Option( - ["--azure-client-id"], - type=str, - default=None, - help="The Azure AD service principal’s application ID.", - ), - click.Option( - ["--azure-tenant-id"], - type=str, - default=None, - help="The Azure AD service principal’s tenant ID.", - ), - click.Option( - ["--azure-environment"], - type=str, - default=None, - help="The Azure environment type (such as Public, UsGov, China, and Germany) for a " - "specific set of API endpoints. 
Defaults to PUBLIC.", - ), - click.Option( - ["--auth-type"], - type=str, - default=None, - help="When multiple auth attributes are available in the " - "environment, use the auth type specified by this " - "argument. This argument also holds the currently " - "selected auth.", - ), - click.Option(["--cluster-id"], type=str, default=None), - click.Option(["--google-credentials"], type=str, default=None), - click.Option(["--google-service-account"], type=str, default=None), - ] - return options - - -@dataclass -class DatabricksVolumesCliUploaderConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--volume"], type=str, required=True, help="Name of volume in the Unity Catalog" - ), - click.Option( - ["--catalog"], - type=str, - required=True, - help="Name of the catalog in the Databricks Unity Catalog service", - ), - click.Option( - ["--volume-path"], - type=str, - required=False, - default=None, - help="Optional path within the volume to write to", - ), - click.Option( - ["--overwrite"], - type=bool, - is_flag=True, - help="If true, an existing file will be overwritten.", - ), - click.Option( - ["--encoding"], - type=str, - required=True, - default="utf-8", - help="Encoding applied to the data when written to the volume", - ), - click.Option( - ["--schema"], - type=str, - required=True, - default="default", - help="Schema associated with the volume to write to in the Unity Catalog service", - ), - ] - return options - - -@dataclass -class DatabricksVolumesCliUploadStagerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - return [] - - -databricks_volumes_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=DatabricksVolumesCliConnectionConfig, - uploader_config=DatabricksVolumesCliUploaderConfig, - upload_stager_config=DatabricksVolumesCliUploadStagerConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/elasticsearch.py b/unstructured/ingest/v2/cli/cmds/elasticsearch.py deleted file mode 100644 index 8c52c97f7..000000000 --- a/unstructured/ingest/v2/cli/cmds/elasticsearch.py +++ /dev/null @@ -1,159 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd, SrcCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import DelimitedString -from unstructured.ingest.v2.processes.connectors.elasticsearch import CONNECTOR_TYPE - - -@dataclass -class ElasticsearchCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--hosts"], - type=DelimitedString(), - help='List of the Elasticsearch hosts to connect to, e.g. 
"http://localhost:9200"', - ), - click.Option( - ["--username"], type=str, default=None, help="username when using basic auth" - ), - click.Option( - ["--password"], - type=str, - default=None, - help="password when using basic auth or connecting to a cloud instance", - ), - click.Option( - ["--cloud-id"], type=str, default=None, help="id used to connect to Elastic Cloud" - ), - click.Option( - ["--es-api-key"], type=str, default=None, help="api key used for authentication" - ), - click.Option( - ["--api-key-id"], - type=str, - default=None, - help="id associated with api key used for authentication: " - "https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-create-api-key.html", # noqa: E501 - # noqa: E501 - ), - click.Option( - ["--bearer-auth"], - type=str, - default=None, - help="bearer token used for HTTP bearer authentication", - ), - click.Option( - ["--ca-certs"], - type=click.Path(), - default=None, - ), - click.Option( - ["--ssl-assert-fingerprint"], - type=str, - default=None, - help="SHA256 fingerprint value", - ), - ] - return options - - -@dataclass -class ElasticsearchCliDownloadConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--download-dir"], - help="Where files are downloaded to, defaults to a location at" - "`$HOME/.cache/unstructured/ingest//`.", - ), - click.Option( - ["--fields"], - type=DelimitedString(), - default=[], - help="If provided, will limit the fields returned by Elasticsearch " - "to this comma-delimited list", - ), - ] - return options - - -@dataclass -class ElasticsearchCliIndexerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--index-name"], - required=True, - type=str, - help="Name of the Elasticsearch index to pull data from, or upload data to.", - ), - click.Option( - ["--batch-size"], - default=100, - type=click.IntRange(0), - help="how many records to read at a time per process", - ), - ] - return options - - -@dataclass -class ElasticsearchCliUploadStagerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--index-name"], - required=True, - type=str, - help="Name of the Elasticsearch index to pull data from, or upload data to.", - ), - ] - return options - - -@dataclass -class ElasticsearchUploaderConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--batch-size-bytes"], - required=False, - default=15_000_000, - type=int, - help="Size limit (in bytes) for each batch of items to be uploaded. 
Check" - " https://www.elastic.co/guide/en/elasticsearch/guide/current/bulk.html" - "#_how_big_is_too_big for more information.", - ), - click.Option( - ["--num-threads"], - required=False, - default=1, - type=int, - help="Number of threads to be used while uploading content", - ), - ] - return options - - -elasticsearch_src_cmd = SrcCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=ElasticsearchCliConnectionConfig, - indexer_config=ElasticsearchCliIndexerConfig, - downloader_config=ElasticsearchCliDownloadConfig, -) - -elasticsearch_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=ElasticsearchCliConnectionConfig, - upload_stager_config=ElasticsearchCliUploadStagerConfig, - uploader_config=ElasticsearchUploaderConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/fsspec/__init__.py b/unstructured/ingest/v2/cli/cmds/fsspec/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstructured/ingest/v2/cli/cmds/fsspec/azure.py b/unstructured/ingest/v2/cli/cmds/fsspec/azure.py deleted file mode 100644 index c5bdd2ab3..000000000 --- a/unstructured/ingest/v2/cli/cmds/fsspec/azure.py +++ /dev/null @@ -1,84 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd, SrcCmd -from unstructured.ingest.v2.cli.cmds.fsspec.fsspec import ( - FsspecCliDownloadConfig, - FsspecCliIndexerConfig, - FsspecCliUploaderConfig, -) -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.processes.connectors.fsspec.azure import ( - CONNECTOR_TYPE, -) - - -@dataclass -class AzureCliDownloadConfig(FsspecCliDownloadConfig): - pass - - -@dataclass -class AzureCliIndexerConfig(FsspecCliIndexerConfig): - pass - - -@dataclass -class AzureCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--account-key"], - default=None, - help="The storage account key. This is used for shared key " - "authentication. If any of account key, sas token or " - "client_id are not specified, anonymous access will be used.", - ), - click.Option( - ["--account-name"], - default=None, - help="The storage account name. This is used to authenticate " - "requests signed with an account key and to construct " - "the storage endpoint. It is required unless a connection " - "string is given, or if a custom domain is used with " - "anonymous authentication.", - ), - click.Option( - ["--connection-string"], - default=None, - help="If specified, this will override all other parameters. See " - "http://azure.microsoft.com/en-us/documentation/articles/storage-configure-connection-string/ " # noqa: E501 - "for the connection string format.", - ), - click.Option( - ["--sas_token"], - default=None, - help="A shared access signature token to use to authenticate " - "requests instead of the account key. If account key and " - "sas token are both specified, account key will be used " - "to sign. 
If any of account key, sas token or client_id " - "are not specified, anonymous access will be used.", - ), - ] - return options - - -@dataclass -class AzureUploaderConfig(FsspecCliUploaderConfig): - pass - - -azure_src_cmd = SrcCmd( - cmd_name=CONNECTOR_TYPE, - indexer_config=AzureCliIndexerConfig, - connection_config=AzureCliConnectionConfig, - downloader_config=AzureCliDownloadConfig, -) - -azure_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=AzureCliConnectionConfig, - uploader_config=AzureUploaderConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/fsspec/box.py b/unstructured/ingest/v2/cli/cmds/fsspec/box.py deleted file mode 100644 index 99241b917..000000000 --- a/unstructured/ingest/v2/cli/cmds/fsspec/box.py +++ /dev/null @@ -1,58 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd, SrcCmd -from unstructured.ingest.v2.cli.cmds.fsspec.fsspec import ( - FsspecCliDownloadConfig, - FsspecCliIndexerConfig, - FsspecCliUploaderConfig, -) -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.processes.connectors.fsspec.box import ( - CONNECTOR_TYPE, -) - - -@dataclass -class BoxCliDownloadConfig(FsspecCliDownloadConfig): - pass - - -@dataclass -class BoxCliIndexerConfig(FsspecCliIndexerConfig): - pass - - -@dataclass -class BoxCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--box-app-config"], - default=None, - type=click.Path(), - help="Path to Box app credentials as json file.", - ), - ] - return options - - -@dataclass -class BoxUploaderConfig(FsspecCliUploaderConfig): - pass - - -box_src_cmd = SrcCmd( - cmd_name=CONNECTOR_TYPE, - indexer_config=BoxCliIndexerConfig, - connection_config=BoxCliConnectionConfig, - downloader_config=BoxCliDownloadConfig, -) - -box_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=BoxCliConnectionConfig, - uploader_config=BoxUploaderConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/fsspec/dropbox.py b/unstructured/ingest/v2/cli/cmds/fsspec/dropbox.py deleted file mode 100644 index 7b7c4406d..000000000 --- a/unstructured/ingest/v2/cli/cmds/fsspec/dropbox.py +++ /dev/null @@ -1,58 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd, SrcCmd -from unstructured.ingest.v2.cli.cmds.fsspec.fsspec import ( - FsspecCliDownloadConfig, - FsspecCliIndexerConfig, - FsspecCliUploaderConfig, -) -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.processes.connectors.fsspec.dropbox import ( - CONNECTOR_TYPE, -) - - -@dataclass -class DropboxCliDownloadConfig(FsspecCliDownloadConfig): - pass - - -@dataclass -class DropboxCliIndexerConfig(FsspecCliIndexerConfig): - pass - - -@dataclass -class DropboxCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--token"], - required=True, - type=str, - help="Dropbox access token.", - ), - ] - return options - - -@dataclass -class DropboxUploaderConfig(FsspecCliUploaderConfig): - pass - - -dropbox_src_cmd = SrcCmd( - cmd_name=CONNECTOR_TYPE, - indexer_config=DropboxCliIndexerConfig, - connection_config=DropboxCliConnectionConfig, - downloader_config=DropboxCliDownloadConfig, -) - -dropbox_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=DropboxCliConnectionConfig, - uploader_config=DropboxUploaderConfig, -) diff --git 
a/unstructured/ingest/v2/cli/cmds/fsspec/fsspec.py b/unstructured/ingest/v2/cli/cmds/fsspec/fsspec.py deleted file mode 100644 index 858586c76..000000000 --- a/unstructured/ingest/v2/cli/cmds/fsspec/fsspec.py +++ /dev/null @@ -1,77 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import DelimitedString - - -@dataclass -class FsspecCliDownloadConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - return [ - click.Option( - ["--download-dir"], - help="Where files are downloaded to, defaults to a location at" - "`$HOME/.cache/unstructured/ingest//`.", - ), - ] - - -@dataclass -class FsspecCliFileConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - return [ - click.Option( - ["--remote-url"], - required=True, - help="Remote fsspec URL formatted as `protocol://dir/path`", - ) - ] - - -@dataclass -class FsspecCliUploaderConfig(FsspecCliFileConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = super(FsspecCliUploaderConfig, FsspecCliUploaderConfig).get_cli_options() - options.extend( - [ - click.Option( - ["--overwrite"], - is_flag=True, - default=False, - show_default=True, - help="If set, will overwrite content if content already exists", - ) - ] - ) - return options - - -@dataclass -class FsspecCliIndexerConfig(FsspecCliFileConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = super(FsspecCliIndexerConfig, FsspecCliIndexerConfig).get_cli_options() - options.extend( - [ - click.Option( - ["--recursive"], - is_flag=True, - default=False, - help="Recursively download files in their respective folders " - "otherwise stop at the files in provided folder level.", - ), - click.Option( - ["--file-glob"], - default=None, - type=DelimitedString(), - help="A comma-separated list of file globs to limit which types of " - "local files are accepted, e.g. '*.html,*.txt'", - ), - ] - ) - return options diff --git a/unstructured/ingest/v2/cli/cmds/fsspec/gcs.py b/unstructured/ingest/v2/cli/cmds/fsspec/gcs.py deleted file mode 100644 index 7464d7769..000000000 --- a/unstructured/ingest/v2/cli/cmds/fsspec/gcs.py +++ /dev/null @@ -1,81 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd, SrcCmd -from unstructured.ingest.v2.cli.cmds.fsspec.fsspec import ( - FsspecCliDownloadConfig, - FsspecCliIndexerConfig, - FsspecCliUploaderConfig, -) -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import FileOrJson -from unstructured.ingest.v2.processes.connectors.fsspec.gcs import ( - CONNECTOR_TYPE, -) - - -@dataclass -class GcsCliDownloadConfig(FsspecCliDownloadConfig): - pass - - -@dataclass -class GcsCliIndexerConfig(FsspecCliIndexerConfig): - pass - - -@dataclass -class GcsCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - help_string = """ - Options: - - ``None``, GCSFS will attempt to guess your credentials in the - following order: gcloud CLI default, gcsfs cached token, google compute - metadata service, anonymous. - - ``'google_default'``, your default gcloud credentials will be used, - which are typically established by doing ``gcloud login`` in a terminal. 
- - ``'cache'``, credentials from previously successful gcsfs - authentication will be used (use this after "browser" auth succeeded) - - ``'anon'``, no authentication is performed, and you can only - access data which is accessible to allUsers (in this case, the project and - access level parameters are meaningless) - - ``'browser'``, you get an access code with which you can - authenticate via a specially provided URL - - if ``'cloud'``, we assume we are running within google compute - or google container engine, and query the internal metadata directly for - a token. - - you may supply a token generated by the - [gcloud](https://cloud.google.com/sdk/docs/) - utility; this is either a python dictionary or the name of a file - containing the JSON returned by logging in with the gcloud CLI tool. - """ - options = [ - click.Option( - ["--service-account-key"], - default=None, - type=FileOrJson(allow_raw_str=True), - help=help_string, - ), - ] - return options - - -@dataclass -class GcsUploaderConfig(FsspecCliUploaderConfig): - pass - - -gcs_src_cmd = SrcCmd( - cmd_name=CONNECTOR_TYPE, - indexer_config=GcsCliIndexerConfig, - connection_config=GcsCliConnectionConfig, - downloader_config=GcsCliDownloadConfig, -) - -gcs_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=GcsCliConnectionConfig, - uploader_config=GcsUploaderConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/fsspec/s3.py b/unstructured/ingest/v2/cli/cmds/fsspec/s3.py deleted file mode 100644 index 4af72d4d4..000000000 --- a/unstructured/ingest/v2/cli/cmds/fsspec/s3.py +++ /dev/null @@ -1,84 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd, SrcCmd -from unstructured.ingest.v2.cli.cmds.fsspec.fsspec import ( - FsspecCliDownloadConfig, - FsspecCliIndexerConfig, - FsspecCliUploaderConfig, -) -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.processes.connectors.fsspec.s3 import ( - CONNECTOR_TYPE, -) - - -@dataclass -class S3CliDownloadConfig(FsspecCliDownloadConfig): - pass - - -@dataclass -class S3CliIndexerConfig(FsspecCliIndexerConfig): - pass - - -@dataclass -class S3CliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--anonymous"], - is_flag=True, - default=False, - help="Connect to s3 without local AWS credentials.", - ), - click.Option( - ["--endpoint-url"], - type=str, - default=None, - help="Use this endpoint_url, if specified. Needed for " - "connecting to non-AWS S3 buckets.", - ), - click.Option( - ["--key"], - type=str, - default=None, - help="If not anonymous, use this access key ID, if specified. 
Takes precedence " - "over `aws_access_key_id` in client_kwargs.", - ), - click.Option( - ["--secret"], - type=str, - default=None, - help="If not anonymous, use this secret access key, if specified.", - ), - click.Option( - ["--token"], - type=str, - default=None, - help="If not anonymous, use this security token, if specified.", - ), - ] - return options - - -@dataclass -class S3UploaderConfig(FsspecCliUploaderConfig): - pass - - -s3_src_cmd = SrcCmd( - cmd_name=CONNECTOR_TYPE, - indexer_config=S3CliIndexerConfig, - connection_config=S3CliConnectionConfig, - downloader_config=S3CliDownloadConfig, -) - -s3_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=S3CliConnectionConfig, - uploader_config=S3UploaderConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/fsspec/sftp.py b/unstructured/ingest/v2/cli/cmds/fsspec/sftp.py deleted file mode 100644 index b4bfcb6c8..000000000 --- a/unstructured/ingest/v2/cli/cmds/fsspec/sftp.py +++ /dev/null @@ -1,80 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd, SrcCmd -from unstructured.ingest.v2.cli.cmds.fsspec.fsspec import ( - FsspecCliDownloadConfig, - FsspecCliIndexerConfig, - FsspecCliUploaderConfig, -) -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.processes.connectors.fsspec.sftp import ( - CONNECTOR_TYPE, -) - - -@dataclass -class SftpCliDownloadConfig(FsspecCliDownloadConfig): - pass - - -@dataclass -class SftpCliIndexerConfig(FsspecCliIndexerConfig): - pass - - -@dataclass -class SftpCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--username"], - required=True, - type=str, - help="Username for sftp connection", - ), - click.Option( - ["--password"], - required=True, - type=str, - help="Password for sftp connection", - ), - click.Option( - ["--look-for-keys"], - required=False, - default=False, - is_flag=True, - type=bool, - help="Whether to search for private key files in ~/.ssh/", - ), - click.Option( - ["--allow-agent"], - required=False, - default=False, - is_flag=True, - type=bool, - help="Whether to connect to the SSH agent.", - ), - ] - return options - - -@dataclass -class SftpUploaderConfig(FsspecCliUploaderConfig): - pass - - -sftp_src_cmd = SrcCmd( - cmd_name=CONNECTOR_TYPE, - indexer_config=SftpCliIndexerConfig, - connection_config=SftpCliConnectionConfig, - downloader_config=SftpCliDownloadConfig, -) - -sftp_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=SftpCliConnectionConfig, - uploader_config=SftpUploaderConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/google_drive.py b/unstructured/ingest/v2/cli/cmds/google_drive.py deleted file mode 100644 index 2a8d7960c..000000000 --- a/unstructured/ingest/v2/cli/cmds/google_drive.py +++ /dev/null @@ -1,74 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import SrcCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import DelimitedString, FileOrJson -from unstructured.ingest.v2.processes.connectors.google_drive import CONNECTOR_TYPE - - -@dataclass -class GoogleDriveCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--drive-id"], - required=True, - type=str, - help="Google Drive File or Folder ID.", - ), - click.Option( - ["--service-account-key"], - required=True, - 
type=FileOrJson(), - help="Either the file path of the credentials file to use or a json string of " - "those values to use for authentication", - ), - ] - return options - - -@dataclass -class GoogleDriveCliIndexerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--extensions"], - default=None, - type=DelimitedString(), - help="Filters the files to be processed based on extension e.g. jpg, docx, etc.", - ), - click.Option( - ["--recursive"], - is_flag=True, - default=False, - help="Recursively download files in their respective folders " - "otherwise stop at the files in provided folder level.", - ), - ] - return options - - -@dataclass -class GoogleDriveCliDownloadConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--download-dir"], - help="Where files are downloaded to, defaults to a location at" - "`$HOME/.cache/unstructured/ingest//`.", - ), - ] - return options - - -google_drive_src_cmd = SrcCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=GoogleDriveCliConnectionConfig, - indexer_config=GoogleDriveCliIndexerConfig, - downloader_config=GoogleDriveCliDownloadConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/local.py b/unstructured/ingest/v2/cli/cmds/local.py deleted file mode 100644 index f9ab17308..000000000 --- a/unstructured/ingest/v2/cli/cmds/local.py +++ /dev/null @@ -1,60 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd, SrcCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import DelimitedString -from unstructured.ingest.v2.processes.connectors.local import CONNECTOR_TYPE - - -@dataclass -class LocalCliIndexerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--input-path"], - required=True, - type=click.Path(file_okay=True, dir_okay=True, exists=True), - help="Path to the location in the local file system that will be processed.", - ), - click.Option( - ["--file-glob"], - default=None, - type=DelimitedString(), - help="A comma-separated list of file globs to limit which types of " - "local files are accepted, e.g. 
'*.html,*.txt'", - ), - click.Option( - ["--recursive"], - is_flag=True, - default=False, - help="Recursively download files in their respective folders " - "otherwise stop at the files in provided folder level.", - ), - ] - return options - - -@dataclass -class LocalCliUploaderConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--output-dir"], - required=True, - type=str, - help="Local path to write partitioned output to", - ) - ] - return options - - -local_src_cmd = SrcCmd( - cmd_name=CONNECTOR_TYPE, - indexer_config=LocalCliIndexerConfig, -) - -local_dest_cmd = DestCmd(cmd_name=CONNECTOR_TYPE, uploader_config=LocalCliUploaderConfig) diff --git a/unstructured/ingest/v2/cli/cmds/mongodb.py b/unstructured/ingest/v2/cli/cmds/mongodb.py deleted file mode 100644 index 49ad3e53d..000000000 --- a/unstructured/ingest/v2/cli/cmds/mongodb.py +++ /dev/null @@ -1,62 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.processes.connectors.mongodb import CONNECTOR_TYPE - - -@dataclass -class MongoDBCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--uri"], - help="URI to user when connecting", - ), - click.Option( - ["--host"], - help="hostname or IP address or Unix domain socket path of a single mongod or " - "mongos instance to connect to, or a list of hostnames", - ), - click.Option(["--port"], type=int, default=27017), - click.Option( - ["--database"], type=str, required=True, help="database name to connect to" - ), - click.Option( - ["--collection"], required=True, type=str, help="collection name to connect to" - ), - ] - return options - - -@dataclass -class MongoDBCliUploaderConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--batch-size"], - default=100, - type=int, - help="Number of records per batch", - ) - ] - return options - - -@dataclass -class MongoDBCliUploadStagerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - return [] - - -mongodb_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=MongoDBCliConnectionConfig, - uploader_config=MongoDBCliUploaderConfig, - upload_stager_config=MongoDBCliUploadStagerConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/onedrive.py b/unstructured/ingest/v2/cli/cmds/onedrive.py deleted file mode 100644 index d9bc7df2c..000000000 --- a/unstructured/ingest/v2/cli/cmds/onedrive.py +++ /dev/null @@ -1,91 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import SrcCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.processes.connectors.onedrive import CONNECTOR_TYPE - - -@dataclass -class OnedriveCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--client-id"], - required=True, - type=str, - help="Microsoft app client ID", - ), - click.Option( - ["--client-cred"], - required=True, - type=str, - help="Microsoft App client secret", - ), - click.Option( - ["--user-pname"], - required=True, - type=str, - help="User principal name, usually is your Azure AD email.", - ), - click.Option( - ["--tenant"], - default="common", - type=str, - help="ID or domain name associated with your Azure AD 
instance", - ), - click.Option( - ["--authority-url"], - default="https://login.microsoftonline.com", - type=str, - help="Authentication token provider for Microsoft apps, default is " - "https://login.microsoftonline.com", - ), - ] - return options - - -@dataclass -class OnedriveCliIndexerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--path"], - default=None, - type=str, - help="Folder to start parsing files from.", - ), - click.Option( - ["--recursive"], - is_flag=True, - default=False, - help="Recursively download files in their respective folders " - "otherwise stop at the files in provided folder level.", - ), - ] - return options - - -@dataclass -class OnedriveCliDownloadConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--download-dir"], - help="Where files are downloaded to, defaults to a location at" - "`$HOME/.cache/unstructured/ingest//`.", - ), - ] - return options - - -onedrive_drive_src_cmd = SrcCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=OnedriveCliConnectionConfig, - indexer_config=OnedriveCliIndexerConfig, - downloader_config=OnedriveCliDownloadConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/opensearch.py b/unstructured/ingest/v2/cli/cmds/opensearch.py deleted file mode 100644 index 8d93b7be3..000000000 --- a/unstructured/ingest/v2/cli/cmds/opensearch.py +++ /dev/null @@ -1,93 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd, SrcCmd -from unstructured.ingest.v2.cli.cmds.elasticsearch import ( - ElasticsearchCliDownloadConfig, - ElasticsearchCliIndexerConfig, - ElasticsearchCliUploadStagerConfig, - ElasticsearchUploaderConfig, -) -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import DelimitedString -from unstructured.ingest.v2.processes.connectors.opensearch import CONNECTOR_TYPE - - -@dataclass -class OpenSearchCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--hosts"], - type=DelimitedString(), - help='List of the OpenSearch hosts to connect to, e.g. 
"http://localhost:9200"', - ), - click.Option( - ["--username"], type=str, default=None, help="username when using basic auth" - ), - click.Option( - ["--password"], - type=str, - default=None, - help="password when using basic auth", - ), - click.Option( - ["--use-ssl"], - type=bool, - default=False, - is_flag=True, - help="use ssl for the connection", - ), - click.Option( - ["--verify-certs"], - type=bool, - default=False, - is_flag=True, - help="whether to verify SSL certificates", - ), - click.Option( - ["--ssl-show-warn"], - type=bool, - default=False, - is_flag=True, - help="show warning when verify certs is disabled", - ), - click.Option( - ["--ca-certs"], - type=click.Path(), - default=None, - help="path to CA bundle", - ), - click.Option( - ["--client-cert"], - type=click.Path(), - default=None, - help="path to the file containing the private key and the certificate," - " or cert only if using client_key", - ), - click.Option( - ["--client-key"], - type=click.Path(), - default=None, - help="path to the file containing the private key" - " if using separate cert and key files", - ), - ] - return options - - -opensearch_src_cmd = SrcCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=OpenSearchCliConnectionConfig, - indexer_config=ElasticsearchCliIndexerConfig, - downloader_config=ElasticsearchCliDownloadConfig, -) - -opensearch_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=OpenSearchCliConnectionConfig, - upload_stager_config=ElasticsearchCliUploadStagerConfig, - uploader_config=ElasticsearchUploaderConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/pinecone.py b/unstructured/ingest/v2/cli/cmds/pinecone.py deleted file mode 100644 index 010cc703c..000000000 --- a/unstructured/ingest/v2/cli/cmds/pinecone.py +++ /dev/null @@ -1,62 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.processes.connectors.pinecone import CONNECTOR_TYPE - - -@dataclass -class PineconeCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--api-key"], - required=True, - type=str, - help="API key for Pinecone.", - ), - click.Option( - ["--index-name"], - required=True, - type=str, - help="Name of the index to connect to. Example: my-index", - ), - click.Option( - ["--environment"], - required=True, - type=str, - help="Environment to connect to. 
Example: us-east-1", - ), - ] - return options - - -@dataclass -class PineconeCliUploaderConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--batch-size"], - default=100, - type=int, - help="Number of records per batch", - ), - click.Option( - ["--num-processes"], - default=4, - type=int, - help="Number of processes to use for uploading", - ), - ] - return options - - -pinecone_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=PineconeCliConnectionConfig, - uploader_config=PineconeCliUploaderConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/salesforce.py b/unstructured/ingest/v2/cli/cmds/salesforce.py deleted file mode 100644 index ac910b546..000000000 --- a/unstructured/ingest/v2/cli/cmds/salesforce.py +++ /dev/null @@ -1,79 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import SrcCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import DelimitedString -from unstructured.ingest.v2.processes.connectors.salesforce import ( - ACCEPTED_CATEGORIES, - CONNECTOR_TYPE, -) - - -@dataclass -class SalesforceCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--username"], - required=True, - type=str, - help="Salesforce username usually looks like an email.", - ), - click.Option( - ["--consumer-key"], - required=True, - type=str, - help="For the Salesforce JWT auth. Found in Consumer Details.", - ), - click.Option( - ["--private-key"], - required=True, - type=str, - help="Path to the private key or its contents for the Salesforce JWT auth. " - "Key file is usually named server.key.", - ), - ] - return options - - -@dataclass -class SalesforceCliIndexerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - possible_categories = ACCEPTED_CATEGORIES - options = [ - click.Option( - ["--categories"], - default=None, - required=True, - type=DelimitedString(choices=possible_categories), - help="Comma-delimited salesforce categories to download. 
" - "Currently only {}.".format(", ".join(possible_categories)), - ), - ] - return options - - -@dataclass -class SalesforceCliDownloadConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--download-dir"], - help="Where files are downloaded to, defaults to a location at" - "`$HOME/.cache/unstructured/ingest//`.", - ), - ] - return options - - -salesforce_src_cmd = SrcCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=SalesforceCliConnectionConfig, - indexer_config=SalesforceCliIndexerConfig, - downloader_config=SalesforceCliDownloadConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/sharepoint.py b/unstructured/ingest/v2/cli/cmds/sharepoint.py deleted file mode 100644 index 27d5cf3ed..000000000 --- a/unstructured/ingest/v2/cli/cmds/sharepoint.py +++ /dev/null @@ -1,112 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import SrcCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.processes.connectors.sharepoint import CONNECTOR_TYPE - - -@dataclass -class SharepointCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--client-id"], - default=None, - type=str, - help="Sharepoint app client ID", - ), - click.Option( - ["--client-cred"], - default=None, - type=str, - help="Sharepoint app secret", - ), - click.Option( - ["--site"], - default=None, - type=str, - help="Sharepoint site url. Process either base url e.g \ - https://[tenant].sharepoint.com or relative sites \ - https://[tenant].sharepoint.com/sites/. \ - To process all sites within the tenant pass a site url as \ - https://[tenant]-admin.sharepoint.com.\ - This requires the app to be registered at a tenant level", - ), - click.Option( - ["--permissions-application-id"], - type=str, - help="Microsoft Graph API application id", - ), - click.Option( - ["--permissions-client-cred"], - type=str, - help="Microsoft Graph API application credentials", - ), - click.Option( - ["--permissions-tenant"], - type=str, - help="e.g https://contoso.onmicrosoft.com to get permissions data within tenant.", - ), - ] - return options - - -@dataclass -class SharepointCliIndexerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--path"], - default=None, - type=str, - help="Path from which to start parsing files. 
If the connector is to \ - process all sites within the tenant this filter will be applied to \ - all sites document libraries.", - ), - click.Option( - ["--recursive"], - is_flag=True, - default=False, - help="Recursively download files in their respective folders " - "otherwise stop at the files in provided folder level.", - ), - click.Option( - ["--omit-files"], - is_flag=True, - default=False, - help="Don't process files.", - ), - click.Option( - ["--omit-pages"], - is_flag=True, - default=False, - help="Don't process site pages.", - ), - ] - return options - - -@dataclass -class SharepointCliDownloadConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--download-dir"], - help="Where files are downloaded to, defaults to a location at" - "`$HOME/.cache/unstructured/ingest//`.", - ), - ] - return options - - -sharepoint_drive_src_cmd = SrcCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=SharepointCliConnectionConfig, - indexer_config=SharepointCliIndexerConfig, - downloader_config=SharepointCliDownloadConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/singlestore.py b/unstructured/ingest/v2/cli/cmds/singlestore.py deleted file mode 100644 index 1b7809d09..000000000 --- a/unstructured/ingest/v2/cli/cmds/singlestore.py +++ /dev/null @@ -1,96 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.processes.connectors.singlestore import CONNECTOR_TYPE - - -@dataclass -class SingleStoreCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--host"], - required=False, - type=str, - default=None, - help="SingleStore host", - ), - click.Option( - ["--port"], - required=False, - type=int, - default=None, - help="SingleStore port", - ), - click.Option( - ["--user"], - required=False, - type=str, - default=None, - help="SingleStore user", - ), - click.Option( - ["--password"], - required=False, - type=str, - default=None, - help="SingleStore password", - ), - click.Option( - ["--database"], - required=False, - type=str, - default=None, - help="SingleStore database", - ), - ] - return options - - -@dataclass -class SingleStoreCliUploaderConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--drop-empty-cols"], - required=False, - type=bool, - is_flag=True, - default=False, - help="Drop any columns that have no data", - ), - ] - return options - - -@dataclass -class SingleStoreCliUploadStagerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - return [ - click.Option( - ["--table-name"], - required=False, - type=str, - help="SingleStore table to write contents to", - ), - click.Option( - ["--batch-size"], - required=False, - type=click.IntRange(min=1), - help="Batch size when writing to SingleStore", - ), - ] - - -singlestore_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=SingleStoreCliConnectionConfig, - uploader_config=SingleStoreCliUploaderConfig, - upload_stager_config=SingleStoreCliUploadStagerConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/sql.py b/unstructured/ingest/v2/cli/cmds/sql.py deleted file mode 100644 index b36f3c3ac..000000000 --- a/unstructured/ingest/v2/cli/cmds/sql.py +++ /dev/null @@ -1,84 +0,0 @@ -from dataclasses import dataclass - -import click - -from 
unstructured.ingest.v2.cli.base import DestCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.processes.connectors.sql import CONNECTOR_TYPE - -SQL_DRIVERS = {"postgresql", "sqlite"} - - -@dataclass -class SQLCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--db-type"], - required=True, - type=click.Choice(SQL_DRIVERS), - help="Type of the database backend", - ), - click.Option( - ["--username"], - default=None, - type=str, - help="DB username", - ), - click.Option( - ["--password"], - default=None, - type=str, - help="DB password", - ), - click.Option( - ["--host"], - default=None, - type=str, - help="DB host", - ), - click.Option( - ["--port"], - default=None, - type=int, - help="DB host connection port", - ), - click.Option( - ["--database"], - default=None, - type=str, - help="Database name. For sqlite databases, this is the path to the .db file.", - ), - ] - return options - - -@dataclass -class SQLCliUploaderConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--batch-size"], - default=100, - type=int, - help="Number of records per batch", - ) - ] - return options - - -@dataclass -class SQLCliUploadStagerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - return [] - - -sql_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=SQLCliConnectionConfig, - uploader_config=SQLCliUploaderConfig, - upload_stager_config=SQLCliUploadStagerConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/weaviate.py b/unstructured/ingest/v2/cli/cmds/weaviate.py deleted file mode 100644 index aaa051d05..000000000 --- a/unstructured/ingest/v2/cli/cmds/weaviate.py +++ /dev/null @@ -1,100 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import DelimitedString -from unstructured.ingest.v2.processes.connectors.weaviate import CONNECTOR_TYPE - - -@dataclass -class WeaviateCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--host-url"], - required=True, - help="Weaviate instance url", - ), - click.Option( - ["--class-name"], - default=None, - type=str, - help="Name of the class to push the records into, e.g: Pdf-elements", - ), - click.Option( - ["--access-token"], default=None, type=str, help="Used to create the bearer token." - ), - click.Option( - ["--refresh-token"], - default=None, - type=str, - help="Will tie this value to the bearer token. 
If not provided, " - "the authentication will expire once the lifetime of the access token is up.", - ), - click.Option( - ["--api-key"], - default=None, - type=str, - ), - click.Option( - ["--client-secret"], - default=None, - type=str, - ), - click.Option( - ["--scope"], - default=None, - type=DelimitedString(), - ), - click.Option( - ["--username"], - default=None, - type=str, - ), - click.Option( - ["--password"], - default=None, - type=str, - ), - click.Option( - ["--anonymous"], - is_flag=True, - default=False, - type=bool, - help="if set, all auth values will be ignored", - ), - ] - return options - - -@dataclass -class WeaviateCliUploaderConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--batch-size"], - default=100, - type=int, - help="Number of records per batch", - ) - ] - return options - - -@dataclass -class WeaviateCliUploadStagerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - return [] - - -weaviate_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=WeaviateCliConnectionConfig, - uploader_config=WeaviateCliUploaderConfig, - upload_stager_config=WeaviateCliUploadStagerConfig, -) diff --git a/unstructured/ingest/v2/cli/configs/__init__.py b/unstructured/ingest/v2/cli/configs/__init__.py deleted file mode 100644 index 2b3a42192..000000000 --- a/unstructured/ingest/v2/cli/configs/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from .chunk import ChunkerCliConfig -from .embed import EmbedderCliConfig -from .partition import PartitionerCliConfig -from .processor import ProcessorCliConfig - -__all__ = ["ChunkerCliConfig", "ProcessorCliConfig", "PartitionerCliConfig", "EmbedderCliConfig"] diff --git a/unstructured/ingest/v2/cli/configs/chunk.py b/unstructured/ingest/v2/cli/configs/chunk.py deleted file mode 100644 index b6f79641d..000000000 --- a/unstructured/ingest/v2/cli/configs/chunk.py +++ /dev/null @@ -1,89 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.chunking import CHUNK_MAX_CHARS_DEFAULT, CHUNK_MULTI_PAGE_DEFAULT -from unstructured.ingest.v2.cli.interfaces import CliConfig - - -@dataclass -class ChunkerCliConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--chunking-strategy"], - type=str, - default=None, - help="The rule-set to use to form chunks. Omit to disable chunking.", - ), - click.Option( - ["--chunk-combine-text-under-n-chars"], - type=int, - help=( - "Combine consecutive chunks when the first does not exceed this length and" - " the second will fit without exceeding the hard-maximum length. Only" - " operative for 'by_title' chunking-strategy." - ), - ), - click.Option( - ["--chunk-include-orig-elements/--chunk-no-include-orig-elements"], - is_flag=True, - default=True, - help=( - "When chunking, add the original elements consolidated to form each chunk to" - " `.metadata.orig_elements` on that chunk." - ), - ), - click.Option( - ["--chunk-max-characters"], - type=int, - default=CHUNK_MAX_CHARS_DEFAULT, - show_default=True, - help=( - "Hard maximum chunk length. No chunk will exceed this length. An oversized" - " element will be divided by text-splitting to fit this window." - ), - ), - click.Option( - ["--chunk-multipage-sections/--chunk-no-multipage-sections"], - is_flag=True, - default=CHUNK_MULTI_PAGE_DEFAULT, - help=( - "Ignore page boundaries when chunking such that elements from two different" - " pages can appear in the same chunk. 
Only operative for 'by_title'" - " chunking-strategy." - ), - ), - click.Option( - ["--chunk-new-after-n-chars"], - type=int, - help=( - "Soft-maximum chunk length. Another element will not be added to a chunk of" - " this length even when it would fit without exceeding the hard-maximum" - " length." - ), - ), - click.Option( - ["--chunk-overlap"], - type=int, - default=0, - show_default=True, - help=( - "Prefix chunk text with last overlap=N characters of prior chunk. Only" - " applies to oversized chunks divided by text-splitting. To apply overlap to" - " non-oversized chunks use the --overlap-all option." - ), - ), - click.Option( - ["--chunk-overlap-all"], - is_flag=True, - default=False, - help=( - "Apply overlap to chunks formed from whole elements as well as those formed" - " by text-splitting oversized elements. Overlap length is take from --overlap" - " option value." - ), - ), - ] - return options diff --git a/unstructured/ingest/v2/cli/configs/embed.py b/unstructured/ingest/v2/cli/configs/embed.py deleted file mode 100644 index 69f6bc657..000000000 --- a/unstructured/ingest/v2/cli/configs/embed.py +++ /dev/null @@ -1,74 +0,0 @@ -from dataclasses import dataclass -from typing import Any - -import click -from dataclasses_json.core import Json - -from unstructured.embed import EMBEDDING_PROVIDER_TO_CLASS_MAP -from unstructured.ingest.v2.cli.interfaces import CliConfig - - -@dataclass -class EmbedderCliConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--embedding-provider"], - help="Type of the embedding class to be used. Can be one of: " - f"{list(EMBEDDING_PROVIDER_TO_CLASS_MAP)}", - type=click.Choice(list(EMBEDDING_PROVIDER_TO_CLASS_MAP)), - ), - click.Option( - ["--embedding-api-key"], - help="API key for the embedding model, for the case an API key is needed.", - type=str, - default=None, - ), - click.Option( - ["--embedding-model-name"], - help="Embedding model name, if needed. " - "Chooses a particular LLM between different options, to embed with it.", - type=str, - default=None, - ), - click.Option( - ["--embedding-aws-access-key-id"], - help="AWS access key used for AWS-based embedders, such as bedrock", - type=str, - default=None, - ), - click.Option( - ["--embedding-aws-secret-access-key"], - help="AWS secret key used for AWS-based embedders, such as bedrock", - type=str, - default=None, - ), - click.Option( - ["--embedding-aws-region"], - help="AWS region used for AWS-based embedders, such as bedrock", - type=str, - default="us-west-2", - ), - ] - return options - - @classmethod - def from_dict(cls, kvs: Json, **kwargs: Any): - """ - Extension of the dataclass from_dict() to avoid a naming conflict with other CLI params. 
- This allows CLI arguments to be prepended with embedding_ during CLI invocation but - doesn't require that as part of the field names in this class - """ - if isinstance(kvs, dict): - new_kvs = { - k[len("embedding_") :]: v # noqa: E203 - for k, v in kvs.items() - if k.startswith("embedding_") - } - if len(new_kvs.keys()) == 0: - return None - if not new_kvs.get("provider"): - return None - return super().from_dict(new_kvs, **kwargs) - return super().from_dict(kvs, **kwargs) diff --git a/unstructured/ingest/v2/cli/configs/partition.py b/unstructured/ingest/v2/cli/configs/partition.py deleted file mode 100644 index 5ec5c0dbe..000000000 --- a/unstructured/ingest/v2/cli/configs/partition.py +++ /dev/null @@ -1,99 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import DelimitedString, Dict - - -@dataclass -class PartitionerCliConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--strategy"], - default="auto", - help="The method that will be used to process the documents. " - "Default: auto. Other strategies include `fast` and `hi_res`.", - ), - click.Option( - ["--ocr-languages"], - default=None, - type=DelimitedString(delimiter="+"), - help="A list of language packs to specify which languages to use for OCR, " - "separated by '+' e.g. 'eng+deu' to use the English and German language packs. " - "The appropriate Tesseract " - "language pack needs to be installed.", - ), - click.Option( - ["--encoding"], - default=None, - help="Text encoding to use when reading documents. By default the encoding is " - "detected automatically.", - ), - click.Option( - ["--skip-infer-table-types"], - type=DelimitedString(), - default=None, - help="Optional list of document types to skip table extraction on", - ), - click.Option( - ["--additional-partition-args"], - type=Dict(), - help="A json string representation of values to pass through to partition()", - ), - click.Option( - ["--fields-include"], - type=DelimitedString(), - default=["element_id", "text", "type", "metadata", "embeddings"], - help="Comma-delimited list. If set, include the specified top-level " - "fields in an element.", - ), - click.Option( - ["--flatten-metadata"], - is_flag=True, - default=False, - help="Results in flattened json elements. " - "Specifically, the metadata key values are brought to " - "the top-level of the element, and the `metadata` key itself is removed.", - ), - click.Option( - ["--metadata-include"], - default=[], - type=DelimitedString(), - help="Comma-delimited list. If set, include the specified metadata " - "fields if they exist and drop all other fields. ", - ), - click.Option( - ["--metadata-exclude"], - default=[], - type=DelimitedString(), - help="Comma-delimited list. If set, drop the specified metadata " - "fields if they exist.", - ), - click.Option( - ["--partition-by-api"], - is_flag=True, - default=False, - help="Use a remote API to partition the files." - " Otherwise, use the function from partition.auto", - ), - click.Option( - ["--partition-endpoint"], - default="https://api.unstructured.io/general/v0/general", - help="If partitioning via api, use the following host. 
" - "Default: https://api.unstructured.io/general/v0/general", - ), - click.Option( - ["--api-key"], - default=None, - help="API Key for partition endpoint.", - ), - click.Option( - ["--hi-res-model-name"], - default=None, - help="Model name for hi-res strategy.", - ), - ] - return options diff --git a/unstructured/ingest/v2/cli/configs/processor.py b/unstructured/ingest/v2/cli/configs/processor.py deleted file mode 100644 index b9236fad5..000000000 --- a/unstructured/ingest/v2/cli/configs/processor.py +++ /dev/null @@ -1,88 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.interfaces.processor import DEFAULT_WORK_DIR - - -@dataclass -class ProcessorCliConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--reprocess"], - is_flag=True, - default=False, - help="Reprocess a downloaded file even if the relevant structured " - "output .json file in output directory already exists.", - ), - click.Option( - ["--work-dir"], - type=str, - default=DEFAULT_WORK_DIR, - show_default=True, - help="Where to place working files when processing each step", - ), - click.Option( - ["--num-processes"], - default=2, - show_default=True, - type=click.IntRange(min=1), - help="Number of parallel processes with which to process docs", - ), - click.Option( - ["--max-connections"], - default=None, - show_default=True, - type=click.IntRange(min=1), - help="Max number of connections allowed when running an async step", - ), - click.Option( - ["--raise-on-error"], - is_flag=True, - default=False, - help="Is set, will raise error if any doc in the pipeline fail. Otherwise will " - "log error and continue with other docs", - ), - click.Option( - ["--re-download"], - is_flag=True, - default=False, - help="Re-download files even if they are already present in download dir.", - ), - click.Option( - ["--preserve-downloads"], - is_flag=True, - default=False, - help="Preserve downloaded files. Otherwise each file is removed " - "after being processed successfully.", - ), - click.Option( - ["--download-only"], - is_flag=True, - default=False, - help="Download any files that are not already present in either --download-dir or " - "the default download ~/.cache/... location in case --download-dir " - "is not specified and " - "skip processing them through unstructured.", - ), - click.Option( - ["--max-docs"], - default=None, - type=int, - help="If specified, process at most the specified number of documents.", - ), - click.Option( - ["--uncompress"], - type=bool, - default=False, - is_flag=True, - help="Uncompress any archived files. 
Currently supporting zip and tar " - "files based on file extension.", - ), - click.Option(["--verbose"], is_flag=True, default=False), - click.Option(["--tqdm"], is_flag=True, default=False, help="Show progress bar"), - ] - return options diff --git a/unstructured/ingest/v2/cli/interfaces.py b/unstructured/ingest/v2/cli/interfaces.py deleted file mode 100644 index 2a8a0e18b..000000000 --- a/unstructured/ingest/v2/cli/interfaces.py +++ /dev/null @@ -1,27 +0,0 @@ -from abc import ABC, abstractmethod - -import click - - -class CliConfig(ABC): - @staticmethod - @abstractmethod - def get_cli_options() -> list[click.Option]: - pass - - @classmethod - def add_cli_options(cls, cmd: click.Command) -> None: - options_to_add = cls.get_cli_options() - CliConfig.add_params(cmd, params=options_to_add) - - @staticmethod - def add_params(cmd: click.Command, params: list[click.Parameter]): - existing_opts = [] - for param in cmd.params: - existing_opts.extend(param.opts) - for param in params: - for opt in param.opts: - if opt in existing_opts: - raise ValueError(f"{opt} is already defined on the command {cmd.name}") - existing_opts.append(opt) - cmd.params.append(param) diff --git a/unstructured/ingest/v2/cli/utils.py b/unstructured/ingest/v2/cli/utils.py deleted file mode 100644 index 66d414f61..000000000 --- a/unstructured/ingest/v2/cli/utils.py +++ /dev/null @@ -1,240 +0,0 @@ -import json -import os.path -import sys -from dataclasses import fields, is_dataclass -from gettext import gettext, ngettext -from gettext import gettext as _ -from pathlib import Path -from typing import Any, ForwardRef, Optional, Type, TypeVar, Union, get_args, get_origin - -import click - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin -from unstructured.ingest.v2.logger import logger - - -def conform_click_options(options: dict[str, Any]) -> None: - # Click sets all multiple fields as tuple, this needs to be updated to list - for k, v in options.items(): - if isinstance(v, tuple): - options[k] = list(v) - - -class Dict(click.ParamType): - name = "dict" - - def convert( - self, - value: Any, - param: Optional[click.Parameter] = None, - ctx: Optional[click.Context] = None, - ) -> Any: - try: - return json.loads(value) - except json.JSONDecodeError: - self.fail( - gettext( - "{value} is not a valid json value.", - ).format(value=value), - param, - ctx, - ) - - -class FileOrJson(click.ParamType): - name = "file-or-json" - - def __init__(self, allow_raw_str: bool = False): - self.allow_raw_str = allow_raw_str - - def convert( - self, - value: Any, - param: Optional[click.Parameter] = None, - ctx: Optional[click.Context] = None, - ) -> Any: - # check if valid file - full_path = os.path.abspath(os.path.expanduser(value)) - if os.path.isfile(full_path): - return str(Path(full_path).resolve()) - if isinstance(value, str): - try: - return json.loads(value) - except json.JSONDecodeError: - if self.allow_raw_str: - return value - self.fail( - gettext( - "{value} is not a valid json string nor an existing filepath.", - ).format(value=value), - param, - ctx, - ) - - -class DelimitedString(click.ParamType): - name = "delimited-string" - - def __init__(self, delimiter: str = ",", choices: Optional[list[str]] = None): - self.choices = choices if choices else [] - self.delimiter = delimiter - - def convert( - self, - value: Any, - param: Optional[click.Parameter] = None, - ctx: Optional[click.Context] = None, - ) -> Any: - # In case a list is provided as the default, will not break - if isinstance(value, list): 
- split = [str(v).strip() for v in value] - else: - split = [v.strip() for v in value.split(self.delimiter)] - if not self.choices: - return split - choices_str = ", ".join(map(repr, self.choices)) - for s in split: - if s not in self.choices: - self.fail( - ngettext( - "{value!r} is not {choice}.", - "{value!r} is not one of {choices}.", - len(self.choices), - ).format(value=s, choice=choices_str, choices=choices_str), - param, - ctx, - ) - return split - - -EnhancedDataClassJsonMixinT = TypeVar( - "EnhancedDataClassJsonMixinT", bound=EnhancedDataClassJsonMixin -) - - -def extract_config( - flat_data: dict, config: Type[EnhancedDataClassJsonMixinT] -) -> EnhancedDataClassJsonMixinT: - """ - To be able to extract a nested dataclass from a flat dictionary (as in one coming - from a click-based options input), the config class is dynamically looked through for - nested dataclass fields and new nested dictionaries are created to conform to the - shape the overall class expects when parsing from a dict. During the process, this will create - copies of the original dictionary to avoid pruning fields but this isn't a - problem since the `from_dict()` method ignores unneeded values. - - Not handling more complex edge cases for now such as nested types i.e Union[List[List[...]]] - """ - - def conform_dict(inner_d: dict, inner_config: Type[EnhancedDataClassJsonMixinT]): - # Catch edge cases (i.e. Dict[str, ...]) where underlying type is not a concrete Class, - # causing 'issubclass() arg 1 must be a class' errors, return False - def is_subclass(instance, class_type) -> bool: - try: - return issubclass(instance, class_type) - except Exception: - return False - - dd = inner_d.copy() - for field in fields(inner_config): - f_type = field.type - # typing can be defined using a string, in which case it needs to be resolved - # to the actual type. 
following logic is cherry picked from the typing - # get_type_hints() since type resolution can be expensive, only do it - # when the type is a string - if isinstance(f_type, str): - try: - base_globals = sys.modules[inner_config.__module__].__dict__ - for_ref = ForwardRef(f_type, is_argument=False, is_class=True) - f_type = for_ref._evaluate( - globalns=base_globals, localns=None, recursive_guard=frozenset() - ) - except NameError as e: - logger.warning(f"couldn't resolve type {f_type}: {e}") - # Handle the case where the type of a value if a Union (possibly optional) - if get_origin(f_type) is Union: - union_values = get_args(f_type) - # handle List types - union_values = [ - get_args(u)[0] if get_origin(u) is list else u for u in union_values - ] - # Ignore injected NoneType when optional - concrete_union_values = [v for v in union_values if not is_subclass(v, type(None))] - dataclass_union_values = [v for v in concrete_union_values if is_dataclass(v)] - non_dataclass_union_values = [ - v for v in concrete_union_values if not is_dataclass(v) - ] - if not dataclass_union_values: - continue - # Check if the key for this field already exists in the dictionary, - # if so it might map to one of these non dataclass fields and this - # can't be enforced - if non_dataclass_union_values and field.name in dd: - continue - if len(dataclass_union_values) > 1: - logger.warning( - "more than one dataclass type possible for field {}, " - "not extracting: {}".format(field.name, ", ".join(dataclass_union_values)) - ) - continue - f_type = dataclass_union_values[0] - origin = get_origin(f_type) - if origin: - f_type = origin - if is_subclass(f_type, EnhancedDataClassJsonMixin): - dd[field.name] = conform_dict(inner_d=dd, inner_config=f_type) - return dd - - adjusted_dict = conform_dict(inner_d=flat_data, inner_config=config) - return config.from_dict(adjusted_dict, apply_name_overload=False) - - -class Group(click.Group): - def parse_args(self, ctx, args): - """ - This allows for subcommands to be called with the --help flag without breaking - if parent command is missing any of its required parameters - """ - - try: - return super().parse_args(ctx, args) - except click.MissingParameter: - if "--help" not in args: - raise - - # remove the required params so that help can display - for param in self.params: - param.required = False - return super().parse_args(ctx, args) - - def format_commands(self, ctx: click.Context, formatter: click.HelpFormatter) -> None: - """ - Copy of the original click.Group format_commands() method but replacing - 'Commands' -> 'Destinations' - """ - commands = [] - for subcommand in self.list_commands(ctx): - cmd = self.get_command(ctx, subcommand) - # What is this, the tool lied about a command. 
Ignore it - if cmd is None: - continue - if cmd.hidden: - continue - - commands.append((subcommand, cmd)) - - # allow for 3 times the default spacing - if len(commands): - if formatter.width: - limit = formatter.width - 6 - max(len(cmd[0]) for cmd in commands) - else: - limit = -6 - max(len(cmd[0]) for cmd in commands) - - rows = [] - for subcommand, cmd in commands: - help = cmd.get_short_help_str(limit) - rows.append((subcommand, help)) - - if rows: - with formatter.section(_("Destinations")): - formatter.write_dl(rows) diff --git a/unstructured/ingest/v2/example.py b/unstructured/ingest/v2/example.py deleted file mode 100644 index c4545f926..000000000 --- a/unstructured/ingest/v2/example.py +++ /dev/null @@ -1,37 +0,0 @@ -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.fsspec.s3 import ( - S3ConnectionConfig, - S3DownloaderConfig, - S3IndexerConfig, -) -from unstructured.ingest.v2.processes.connectors.local import ( - LocalUploaderConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - Pipeline.from_configs( - context=ProcessorConfig( - work_dir=str(work_dir.resolve()), tqdm=True, reprocess=True, verbose=True - ), - indexer_config=S3IndexerConfig(remote_url="s3://utic-dev-tech-fixtures/small-pdf-set/"), - downloader_config=S3DownloaderConfig(download_dir=download_path), - source_connection_config=S3ConnectionConfig(anonymous=True), - partitioner_config=PartitionerConfig(strategy="fast"), - chunker_config=ChunkerConfig(chunking_strategy="by_title"), - embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"), - uploader_config=LocalUploaderConfig(output_dir=str(output_path.resolve())), - ).run() diff --git a/unstructured/ingest/v2/examples/example_azure_cognitive_search.py b/unstructured/ingest/v2/examples/example_azure_cognitive_search.py deleted file mode 100644 index f3679ad1b..000000000 --- a/unstructured/ingest/v2/examples/example_azure_cognitive_search.py +++ /dev/null @@ -1,52 +0,0 @@ -import os -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.azure_cognitive_search import ( - AzureCognitiveSearchAccessConfig, - AzureCognitiveSearchConnectionConfig, - AzureCognitiveSearchUploaderConfig, - AzureCognitiveSearchUploadStagerConfig, -) -from unstructured.ingest.v2.processes.connectors.local import ( - LocalConnectionConfig, - LocalDownloaderConfig, - LocalIndexerConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = 
base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - index_name = "ingest-test-destination" - Pipeline.from_configs( - context=ProcessorConfig(work_dir=str(work_dir.resolve())), - indexer_config=LocalIndexerConfig( - input_path=str(docs_path.resolve()) + "/book-war-and-peace-1p.txt" - ), - downloader_config=LocalDownloaderConfig(download_dir=download_path), - source_connection_config=LocalConnectionConfig(), - partitioner_config=PartitionerConfig(strategy="fast"), - chunker_config=ChunkerConfig( - chunking_strategy="by_title", chunk_include_orig_elements=False - ), - embedder_config=EmbedderConfig( - embedding_provider="langchain-openai", embedding_api_key=os.getenv("OPENAI_API_KEY") - ), - destination_connection_config=AzureCognitiveSearchConnectionConfig( - access_config=AzureCognitiveSearchAccessConfig(key=os.getenv("AZURE_SEARCH_API_KEY")), - index=os.getenv("AZURE_SEARCH_INDEX"), - endpoint=os.getenv("AZURE_SEARCH_ENDPOINT"), - ), - uploader_config=AzureCognitiveSearchUploaderConfig(batch_size=10), - stager_config=AzureCognitiveSearchUploadStagerConfig(), - ).run() diff --git a/unstructured/ingest/v2/examples/example_chroma.py b/unstructured/ingest/v2/examples/example_chroma.py deleted file mode 100644 index f5773c4d8..000000000 --- a/unstructured/ingest/v2/examples/example_chroma.py +++ /dev/null @@ -1,53 +0,0 @@ -import random -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.chroma import ( - ChromaAccessConfig, - ChromaConnectionConfig, - ChromaUploaderConfig, - ChromaUploadStagerConfig, -) -from unstructured.ingest.v2.processes.connectors.local import ( - LocalConnectionConfig, - LocalDownloaderConfig, - LocalIndexerConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - Pipeline.from_configs( - context=ProcessorConfig(work_dir=str(work_dir.resolve())), - indexer_config=LocalIndexerConfig(input_path=str(docs_path.resolve()) + "/multisimple/"), - downloader_config=LocalDownloaderConfig(download_dir=download_path), - source_connection_config=LocalConnectionConfig(), - partitioner_config=PartitionerConfig(strategy="fast"), - chunker_config=ChunkerConfig( - chunking_strategy="by_title", - chunk_include_orig_elements=False, - chunk_max_characters=1500, - chunk_multipage_sections=True, - ), - embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"), - destination_connection_config=ChromaConnectionConfig( - access_config=ChromaAccessConfig(settings=None, headers=None), - host="localhost", - port=8047, - collection_name=f"test-collection-{random.randint(1000,9999)}", - tenant="default_tenant", - database="default_database", - ), - stager_config=ChromaUploadStagerConfig(), - uploader_config=ChromaUploaderConfig(batch_size=10), - ).run() diff --git 
a/unstructured/ingest/v2/examples/example_databricks_volumes.py b/unstructured/ingest/v2/examples/example_databricks_volumes.py deleted file mode 100644 index ecc8b6301..000000000 --- a/unstructured/ingest/v2/examples/example_databricks_volumes.py +++ /dev/null @@ -1,54 +0,0 @@ -import os -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.databricks_volumes import ( - DatabricksVolumesAccessConfig, - DatabricksVolumesConnectionConfig, - DatabricksVolumesUploaderConfig, -) -from unstructured.ingest.v2.processes.connectors.local import ( - LocalConnectionConfig, - LocalDownloaderConfig, - LocalIndexerConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - Pipeline.from_configs( - context=ProcessorConfig(work_dir=str(work_dir.resolve())), - indexer_config=LocalIndexerConfig(input_path=str(docs_path.resolve()) + "/multisimple/"), - downloader_config=LocalDownloaderConfig(download_dir=download_path), - source_connection_config=LocalConnectionConfig(), - partitioner_config=PartitionerConfig(strategy="fast"), - chunker_config=ChunkerConfig( - chunking_strategy="by_title", - chunk_include_orig_elements=False, - chunk_max_characters=1500, - chunk_multipage_sections=True, - ), - embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"), - destination_connection_config=DatabricksVolumesConnectionConfig( - access_config=DatabricksVolumesAccessConfig( - username=os.environ["DATABRICKS_USERNAME"], - password=os.environ["DATABRICKS_PASSWORD"], - ), - host=os.environ["DATABRICKS_HOST"], - ), - uploader_config=DatabricksVolumesUploaderConfig( - catalog=os.environ["DATABRICKS_CATALOG"], - volume=os.environ["DATABRICKS_VOLUME"], - volume_path=os.environ["DATABRICKS_VOLUME_PATH"], - ), - ).run() diff --git a/unstructured/ingest/v2/examples/example_elasticsearch.py b/unstructured/ingest/v2/examples/example_elasticsearch.py deleted file mode 100644 index 96cdeef24..000000000 --- a/unstructured/ingest/v2/examples/example_elasticsearch.py +++ /dev/null @@ -1,48 +0,0 @@ -import os -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.elasticsearch import ( - ElasticsearchAccessConfig, - ElasticsearchConnectionConfig, - ElasticsearchUploaderConfig, - ElasticsearchUploadStagerConfig, -) -from unstructured.ingest.v2.processes.connectors.local import ( - LocalConnectionConfig, - LocalDownloaderConfig, - LocalIndexerConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / 
"example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - index_name = "ingest-test-destination" - Pipeline.from_configs( - context=ProcessorConfig(work_dir=str(work_dir.resolve())), - indexer_config=LocalIndexerConfig( - input_path=str(docs_path.resolve()) + "/book-war-and-peace-1p.txt" - ), - downloader_config=LocalDownloaderConfig(download_dir=download_path), - source_connection_config=LocalConnectionConfig(), - partitioner_config=PartitionerConfig(strategy="fast"), - chunker_config=ChunkerConfig(chunking_strategy="by_title"), - embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"), - destination_connection_config=ElasticsearchConnectionConfig( - access_config=ElasticsearchAccessConfig(password=os.getenv("ELASTIC_PASSWORD")), - username=os.getenv("ELASTIC_USERNAME"), - hosts=["http://localhost:9200"], - ), - uploader_config=ElasticsearchUploaderConfig(index_name=index_name), - stager_config=ElasticsearchUploadStagerConfig(index_name=index_name), - ).run() diff --git a/unstructured/ingest/v2/examples/example_local.py b/unstructured/ingest/v2/examples/example_local.py deleted file mode 100644 index f72334e40..000000000 --- a/unstructured/ingest/v2/examples/example_local.py +++ /dev/null @@ -1,35 +0,0 @@ -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.local import ( - LocalConnectionConfig, - LocalDownloaderConfig, - LocalIndexerConfig, - LocalUploaderConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - Pipeline.from_configs( - context=ProcessorConfig(work_dir=str(work_dir.resolve())), - indexer_config=LocalIndexerConfig( - input_path=str(docs_path.resolve()) + "/book-war-and-peace-1p.txt" - ), - downloader_config=LocalDownloaderConfig(download_dir=download_path), - source_connection_config=LocalConnectionConfig(), - partitioner_config=PartitionerConfig(strategy="fast"), - chunker_config=ChunkerConfig(chunking_strategy="by_title"), - embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"), - uploader_config=LocalUploaderConfig(output_dir=str(output_path.resolve())), - ).run() diff --git a/unstructured/ingest/v2/examples/example_mongodb.py b/unstructured/ingest/v2/examples/example_mongodb.py deleted file mode 100644 index 4ef562ae6..000000000 --- a/unstructured/ingest/v2/examples/example_mongodb.py +++ /dev/null @@ -1,52 +0,0 @@ -import random -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.local import ( - LocalConnectionConfig, - 
LocalDownloaderConfig, - LocalIndexerConfig, -) -from unstructured.ingest.v2.processes.connectors.mongodb import ( - MongoDBAccessConfig, - MongoDBConnectionConfig, - MongoDBUploaderConfig, - MongoDBUploadStagerConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - Pipeline.from_configs( - context=ProcessorConfig(work_dir=str(work_dir.resolve())), - indexer_config=LocalIndexerConfig(input_path=str(docs_path.resolve()) + "/multisimple/"), - downloader_config=LocalDownloaderConfig(download_dir=download_path), - source_connection_config=LocalConnectionConfig(), - partitioner_config=PartitionerConfig(strategy="fast"), - chunker_config=ChunkerConfig( - chunking_strategy="by_title", - chunk_include_orig_elements=False, - chunk_max_characters=1500, - chunk_multipage_sections=True, - ), - embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"), - destination_connection_config=MongoDBConnectionConfig( - access_config=MongoDBAccessConfig(uri=None), - host="localhost", - port=27017, - collection=f"test-collection-{random.randint(1000,9999)}", - database="testDatabase", - ), - stager_config=MongoDBUploadStagerConfig(), - uploader_config=MongoDBUploaderConfig(batch_size=10), - ).run() diff --git a/unstructured/ingest/v2/examples/example_opensearch.py b/unstructured/ingest/v2/examples/example_opensearch.py deleted file mode 100644 index a5f654cfe..000000000 --- a/unstructured/ingest/v2/examples/example_opensearch.py +++ /dev/null @@ -1,51 +0,0 @@ -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.local import ( - LocalConnectionConfig, - LocalDownloaderConfig, - LocalIndexerConfig, -) -from unstructured.ingest.v2.processes.connectors.opensearch import ( - OpenSearchAccessConfig, - OpenSearchConnectionConfig, - OpenSearchUploaderConfig, - OpenSearchUploadStagerConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - Pipeline.from_configs( - context=ProcessorConfig(work_dir=str(work_dir.resolve())), - indexer_config=LocalIndexerConfig( - input_path=str(docs_path.resolve()) + "/book-war-and-peace-1p.txt" - ), - downloader_config=LocalDownloaderConfig(download_dir=download_path), - source_connection_config=LocalConnectionConfig(), - partitioner_config=PartitionerConfig(strategy="fast"), - chunker_config=ChunkerConfig(chunking_strategy="by_title"), - embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"), - destination_connection_config=OpenSearchConnectionConfig( - hosts="http://localhost:9247", 
- username="admin", - access_config=OpenSearchAccessConfig( - password="admin", - use_ssl=True, - ), - ), - stager_config=OpenSearchUploadStagerConfig(index_name="ingest-test-destination"), - uploader_config=OpenSearchUploaderConfig( - index_name="ingest-test-destination", batch_size_bytes=150 - ), - ).run() diff --git a/unstructured/ingest/v2/examples/example_pinecone.py b/unstructured/ingest/v2/examples/example_pinecone.py deleted file mode 100644 index 236a64df2..000000000 --- a/unstructured/ingest/v2/examples/example_pinecone.py +++ /dev/null @@ -1,56 +0,0 @@ -import os -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.local import ( - LocalConnectionConfig, - LocalDownloaderConfig, - LocalIndexerConfig, -) -from unstructured.ingest.v2.processes.connectors.pinecone import ( - PineconeAccessConfig, - PineconeConnectionConfig, - PineconeUploaderConfig, - PineconeUploadStagerConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - Pipeline.from_configs( - context=ProcessorConfig(work_dir=str(work_dir.resolve())), - indexer_config=LocalIndexerConfig( - input_path=str(docs_path.resolve()) + "/book-war-and-peace-1p.txt" - ), - downloader_config=LocalDownloaderConfig(download_dir=download_path), - source_connection_config=LocalConnectionConfig(), - partitioner_config=PartitionerConfig(strategy="fast"), - chunker_config=ChunkerConfig(chunking_strategy="by_title"), - embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"), - destination_connection_config=PineconeConnectionConfig( - # You'll need to set PINECONE_API_KEY environment variable to run this example - access_config=PineconeAccessConfig(api_key=os.getenv("PINECONE_API_KEY")), - index_name=os.getenv( - "PINECONE_INDEX", - default="your index name here. e.g. my-index," - "or define in environment variable PINECONE_INDEX", - ), - environment=os.getenv( - "PINECONE_ENVIRONMENT", - default="your environment name here. e.g. 
us-east-1," - "or define in environment variable PINECONE_ENVIRONMENT", - ), - ), - stager_config=PineconeUploadStagerConfig(), - uploader_config=PineconeUploaderConfig(batch_size=10, num_of_processes=2), - ).run() diff --git a/unstructured/ingest/v2/examples/example_s3.py b/unstructured/ingest/v2/examples/example_s3.py deleted file mode 100644 index 2910f526d..000000000 --- a/unstructured/ingest/v2/examples/example_s3.py +++ /dev/null @@ -1,35 +0,0 @@ -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.fsspec.s3 import ( - S3ConnectionConfig, - S3DownloaderConfig, - S3IndexerConfig, -) -from unstructured.ingest.v2.processes.connectors.local import ( - LocalUploaderConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - Pipeline.from_configs( - context=ProcessorConfig(work_dir=str(work_dir.resolve())), - indexer_config=S3IndexerConfig(remote_url="s3://utic-dev-tech-fixtures/small-pdf-set/"), - downloader_config=S3DownloaderConfig(download_dir=download_path), - source_connection_config=S3ConnectionConfig(anonymous=True), - partitioner_config=PartitionerConfig(strategy="fast"), - chunker_config=ChunkerConfig(chunking_strategy="by_title"), - embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"), - uploader_config=LocalUploaderConfig(output_dir=str(output_path.resolve())), - ).run() diff --git a/unstructured/ingest/v2/examples/example_salesforce.py b/unstructured/ingest/v2/examples/example_salesforce.py deleted file mode 100644 index b3439d5aa..000000000 --- a/unstructured/ingest/v2/examples/example_salesforce.py +++ /dev/null @@ -1,43 +0,0 @@ -import os -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.local import ( - LocalUploaderConfig, -) -from unstructured.ingest.v2.processes.connectors.salesforce import ( - SalesforceAccessConfig, - SalesforceConnectionConfig, - SalesforceDownloaderConfig, - SalesforceIndexerConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - Pipeline.from_configs( - context=ProcessorConfig(work_dir=str(work_dir.resolve())), - indexer_config=SalesforceIndexerConfig(categories=["Campaign", "EmailMessage"]), - downloader_config=SalesforceDownloaderConfig(download_dir=download_path), - 
source_connection_config=SalesforceConnectionConfig( - SalesforceAccessConfig( - consumer_key=os.getenv("SALESFORCE_CONSUMER_KEY"), - private_key=os.getenv("SALESFORCE_PRIVATE_KEY"), - ), - username=os.getenv("SALESFORCE_USERNAME"), - ), - partitioner_config=PartitionerConfig(strategy="fast"), - chunker_config=ChunkerConfig(chunking_strategy="by_title"), - embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"), - uploader_config=LocalUploaderConfig(output_dir=str(output_path.resolve())), - ).run() diff --git a/unstructured/ingest/v2/examples/example_sharepoint.py b/unstructured/ingest/v2/examples/example_sharepoint.py deleted file mode 100644 index bc9139efc..000000000 --- a/unstructured/ingest/v2/examples/example_sharepoint.py +++ /dev/null @@ -1,46 +0,0 @@ -import os -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.connectors.local import ( - LocalUploaderConfig, -) -from unstructured.ingest.v2.processes.connectors.sharepoint import ( - SharepointAccessConfig, - SharepointConnectionConfig, - SharepointDownloaderConfig, - SharepointIndexerConfig, - SharepointPermissionsConfig, -) -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - Pipeline.from_configs( - context=ProcessorConfig(work_dir=str(work_dir.resolve()), tqdm=True, verbose=True), - indexer_config=SharepointIndexerConfig(), - downloader_config=SharepointDownloaderConfig(download_dir=download_path), - source_connection_config=SharepointConnectionConfig( - client_id=os.getenv("SHAREPOINT_CLIENT_ID"), - site=os.getenv("SHAREPOINT_SITE"), - access_config=SharepointAccessConfig(client_cred=os.getenv("SHAREPOINT_CRED")), - permissions_config=SharepointPermissionsConfig( - permissions_application_id=os.getenv("SHAREPOINT_PERMISSIONS_APP_ID"), - permissions_client_cred=os.getenv("SHAREPOINT_PERMISSIONS_APP_CRED"), - permissions_tenant=os.getenv("SHAREPOINT_PERMISSIONS_TENANT"), - ), - ), - partitioner_config=PartitionerConfig(strategy="fast"), - # chunker_config=ChunkerConfig(chunking_strategy="by_title"), - # embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"), - uploader_config=LocalUploaderConfig(output_dir=str(output_path.resolve())), - ).run() diff --git a/unstructured/ingest/v2/examples/example_singlestore.py b/unstructured/ingest/v2/examples/example_singlestore.py deleted file mode 100644 index 47d4494a9..000000000 --- a/unstructured/ingest/v2/examples/example_singlestore.py +++ /dev/null @@ -1,48 +0,0 @@ -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.local import ( - LocalConnectionConfig, - LocalDownloaderConfig, - LocalIndexerConfig, -) -from unstructured.ingest.v2.processes.connectors.singlestore import ( - SingleStoreAccessConfig, - SingleStoreConnectionConfig, - SingleStoreUploaderConfig, - 
SingleStoreUploadStagerConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - Pipeline.from_configs( - context=ProcessorConfig(work_dir=str(work_dir.resolve()), tqdm=True, verbose=True), - indexer_config=LocalIndexerConfig( - input_path=str(docs_path.resolve()) + "/book-war-and-peace-1p.txt" - ), - downloader_config=LocalDownloaderConfig(download_dir=download_path), - source_connection_config=LocalConnectionConfig(), - partitioner_config=PartitionerConfig(strategy="fast"), - chunker_config=ChunkerConfig(chunking_strategy="by_title"), - embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"), - destination_connection_config=SingleStoreConnectionConfig( - access_config=SingleStoreAccessConfig(password="password"), - host="localhost", - port=3306, - database="ingest_test", - user="root", - ), - stager_config=SingleStoreUploadStagerConfig(), - uploader_config=SingleStoreUploaderConfig(table_name="elements"), - ).run() diff --git a/unstructured/ingest/v2/examples/example_sql.py b/unstructured/ingest/v2/examples/example_sql.py deleted file mode 100644 index 4ed938192..000000000 --- a/unstructured/ingest/v2/examples/example_sql.py +++ /dev/null @@ -1,88 +0,0 @@ -import os -import sqlite3 -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.local import ( - LocalConnectionConfig, - LocalDownloaderConfig, - LocalIndexerConfig, -) -from unstructured.ingest.v2.processes.connectors.sql import ( - DatabaseType, - SimpleSqlConfig, - SQLAccessConfig, - SQLUploaderConfig, - SQLUploadStagerConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -SQLITE_DB = "test-sql-db.sqlite" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - - configs = { - "context": ProcessorConfig(work_dir=str(work_dir.resolve())), - "indexer_config": LocalIndexerConfig(input_path=str(docs_path.resolve()) + "/multisimple/"), - "downloader_config": LocalDownloaderConfig(download_dir=download_path), - "source_connection_config": LocalConnectionConfig(), - "partitioner_config": PartitionerConfig(strategy="fast"), - "chunker_config": ChunkerConfig( - chunking_strategy="by_title", - chunk_include_orig_elements=False, - chunk_max_characters=1500, - chunk_multipage_sections=True, - ), - "embedder_config": EmbedderConfig(embedding_provider="langchain-huggingface"), - "stager_config": SQLUploadStagerConfig(), - "uploader_config": SQLUploaderConfig(batch_size=10), - } - - if os.path.exists(SQLITE_DB): - os.remove(SQLITE_DB) - - connection = sqlite3.connect(database=SQLITE_DB) - - query = None - 
script_path = ( - Path(__file__).parent.parent.parent.parent.parent - / Path("scripts/sql-test-helpers/create-sqlite-schema.sql") - ).resolve() - with open(script_path) as f: - query = f.read() - cursor = connection.cursor() - cursor.executescript(query) - connection.close() - - # sqlite test first - Pipeline.from_configs( - destination_connection_config=SimpleSqlConfig( - db_type=DatabaseType.SQLITE, - database=SQLITE_DB, - access_config=SQLAccessConfig(), - ), - **configs, - ).run() - - # now, pg with pgvector - Pipeline.from_configs( - destination_connection_config=SimpleSqlConfig( - db_type=DatabaseType.POSTGRESQL, - database="elements", - host="localhost", - port=5433, - access_config=SQLAccessConfig(username="unstructured", password="test"), - ), - **configs, - ).run() diff --git a/unstructured/ingest/v2/examples/example_weaviate.py b/unstructured/ingest/v2/examples/example_weaviate.py deleted file mode 100644 index 5b9e739c5..000000000 --- a/unstructured/ingest/v2/examples/example_weaviate.py +++ /dev/null @@ -1,44 +0,0 @@ -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.local import ( - LocalConnectionConfig, - LocalDownloaderConfig, - LocalIndexerConfig, -) -from unstructured.ingest.v2.processes.connectors.weaviate import ( - WeaviateConnectionConfig, - WeaviateUploaderConfig, - WeaviateUploadStagerConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - Pipeline.from_configs( - context=ProcessorConfig(work_dir=str(work_dir.resolve())), - indexer_config=LocalIndexerConfig(input_path=str(docs_path.resolve()) + "/multisimple/"), - downloader_config=LocalDownloaderConfig(download_dir=download_path), - source_connection_config=LocalConnectionConfig(), - partitioner_config=PartitionerConfig(strategy="fast"), - chunker_config=ChunkerConfig(chunking_strategy="by_title"), - embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"), - destination_connection_config=WeaviateConnectionConfig( - host_url="http://localhost:8080", - class_name="elements", - access_config=None, - anonymous=True, - ), - stager_config=WeaviateUploadStagerConfig(), - uploader_config=WeaviateUploaderConfig(batch_size=10), - ).run() diff --git a/unstructured/ingest/v2/interfaces/__init__.py b/unstructured/ingest/v2/interfaces/__init__.py deleted file mode 100644 index 5aa6240ab..000000000 --- a/unstructured/ingest/v2/interfaces/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -from .connector import AccessConfig, BaseConnector, ConnectionConfig -from .downloader import Downloader, DownloaderConfig, DownloadResponse, download_responses -from .file_data import FileData, SourceIdentifiers -from .indexer import Indexer, IndexerConfig -from .process import BaseProcess -from .processor import ProcessorConfig -from .upload_stager import UploadStager, UploadStagerConfig -from .uploader import UploadContent, Uploader, UploaderConfig - -__all__ = 
[ - "DownloadResponse", - "download_responses", - "Downloader", - "DownloaderConfig", - "FileData", - "Indexer", - "IndexerConfig", - "BaseProcess", - "ProcessorConfig", - "UploadStager", - "UploadStagerConfig", - "Uploader", - "UploaderConfig", - "SourceIdentifiers", - "UploadContent", - "AccessConfig", - "ConnectionConfig", - "BaseConnector", -] diff --git a/unstructured/ingest/v2/interfaces/connector.py b/unstructured/ingest/v2/interfaces/connector.py deleted file mode 100644 index dc700fc94..000000000 --- a/unstructured/ingest/v2/interfaces/connector.py +++ /dev/null @@ -1,32 +0,0 @@ -from abc import ABC -from dataclasses import dataclass -from typing import Any, TypeVar - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin - - -@dataclass -class AccessConfig(EnhancedDataClassJsonMixin): - """Meant to designate holding any sensitive information associated with other configs - and also for access specific configs.""" - - -AccessConfigT = TypeVar("AccessConfigT", bound=AccessConfig) - - -@dataclass -class ConnectionConfig(EnhancedDataClassJsonMixin): - access_config: AccessConfigT - - def get_access_config(self) -> dict[str, Any]: - if not self.access_config: - return {} - return self.access_config.to_dict(apply_name_overload=False) - - -ConnectionConfigT = TypeVar("ConnectionConfigT", bound=ConnectionConfig) - - -@dataclass -class BaseConnector(ABC): - connection_config: ConnectionConfigT diff --git a/unstructured/ingest/v2/interfaces/downloader.py b/unstructured/ingest/v2/interfaces/downloader.py deleted file mode 100644 index 3a493b017..000000000 --- a/unstructured/ingest/v2/interfaces/downloader.py +++ /dev/null @@ -1,79 +0,0 @@ -import os -from abc import ABC, abstractmethod -from dataclasses import dataclass -from pathlib import Path -from typing import Any, Optional, TypedDict, TypeVar, Union - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin -from unstructured.ingest.v2.interfaces.connector import BaseConnector -from unstructured.ingest.v2.interfaces.file_data import FileData -from unstructured.ingest.v2.interfaces.process import BaseProcess - - -@dataclass -class DownloaderConfig(EnhancedDataClassJsonMixin): - download_dir: Optional[Path] = None - - -DownloaderConfigT = TypeVar("DownloaderConfigT", bound=DownloaderConfig) - - -class DownloadResponse(TypedDict): - file_data: FileData - path: Path - - -download_responses = Union[list[DownloadResponse], DownloadResponse] - - -class Downloader(BaseProcess, BaseConnector, ABC): - connector_type: str - download_config: DownloaderConfigT - - @staticmethod - def is_float(value: str): - try: - float(value) - return True - except ValueError: - return False - - def generate_download_response( - self, file_data: FileData, download_path: Path - ) -> DownloadResponse: - if ( - file_data.metadata.date_modified - and self.is_float(file_data.metadata.date_modified) - and file_data.metadata.date_created - and self.is_float(file_data.metadata.date_created) - ): - date_modified = float(file_data.metadata.date_modified) - date_created = float(file_data.metadata.date_created) - os.utime(download_path, times=(date_created, date_modified)) - return DownloadResponse(file_data=file_data, path=download_path) - - @property - def download_dir(self) -> Path: - if self.download_config.download_dir is None: - self.download_config.download_dir = ( - Path.home() - / ".cache" - / "unstructured" - / "ingest" - / "download" - / self.connector_type - ).resolve() - return self.download_config.download_dir - - 
-    def is_async(self) -> bool:
-        return True
-
-    def get_download_path(self, file_data: FileData) -> Optional[Path]:
-        return None
-
-    @abstractmethod
-    def run(self, file_data: FileData, **kwargs: Any) -> download_responses:
-        pass
-
-    async def run_async(self, file_data: FileData, **kwargs: Any) -> download_responses:
-        return self.run(file_data=file_data, **kwargs)
diff --git a/unstructured/ingest/v2/interfaces/file_data.py b/unstructured/ingest/v2/interfaces/file_data.py
deleted file mode 100644
index 9cccbaff0..000000000
--- a/unstructured/ingest/v2/interfaces/file_data.py
+++ /dev/null
@@ -1,56 +0,0 @@
-import json
-from dataclasses import dataclass, field
-from enum import Enum
-from pathlib import Path
-from typing import Any, Optional
-
-from dataclasses_json import DataClassJsonMixin
-
-from unstructured.documents.elements import DataSourceMetadata
-
-
-class IndexDocType(str, Enum):
-    BATCH = "batch"
-    FILE = "file"
-
-
-@dataclass
-class SourceIdentifiers:
-    filename: str
-    fullpath: str
-    rel_path: Optional[str] = None
-
-    @property
-    def filename_stem(self) -> str:
-        return Path(self.filename).stem
-
-    @property
-    def relative_path(self) -> str:
-        return self.rel_path or self.fullpath
-
-
-@dataclass
-class FileData(DataClassJsonMixin):
-    identifier: str
-    connector_type: str
-    source_identifiers: Optional[SourceIdentifiers] = None
-    doc_type: IndexDocType = field(default=IndexDocType.FILE)
-    metadata: DataSourceMetadata = field(default_factory=DataSourceMetadata)
-    additional_metadata: dict[str, Any] = field(default_factory=dict)
-    reprocess: bool = False
-
-    @classmethod
-    def from_file(cls, path: str) -> "FileData":
-        path = Path(path).resolve()
-        if not path.exists() or not path.is_file():
-            raise ValueError(f"file path not valid: {path}")
-        with open(str(path.resolve()), "rb") as f:
-            file_data_dict = json.load(f)
-        file_data = FileData.from_dict(file_data_dict)
-        return file_data
-
-    def to_file(self, path: str) -> None:
-        path = Path(path).resolve()
-        path.parent.mkdir(parents=True, exist_ok=True)
-        with open(str(path.resolve()), "w") as f:
-            json.dump(self.to_dict(), f, indent=2)
diff --git a/unstructured/ingest/v2/interfaces/indexer.py b/unstructured/ingest/v2/interfaces/indexer.py
deleted file mode 100644
index f3f2490ef..000000000
--- a/unstructured/ingest/v2/interfaces/indexer.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from abc import ABC, abstractmethod
-from dataclasses import dataclass
-from typing import Any, Generator, Optional, TypeVar
-
-from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin
-from unstructured.ingest.v2.interfaces.connector import BaseConnector
-from unstructured.ingest.v2.interfaces.file_data import FileData
-from unstructured.ingest.v2.interfaces.process import BaseProcess
-
-
-@dataclass
-class IndexerConfig(EnhancedDataClassJsonMixin):
-    pass
-
-
-IndexerConfigT = TypeVar("IndexerConfigT", bound=IndexerConfig)
-
-
-class Indexer(BaseProcess, BaseConnector, ABC):
-    connector_type: str
-    index_config: Optional[IndexerConfigT] = None
-
-    def is_async(self) -> bool:
-        return False
-
-    @abstractmethod
-    def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
-        pass
diff --git a/unstructured/ingest/v2/interfaces/process.py b/unstructured/ingest/v2/interfaces/process.py
deleted file mode 100644
index 028356111..000000000
--- a/unstructured/ingest/v2/interfaces/process.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from abc import ABC, abstractmethod
-from dataclasses import dataclass
-from typing import Any
-
-
-@dataclass
-class BaseProcess(ABC):
-    def is_async(self) -> bool:
-        return False
-
-    @abstractmethod
-    def run(self, **kwargs: Any) -> Any:
-        pass
-
-    async def run_async(self, **kwargs: Any) -> Any:
-        return self.run(**kwargs)
-
-    def check_connection(self):
-        # If the process requires external connections, run a quick check
-        pass
diff --git a/unstructured/ingest/v2/interfaces/processor.py b/unstructured/ingest/v2/interfaces/processor.py
deleted file mode 100644
index 96390e53f..000000000
--- a/unstructured/ingest/v2/interfaces/processor.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import os
-from asyncio import Semaphore
-from dataclasses import dataclass, field
-from pathlib import Path
-from typing import Any, Optional
-
-from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin
-
-DEFAULT_WORK_DIR = str((Path.home() / ".cache" / "unstructured" / "ingest" / "pipeline").resolve())
-
-
-@dataclass
-class ProcessorConfig(EnhancedDataClassJsonMixin):
-    reprocess: bool = False
-    verbose: bool = False
-    tqdm: bool = False
-    work_dir: str = field(default_factory=lambda: DEFAULT_WORK_DIR)
-    num_processes: int = 2
-    max_connections: Optional[int] = None
-    raise_on_error: bool = False
-    disable_parallelism: bool = field(
-        default_factory=lambda: os.getenv("INGEST_DISABLE_PARALLELISM", "false").lower() == "true"
-    )
-    preserve_downloads: bool = False
-    download_only: bool = False
-    max_docs: Optional[int] = None
-    re_download: bool = False
-    uncompress: bool = False
-
-    # Used to keep track of state in pipeline
-    status: dict[str, Any] = field(default_factory=dict)
-    semaphore: Optional[Semaphore] = field(init=False, default=None)
-
-    def __post_init__(self):
-        if self.max_connections is not None:
-            self.semaphore = Semaphore(self.max_connections)
-
-    @property
-    def mp_supported(self) -> bool:
-        return not self.disable_parallelism and self.num_processes > 1
-
-    @property
-    def async_supported(self) -> bool:
-        if self.disable_parallelism:
-            return False
-        if self.max_connections is not None and isinstance(self.max_connections, int):
-            return self.max_connections > 1
-        return True
diff --git a/unstructured/ingest/v2/interfaces/upload_stager.py b/unstructured/ingest/v2/interfaces/upload_stager.py
deleted file mode 100644
index 2aeef2e5d..000000000
--- a/unstructured/ingest/v2/interfaces/upload_stager.py
+++ /dev/null
@@ -1,48 +0,0 @@
-from abc import ABC, abstractmethod
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Any, TypeVar
-
-from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin
-from unstructured.ingest.v2.interfaces.file_data import FileData
-from unstructured.ingest.v2.interfaces.process import BaseProcess
-
-
-@dataclass
-class UploadStagerConfig(EnhancedDataClassJsonMixin):
-    pass
-
-
-UploadStagerConfigT = TypeVar("UploadStagerConfigT", bound=UploadStagerConfig)
-
-
-@dataclass
-class UploadStager(BaseProcess, ABC):
-    upload_stager_config: UploadStagerConfigT
-
-    @abstractmethod
-    def run(
-        self,
-        elements_filepath: Path,
-        file_data: FileData,
-        output_dir: Path,
-        output_filename: str,
-        **kwargs: Any
-    ) -> Path:
-        pass
-
-    async def run_async(
-        self,
-        elements_filepath: Path,
-        file_data: FileData,
-        output_dir: Path,
-        output_filename: str,
-        **kwargs: Any
-    ) -> Path:
-        return self.run(
-            elements_filepath=elements_filepath,
-            output_dir=output_dir,
-            output_filename=output_filename,
-            file_data=file_data,
-            **kwargs
-        )
diff --git a/unstructured/ingest/v2/interfaces/uploader.py b/unstructured/ingest/v2/interfaces/uploader.py
deleted file mode 100644 index b8c282983..000000000 --- a/unstructured/ingest/v2/interfaces/uploader.py +++ /dev/null @@ -1,39 +0,0 @@ -from abc import ABC, abstractmethod -from dataclasses import dataclass -from pathlib import Path -from typing import Any, TypeVar - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin -from unstructured.ingest.v2.interfaces.connector import BaseConnector -from unstructured.ingest.v2.interfaces.file_data import FileData -from unstructured.ingest.v2.interfaces.process import BaseProcess - - -@dataclass -class UploaderConfig(EnhancedDataClassJsonMixin): - pass - - -UploaderConfigT = TypeVar("UploaderConfigT", bound=UploaderConfig) - - -@dataclass -class UploadContent: - path: Path - file_data: FileData - - -@dataclass -class Uploader(BaseProcess, BaseConnector, ABC): - upload_config: UploaderConfigT - connector_type: str - - def is_async(self) -> bool: - return False - - @abstractmethod - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - pass - - async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> None: - return self.run(contents=[UploadContent(path=path, file_data=file_data)], **kwargs) diff --git a/unstructured/ingest/v2/logger.py b/unstructured/ingest/v2/logger.py deleted file mode 100644 index 34c5c1df3..000000000 --- a/unstructured/ingest/v2/logger.py +++ /dev/null @@ -1,123 +0,0 @@ -import ast -import json -import os -from logging import Formatter, Logger, StreamHandler, getLevelName, getLogger -from typing import Any, Callable - -log_level = os.getenv("INGEST_LOG_LEVEL", "INFO") -LOGGER_NAME = "unstructured.ingest.v2" - - -def default_is_data_sensitive(k: str, v: Any) -> bool: - sensitive_fields = [ - "account_name", - "client_id", - ] - sensitive_triggers = ["key", "cred", "token", "password", "oauth", "secret"] - return ( - v - and any([s in k.lower() for s in sensitive_triggers]) # noqa: C419 - or k.lower() in sensitive_fields - ) - - -def hide_sensitive_fields( - data: dict, is_sensitive_fn: Callable[[str, Any], bool] = default_is_data_sensitive -) -> dict: - """ - Will recursively look through every k, v pair in this dict and any nested ones and run - is_sensitive_fn to dynamically redact the value of the k, v pair. Will also check if - any string value can be parsed as valid json and process that dict as well and replace - the original string with the json.dumps() version of the redacted dict. - """ - new_data = data.copy() - for k, v in new_data.items(): - if is_sensitive_fn(k, v): - new_data[k] = "*******" - if isinstance(v, dict): - new_data[k] = hide_sensitive_fields(v) - if isinstance(v, str): - # Need to take into account strings generated via json.dumps() or simply printing a dict - try: - json_data = json.loads(v) - if isinstance(json_data, dict): - updated_data = hide_sensitive_fields(json_data) - new_data[k] = json.dumps(updated_data) - except json.JSONDecodeError: - pass - - return new_data - - -def redact_jsons(s: str) -> str: - """ - Takes in a generic string and pulls out all valid json content. Leverages - hide_sensitive_fields() to redact any sensitive information and replaces the - original json with the new redacted format. There can be any number of valid - jsons in a generic string and this will work. Having extra '{' without a - closing '}' will cause this to break though. i.e '{ text, {"a": 3}'. 
-
-    """
-    chars = list(s)
-    if "{" not in chars:
-        return s
-    i = 0
-    jsons = []
-    i = 0
-    while i < len(chars):
-        char = chars[i]
-        if char == "{":
-            stack = [char]
-            current = [char]
-            while len(stack) != 0 and i < len(chars):
-                i += 1
-                char = chars[i]
-                current.append(char)
-                if char == "{":
-                    stack.append(char)
-                if char == "}":
-                    stack.pop(-1)
-            jsons.append("".join(current))
-            continue
-        i += 1
-    for j in jsons:
-        try:
-            formatted_j = json.dumps(json.loads(j))
-        except json.JSONDecodeError:
-            lit = ast.literal_eval(j)
-            formatted_j = json.dumps(lit)
-        hidden_j = json.dumps(hide_sensitive_fields(json.loads(formatted_j)))
-        s = s.replace(j, hidden_j)
-    return s
-
-
-class SensitiveFormatter(Formatter):
-    def format(self, record):
-        s = super().format(record=record)
-        return redact_jsons(s)
-
-
-def remove_root_handlers(logger: Logger) -> None:
-    # NOTE(robinson) - in some environments such as Google Colab, there is a root handler
-    # that does not mask secrets, meaning sensitive info such as api keys appear in logs.
-    # Removing these when they exist prevents this behavior
-    if logger.root.hasHandlers():
-        for handler in logger.root.handlers:
-            logger.root.removeHandler(handler)
-
-
-def make_default_logger(level: int) -> Logger:
-    """Return a custom logger."""
-    logger = getLogger(LOGGER_NAME)
-    handler = StreamHandler()
-    handler.name = "ingest_log_handler"
-    formatter = SensitiveFormatter("%(asctime)s %(processName)-10s %(levelname)-8s %(message)s")
-    handler.setFormatter(formatter)
-    if handler.name not in [h.name for h in logger.handlers]:
-        logger.addHandler(handler)
-    logger.setLevel(level)
-    remove_root_handlers(logger)
-    return logger
-
-
-logger = make_default_logger(level=getLevelName(log_level.upper()))
diff --git a/unstructured/ingest/v2/main.py b/unstructured/ingest/v2/main.py
deleted file mode 100644
index f1b697717..000000000
--- a/unstructured/ingest/v2/main.py
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/usr/bin/env python3
-from unstructured.ingest.v2.cli.cli import get_cmd
-
-
-def main():
-    ingest_cmd = get_cmd()
-    ingest_cmd()
-
-
-if __name__ == "__main__":
-    main()
diff --git a/unstructured/ingest/v2/pipeline/__init__.py b/unstructured/ingest/v2/pipeline/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/unstructured/ingest/v2/pipeline/interfaces.py b/unstructured/ingest/v2/pipeline/interfaces.py
deleted file mode 100644
index ae6dd95d7..000000000
--- a/unstructured/ingest/v2/pipeline/interfaces.py
+++ /dev/null
@@ -1,169 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-import logging
-import multiprocessing as mp
-from abc import ABC
-from concurrent.futures import ThreadPoolExecutor
-from dataclasses import dataclass
-from functools import wraps
-from pathlib import Path
-from time import time
-from typing import Any, Awaitable, Callable, Optional, TypeVar
-
-from tqdm import tqdm
-from tqdm.asyncio import tqdm as tqdm_asyncio
-
-from unstructured.ingest.v2.interfaces import BaseProcess, ProcessorConfig
-from unstructured.ingest.v2.logger import logger, make_default_logger
-
-BaseProcessT = TypeVar("BaseProcessT", bound=BaseProcess)
-iterable_input = list[dict[str, Any]]
-
-
-def timed(func):
-    @wraps(func)
-    def time_it(self, *args, **kwargs):
-        start = time()
-        try:
-            return func(self, *args, **kwargs)
-        finally:
-            if func.__name__ == "__call__":
-                reported_name = f"{self.__class__.__name__} [cls]"
-            else:
-                reported_name = func.__name__
-            logger.info(f"{reported_name} took {time() - start} seconds")
-
-    return time_it
-
-
-@dataclass -class PipelineStep(ABC): - process: BaseProcessT - context: ProcessorConfig - identifier: str - - def __str__(self): - return self.identifier - - def process_serially(self, iterable: iterable_input) -> Any: - logger.info("processing content serially") - if iterable: - if len(iterable) == 1: - return [self.run(**iterable[0])] - if self.context.tqdm: - return [self.run(**it) for it in tqdm(iterable, desc=self.identifier)] - return [self.run(**it) for it in iterable] - return [self.run()] - - async def _process_async(self, iterable: iterable_input) -> Any: - if iterable: - if len(iterable) == 1: - return [await self.run_async(**iterable[0])] - if self.context.tqdm: - return await tqdm_asyncio.gather( - *[self.run_async(**i) for i in iterable], desc=self.identifier - ) - return await asyncio.gather(*[self.run_async(**i) for i in iterable]) - return [await self.run_async()] - - def process_async(self, iterable: iterable_input) -> Any: - logger.info("processing content async") - return self.asyncio_run(fn=self._process_async, iterable=iterable) - - def asyncio_run( - self, fn: Callable[[Any, Any], Awaitable[Any]], *args: Any, **kwargs: Any - ) -> Any: - current_loop = asyncio._get_running_loop() - if current_loop is None: - return asyncio.run(fn(*args, **kwargs)) - with ThreadPoolExecutor(thread_name_prefix="asyncio") as thread_pool: - logger.warning( - f"async code being run in dedicated thread pool " - f"to not conflict with existing event loop: {current_loop}" - ) - - def wrapped(): - return asyncio.run(fn(*args, **kwargs)) - - future = thread_pool.submit(wrapped) - return future.result() - - def process_multiprocess(self, iterable: iterable_input) -> Any: - logger.info("processing content across processes") - - if iterable: - if len(iterable) == 1: - return [self.process_serially(iterable)] - if self.context.num_processes == 1: - return self.process_serially(iterable) - with mp.Pool( - processes=self.context.num_processes, - initializer=self._init_logger, - initargs=(logging.DEBUG if self.context.verbose else logging.INFO,), - ) as pool: - if self.context.tqdm: - return list( - tqdm( - pool.imap_unordered(func=self._wrap_mp, iterable=iterable), - total=len(iterable), - desc=self.identifier, - ) - ) - return pool.map(self._wrap_mp, iterable) - return [self.run()] - - def _wrap_mp(self, input_kwargs: dict) -> Any: - # Allow mapping of kwargs via multiprocessing map() - return self.run(**input_kwargs) - - def _init_logger(self, log_level: int): - # Init logger for each spawned process when using multiprocessing pool - make_default_logger(level=log_level) - - @timed - def __call__(self, iterable: Optional[iterable_input] = None) -> Any: - iterable = iterable or [] - if iterable: - logger.info( - f"Calling {self.__class__.__name__} " f"with {len(iterable)} docs", # type: ignore - ) - if self.context.async_supported and self.process.is_async(): - return self.process_async(iterable=iterable) - if self.context.mp_supported: - return self.process_multiprocess(iterable=iterable) - return self.process_serially(iterable=iterable) - - def _run(self, fn: Callable, **kwargs: Any) -> Optional[Any]: - return self.asyncio_run(fn=self.run_async, _fn=fn, **kwargs) - - async def _run_async(self, fn: Callable, **kwargs: Any) -> Optional[Any]: - raise NotImplementedError - - def run(self, _fn: Callable[..., Any] | None = None, **kwargs: Any) -> Optional[Any]: - try: - fn = _fn or self.process.run - return self._run(fn=fn, **kwargs) - except Exception as e: - logger.error(f"Exception raised while 
running {self.identifier}", exc_info=e) - if "file_data_path" in kwargs: - self.context.status[kwargs["file_data_path"]] = {self.identifier: str(e)} - if self.context.raise_on_error: - raise e - return None - - async def run_async(self, _fn: Optional[Callable] = None, **kwargs: Any) -> Optional[Any]: - try: - fn = _fn or self.process.run_async - return await self._run_async(fn=fn, **kwargs) - except Exception as e: - logger.error(f"Exception raised while running {self.identifier}", exc_info=e) - if "file_data_path" in kwargs: - self.context.status[kwargs["file_data_path"]] = {self.identifier: str(e)} - if self.context.raise_on_error: - raise e - return None - - @property - def cache_dir(self) -> Path: - return Path(self.context.work_dir) / self.identifier diff --git a/unstructured/ingest/v2/pipeline/pipeline.py b/unstructured/ingest/v2/pipeline/pipeline.py deleted file mode 100644 index 93c77dfa0..000000000 --- a/unstructured/ingest/v2/pipeline/pipeline.py +++ /dev/null @@ -1,286 +0,0 @@ -from __future__ import annotations - -import logging -import multiprocessing as mp -from dataclasses import InitVar, dataclass, field -from time import time -from typing import Any, Optional, Union - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger, make_default_logger -from unstructured.ingest.v2.pipeline.steps.chunk import Chunker, ChunkStep -from unstructured.ingest.v2.pipeline.steps.download import DownloaderT, DownloadStep -from unstructured.ingest.v2.pipeline.steps.embed import Embedder, EmbedStep -from unstructured.ingest.v2.pipeline.steps.index import IndexerT, IndexStep -from unstructured.ingest.v2.pipeline.steps.partition import Partitioner, PartitionStep -from unstructured.ingest.v2.pipeline.steps.stage import UploadStager, UploadStageStep -from unstructured.ingest.v2.pipeline.steps.uncompress import Uncompressor, UncompressStep -from unstructured.ingest.v2.pipeline.steps.upload import Uploader, UploadStep -from unstructured.ingest.v2.pipeline.utils import sterilize_dict -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connector_registry import ( - ConnectionConfig, - DownloaderConfigT, - IndexerConfigT, - UploaderConfigT, - UploadStagerConfigT, - destination_registry, - source_registry, -) -from unstructured.ingest.v2.processes.connectors.local import LocalUploader -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - - -class PipelineError(Exception): - pass - - -@dataclass -class Pipeline: - context: ProcessorConfig - indexer: InitVar[IndexerT] - indexer_step: IndexStep = field(init=False) - downloader: InitVar[DownloaderT] - downloader_step: DownloadStep = field(init=False) - partitioner: InitVar[Partitioner] - partitioner_step: PartitionStep = field(init=False) - chunker: InitVar[Optional[Chunker]] = None - chunker_step: ChunkStep | None = field(init=False, default=None) - embedder: InitVar[Optional[Embedder]] = None - embedder_step: EmbedStep | None = field(init=False, default=None) - stager: InitVar[Optional[UploadStager]] = None - stager_step: UploadStageStep | None = field(init=False, default=None) - uploader: InitVar[Uploader] = field(default=LocalUploader()) - uploader_step: UploadStep | None = field(init=False, default=None) - uncompress_step: UncompressStep | None = field(init=False, default=None) - - def __post_init__( - self, - indexer: IndexerT, - downloader: 
DownloaderT, - partitioner: Partitioner, - chunker: Chunker | None = None, - embedder: Embedder | None = None, - stager: UploadStager | None = None, - uploader: Uploader | None = None, - ): - make_default_logger(level=logging.DEBUG if self.context.verbose else logging.INFO) - self.indexer_step = IndexStep(process=indexer, context=self.context) - self.downloader_step = DownloadStep(process=downloader, context=self.context) - self.partitioner_step = PartitionStep(process=partitioner, context=self.context) - self.chunker_step = ChunkStep(process=chunker, context=self.context) if chunker else None - - self.embedder_step = EmbedStep(process=embedder, context=self.context) if embedder else None - # TODO: support initialize() call from each step process - # Potential long call to download embedder models, run before any fanout: - if embedder and embedder.config: - embedder.config.get_embedder().initialize() - - self.stager_step = UploadStageStep(process=stager, context=self.context) if stager else None - self.uploader_step = UploadStep(process=uploader, context=self.context) - if self.context.uncompress: - process = Uncompressor() - self.uncompress_step = UncompressStep(process=process, context=self.context) - - self.check_destination_connector() - - def check_destination_connector(self): - # Make sure that if the set destination connector expects a stager, one is also set - if not self.uploader_step: - return - uploader_connector_type = self.uploader_step.process.connector_type - registry_entry = destination_registry[uploader_connector_type] - if registry_entry.upload_stager and self.stager_step is None: - raise ValueError( - f"pipeline with uploader type {self.uploader_step.process.__class__.__name__} " - f"expects a stager of type {registry_entry.upload_stager.__name__} " - f"but one was not set" - ) - - def cleanup(self): - pass - - def log_statuses(self): - if status := self.context.status: - logger.error(f"{len(status)} failed documents:") - for k, v in status.items(): - for kk, vv in v.items(): - logger.error(f"{k}: [{kk}] {vv}") - - def run(self): - try: - start_time = time() - self._run() - logger.info(f"Finished ingest process in {time() - start_time}s") - finally: - self.log_statuses() - self.cleanup() - if self.context.status: - raise PipelineError("Pipeline did not run successfully") - - def clean_results(self, results: list[Union[Any, list[Any]]] | None) -> list[Any] | None: - if not results: - return None - results = [r for r in results if r] - flat = [] - for r in results: - if isinstance(r, list): - flat.extend(r) - else: - flat.append(r) - final = [f for f in flat if f] - return final or None - - def _run(self): - logger.info( - f"Running local pipeline: {self} with configs: " - f"{sterilize_dict(self.context.to_dict(redact_sensitive=True))}" - ) - if self.context.mp_supported: - manager = mp.Manager() - self.context.status = manager.dict() - else: - self.context.status = {} - - # Index into data source - indices = self.indexer_step.run() - indices_inputs = [{"file_data_path": i} for i in indices] - if not indices_inputs: - return - - # Download associated content to local file system - downloaded_data = self.downloader_step(indices_inputs) - downloaded_data = self.clean_results(results=downloaded_data) - if not downloaded_data: - return - - # Run uncompress if available - if self.uncompress_step: - downloaded_data = self.uncompress_step(downloaded_data) - # Flatten list of lists - downloaded_data = self.clean_results(results=downloaded_data) - - if not downloaded_data: - return 
- - # Partition content - elements = self.partitioner_step(downloaded_data) - elements = self.clean_results(results=elements) - if not elements: - return - - # Run element specific modifiers - for step in [self.chunker_step, self.embedder_step, self.stager_step]: - elements = step(elements) if step else elements - elements = self.clean_results(results=elements) - if not elements: - return - - # Upload the final result - self.uploader_step(iterable=elements) - - def __str__(self): - s = [str(self.indexer_step), str(self.downloader_step)] - if uncompress_step := self.uncompress_step: - s.append(str(uncompress_step)) - s.append(str(self.partitioner_step)) - if chunker_step := self.chunker_step: - s.append(str(chunker_step)) - if embedder_step := self.embedder_step: - s.append(str(embedder_step)) - if stager_step := self.stager_step: - s.append(str(stager_step)) - s.append(str(self.uploader_step)) - return " -> ".join(s) - - @classmethod - def from_configs( - cls, - context: ProcessorConfig, - indexer_config: IndexerConfigT, - downloader_config: DownloaderConfigT, - source_connection_config: ConnectionConfig, - partitioner_config: PartitionerConfig, - chunker_config: Optional[ChunkerConfig] = None, - embedder_config: Optional[EmbedderConfig] = None, - destination_connection_config: Optional[ConnectionConfig] = None, - stager_config: Optional[UploadStagerConfigT] = None, - uploader_config: Optional[UploaderConfigT] = None, - ) -> "Pipeline": - # Get registry key based on indexer config - source_entry = { - k: v - for k, v in source_registry.items() - if isinstance(indexer_config, v.indexer_config) - and isinstance(downloader_config, v.downloader_config) - and isinstance(source_connection_config, v.connection_config) - } - if len(source_entry) > 1: - raise ValueError( - f"multiple entries found matching provided indexer, " - f"downloader and connection configs: {source_entry}" - ) - if len(source_entry) != 1: - raise ValueError( - "no entry found in source registry with matching indexer, " - "downloader and connection configs" - ) - source = list(source_entry.values())[0] - pipeline_kwargs = { - "context": context, - "indexer": source.indexer( - index_config=indexer_config, connection_config=source_connection_config - ), - "downloader": source.downloader( - download_config=downloader_config, connection_config=source_connection_config - ), - "partitioner": Partitioner(config=partitioner_config), - } - if chunker_config: - pipeline_kwargs["chunker"] = Chunker(config=chunker_config) - if embedder_config: - pipeline_kwargs["embedder"] = Embedder(config=embedder_config) - if not uploader_config: - return Pipeline(**pipeline_kwargs) - - destination_entry = { - k: v - for k, v in destination_registry.items() - if isinstance(uploader_config, v.uploader_config) - } - if destination_connection_config: - destination_entry = { - k: v - for k, v in destination_entry.items() - if isinstance(destination_connection_config, v.connection_config) - } - if stager_config: - destination_entry = { - k: v - for k, v in destination_entry.items() - if isinstance(stager_config, v.upload_stager_config) - } - - if len(destination_entry) > 1: - raise ValueError( - f"multiple entries found matching provided uploader, " - f"stager and connection configs: {destination_entry}" - ) - if len(destination_entry) != 1: - raise ValueError( - "no entry found in source registry with matching uploader, " - "stager and connection configs" - ) - - destination = list(destination_entry.values())[0] - if stager_config: - 
pipeline_kwargs["stager"] = destination.upload_stager( - upload_stager_config=stager_config - ) - if uploader_config: - uploader_kwargs = {"upload_config": uploader_config} - if destination_connection_config: - uploader_kwargs["connection_config"] = destination_connection_config - pipeline_kwargs["uploader"] = destination.uploader(**uploader_kwargs) - return cls(**pipeline_kwargs) diff --git a/unstructured/ingest/v2/pipeline/steps/__init__.py b/unstructured/ingest/v2/pipeline/steps/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstructured/ingest/v2/pipeline/steps/chunk.py b/unstructured/ingest/v2/pipeline/steps/chunk.py deleted file mode 100644 index b2e5d14c2..000000000 --- a/unstructured/ingest/v2/pipeline/steps/chunk.py +++ /dev/null @@ -1,84 +0,0 @@ -import asyncio -import hashlib -import json -from dataclasses import dataclass -from pathlib import Path -from typing import Callable, Optional, TypedDict - -from unstructured.ingest.v2.interfaces import FileData -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.interfaces import PipelineStep -from unstructured.ingest.v2.pipeline.utils import sterilize_dict -from unstructured.ingest.v2.processes.chunker import Chunker -from unstructured.staging.base import elements_to_dicts - -STEP_ID = "chunk" - - -class ChunkStepResponse(TypedDict): - file_data_path: str - path: str - - -@dataclass -class ChunkStep(PipelineStep): - process: Chunker - identifier: str = STEP_ID - - def __str__(self): - return f"{self.identifier} ({self.process.config.chunking_strategy})" - - def __post_init__(self): - config = ( - sterilize_dict(self.process.config.to_dict(redact_sensitive=True)) - if self.process.config - else None - ) - logger.info(f"Created {self.identifier} with configs: {config}") - - def should_chunk(self, filepath: Path, file_data: FileData) -> bool: - if self.context.reprocess or file_data.reprocess: - return True - return not filepath.exists() - - def get_output_filepath(self, filename: Path) -> Path: - hashed_output_file = f"{self.get_hash(extras=[filename.name])}.json" - filepath = (self.cache_dir / hashed_output_file).resolve() - filepath.parent.mkdir(parents=True, exist_ok=True) - return filepath - - def _save_output(self, output_filepath: str, chunked_content: list[dict]): - with open(str(output_filepath), "w") as f: - logger.debug(f"Writing chunker output to: {output_filepath}") - json.dump(chunked_content, f, indent=2) - - async def _run_async( - self, fn: Callable, path: str, file_data_path: str, **kwargs - ) -> ChunkStepResponse: - path = Path(path) - file_data = FileData.from_file(path=file_data_path) - output_filepath = self.get_output_filepath(filename=path) - if not self.should_chunk(filepath=output_filepath, file_data=file_data): - logger.debug(f"Skipping chunking, output already exists: {output_filepath}") - return ChunkStepResponse(file_data_path=file_data_path, path=str(output_filepath)) - fn_kwargs = {"elements_filepath": path} - if not asyncio.iscoroutinefunction(fn): - chunked_content_raw = fn(**fn_kwargs) - elif semaphore := self.context.semaphore: - async with semaphore: - chunked_content_raw = await fn(**fn_kwargs) - else: - chunked_content_raw = await fn(**fn_kwargs) - self._save_output( - output_filepath=str(output_filepath), - chunked_content=elements_to_dicts(chunked_content_raw), - ) - return ChunkStepResponse(file_data_path=file_data_path, path=str(output_filepath)) - - def get_hash(self, extras: Optional[list[str]]) -> str: - hashable_string = 
json.dumps( - self.process.config.to_dict(), sort_keys=True, ensure_ascii=True - ) - if extras: - hashable_string += "".join(extras) - return hashlib.sha256(hashable_string.encode()).hexdigest()[:12] diff --git a/unstructured/ingest/v2/pipeline/steps/download.py b/unstructured/ingest/v2/pipeline/steps/download.py deleted file mode 100644 index 84d00e35d..000000000 --- a/unstructured/ingest/v2/pipeline/steps/download.py +++ /dev/null @@ -1,124 +0,0 @@ -import asyncio -import hashlib -import json -from dataclasses import dataclass -from typing import Callable, Optional, TypedDict, TypeVar - -from unstructured.ingest.v2.interfaces import FileData, download_responses -from unstructured.ingest.v2.interfaces.downloader import Downloader -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.interfaces import PipelineStep -from unstructured.ingest.v2.pipeline.utils import sterilize_dict - -DownloaderT = TypeVar("DownloaderT", bound=Downloader) - -STEP_ID = "download" - - -class DownloadStepResponse(TypedDict): - file_data_path: str - path: str - - -@dataclass -class DownloadStep(PipelineStep): - process: DownloaderT - identifier: str = STEP_ID - - def __str__(self): - return f"{self.identifier} ({self.process.__class__.__name__})" - - def __post_init__(self): - config = ( - sterilize_dict(self.process.download_config.to_dict(redact_sensitive=True)) - if self.process.download_config - else None - ) - connection_config = ( - sterilize_dict(self.process.connection_config.to_dict(redact_sensitive=True)) - if self.process.connection_config - else None - ) - logger.info( - f"Created {self.identifier} with configs: {config}, " - f"connection configs: {connection_config}" - ) - - @staticmethod - def is_float(value: str): - try: - float(value) - return True - except ValueError: - return False - - def should_download(self, file_data: FileData, file_data_path: str) -> bool: - if self.context.re_download: - return True - download_path = self.process.get_download_path(file_data=file_data) - if not download_path or not download_path.exists(): - return True - if ( - download_path.is_file() - and file_data.metadata.date_modified - and self.is_float(file_data.metadata.date_modified) - and download_path.stat().st_mtime > float(file_data.metadata.date_modified) - ): - # Also update file data to mark this to reprocess since this won't change the filename - file_data.reprocess = True - file_data.to_file(path=file_data_path) - return True - return False - - async def _run_async(self, fn: Callable, file_data_path: str) -> list[DownloadStepResponse]: - file_data = FileData.from_file(path=file_data_path) - download_path = self.process.get_download_path(file_data=file_data) - if not self.should_download(file_data=file_data, file_data_path=file_data_path): - logger.debug(f"Skipping download, file already exists locally: {download_path}") - return [DownloadStepResponse(file_data_path=file_data_path, path=str(download_path))] - fn_kwargs = {"file_data": file_data} - if not asyncio.iscoroutinefunction(fn): - download_results = fn(**fn_kwargs) - elif semaphore := self.context.semaphore: - async with semaphore: - download_results = await fn(**fn_kwargs) - else: - download_results = await fn(**fn_kwargs) - return self.create_step_results( - current_file_data_path=file_data_path, download_results=download_results - ) - - def create_step_results( - self, current_file_data_path: str, download_results: download_responses - ) -> list[DownloadStepResponse]: - if not isinstance(download_results, list): 
- return [ - DownloadStepResponse( - file_data_path=current_file_data_path, path=str(download_results["path"]) - ) - ] - # Supplemental results generated as part of the download process - download_step_results = [] - for res in download_results: - file_data_path = self.persist_new_file_data(file_data=res["file_data"]) - download_step_results.append( - DownloadStepResponse(file_data_path=file_data_path, path=res["path"]) - ) - return download_step_results - - def persist_new_file_data(self, file_data: FileData) -> str: - record_hash = self.get_hash(extras=[file_data.identifier]) - filename = f"{record_hash}.json" - filepath = (self.cache_dir / filename).resolve() - filepath.parent.mkdir(parents=True, exist_ok=True) - with open(str(filepath), "w") as f: - json.dump(file_data.to_dict(), f, indent=2) - return str(filepath) - - def get_hash(self, extras: Optional[list[str]]) -> str: - hashable_string = json.dumps( - sterilize_dict(self.process.download_config.to_dict()), sort_keys=True - ) - if extras: - hashable_string += "".join(extras) - return hashlib.sha256(hashable_string.encode()).hexdigest()[:12] diff --git a/unstructured/ingest/v2/pipeline/steps/embed.py b/unstructured/ingest/v2/pipeline/steps/embed.py deleted file mode 100644 index 94103951c..000000000 --- a/unstructured/ingest/v2/pipeline/steps/embed.py +++ /dev/null @@ -1,83 +0,0 @@ -import asyncio -import hashlib -import json -from dataclasses import dataclass -from pathlib import Path -from typing import Callable, Optional, TypedDict - -from unstructured.ingest.v2.interfaces import FileData -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.interfaces import PipelineStep -from unstructured.ingest.v2.pipeline.utils import sterilize_dict -from unstructured.ingest.v2.processes.embedder import Embedder -from unstructured.staging.base import elements_to_dicts - -STEP_ID = "embed" - - -class EmbedStepResponse(TypedDict): - file_data_path: str - path: str - - -@dataclass -class EmbedStep(PipelineStep): - process: Embedder - identifier: str = STEP_ID - - def __str__(self): - return f"{self.identifier} ({self.process.config.embedding_provider})" - - def __post_init__(self): - config = ( - sterilize_dict(self.process.config.to_dict(redact_sensitive=True)) - if self.process.config - else None - ) - logger.info(f"Created {self.identifier} with configs: {config}") - - def should_embed(self, filepath: Path, file_data: FileData) -> bool: - if self.context.reprocess or file_data.reprocess: - return True - return not filepath.exists() - - def get_output_filepath(self, filename: Path) -> Path: - hashed_output_file = f"{self.get_hash(extras=[filename.name])}.json" - filepath = (self.cache_dir / hashed_output_file).resolve() - filepath.parent.mkdir(parents=True, exist_ok=True) - return filepath - - def _save_output(self, output_filepath: str, embedded_content: list[dict]): - with open(str(output_filepath), "w") as f: - logger.debug(f"Writing embedded output to: {output_filepath}") - json.dump(embedded_content, f, indent=2) - - async def _run_async(self, fn: Callable, path: str, file_data_path: str) -> EmbedStepResponse: - path = Path(path) - file_data = FileData.from_file(path=file_data_path) - output_filepath = self.get_output_filepath(filename=path) - if not self.should_embed(filepath=output_filepath, file_data=file_data): - logger.debug(f"Skipping embedding, output already exists: {output_filepath}") - return EmbedStepResponse(file_data_path=file_data_path, path=str(output_filepath)) - fn_kwargs = 
{"elements_filepath": path} - if not asyncio.iscoroutinefunction(fn): - embed_content_raw = fn(**fn_kwargs) - elif semaphore := self.context.semaphore: - async with semaphore: - embed_content_raw = await fn(**fn_kwargs) - else: - embed_content_raw = await fn(**fn_kwargs) - - self._save_output( - output_filepath=str(output_filepath), - embedded_content=elements_to_dicts(embed_content_raw), - ) - return EmbedStepResponse(file_data_path=file_data_path, path=str(output_filepath)) - - def get_hash(self, extras: Optional[list[str]]) -> str: - hashable_string = json.dumps( - self.process.config.to_dict(), sort_keys=True, ensure_ascii=True - ) - if extras: - hashable_string += "".join(extras) - return hashlib.sha256(hashable_string.encode()).hexdigest()[:12] diff --git a/unstructured/ingest/v2/pipeline/steps/index.py b/unstructured/ingest/v2/pipeline/steps/index.py deleted file mode 100644 index d91a035ab..000000000 --- a/unstructured/ingest/v2/pipeline/steps/index.py +++ /dev/null @@ -1,65 +0,0 @@ -from __future__ import annotations - -import hashlib -import json -from dataclasses import dataclass -from typing import Any, Callable, Generator, Optional, TypeVar - -from unstructured.ingest.v2.interfaces.indexer import Indexer -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.interfaces import PipelineStep -from unstructured.ingest.v2.pipeline.utils import sterilize_dict - -IndexerT = TypeVar("IndexerT", bound=Indexer) - -STEP_ID = "index" - - -@dataclass -class IndexStep(PipelineStep): - process: IndexerT - identifier: str = STEP_ID - - def __str__(self): - return f"{self.identifier} ({self.process.__class__.__name__})" - - def __post_init__(self): - config = ( - sterilize_dict(self.process.index_config.to_dict(redact_sensitive=True)) - if self.process.index_config - else None - ) - connection_config = ( - sterilize_dict(self.process.connection_config.to_dict(redact_sensitive=True)) - if self.process.connection_config - else None - ) - logger.info( - f"Created {self.identifier} with configs: {config}, " - f"connection configs: {connection_config}" - ) - - def run( - self, _fn: Callable[..., Any] | None = None, **kwargs: Any - ) -> Generator[str, None, None]: - for file_data in self.process.run(): - logger.debug(f"Generated file data: {file_data}") - try: - record_hash = self.get_hash(extras=[file_data.identifier]) - filename = f"{record_hash}.json" - filepath = (self.cache_dir / filename).resolve() - filepath.parent.mkdir(parents=True, exist_ok=True) - with open(str(filepath), "w") as f: - json.dump(file_data.to_dict(), f, indent=2) - yield str(filepath) - except Exception as e: - logger.error(f"failed to create index for file data: {file_data}", exc_info=True) - if self.context.raise_on_error: - raise e - continue - - def get_hash(self, extras: Optional[list[str]]) -> str: - hashable_string = json.dumps(self.process.index_config.to_dict()) - if extras: - hashable_string += "".join(extras) - return hashlib.sha256(hashable_string.encode()).hexdigest()[:12] diff --git a/unstructured/ingest/v2/pipeline/steps/partition.py b/unstructured/ingest/v2/pipeline/steps/partition.py deleted file mode 100644 index 541d2cae9..000000000 --- a/unstructured/ingest/v2/pipeline/steps/partition.py +++ /dev/null @@ -1,78 +0,0 @@ -import asyncio -import hashlib -import json -from dataclasses import dataclass -from pathlib import Path -from typing import Callable, Optional, TypedDict - -from unstructured.ingest.v2.interfaces import FileData -from unstructured.ingest.v2.logger import 
logger -from unstructured.ingest.v2.pipeline.interfaces import PipelineStep -from unstructured.ingest.v2.pipeline.utils import sterilize_dict -from unstructured.ingest.v2.processes.partitioner import Partitioner - -STEP_ID = "partition" - - -class PartitionStepResponse(TypedDict): - file_data_path: str - path: str - - -@dataclass -class PartitionStep(PipelineStep): - process: Partitioner - identifier: str = STEP_ID - - def __str__(self): - return f"{self.identifier} ({self.process.config.strategy})" - - def __post_init__(self): - config = sterilize_dict(self.process.config.to_dict(redact_sensitive=True)) - logger.info(f"Created {self.identifier} with configs: {config}") - - def should_partition(self, filepath: Path, file_data: FileData) -> bool: - if self.context.reprocess or file_data.reprocess: - return True - return not filepath.exists() - - def get_output_filepath(self, filename: Path) -> Path: - hashed_output_file = f"{self.get_hash(extras=[filename.name])}.json" - filepath = (self.cache_dir / hashed_output_file).resolve() - filepath.parent.mkdir(parents=True, exist_ok=True) - return filepath - - def _save_output(self, output_filepath: str, partitioned_content: list[dict]): - with open(str(output_filepath), "w") as f: - logger.debug(f"Writing partitioned output to: {output_filepath}") - json.dump(partitioned_content, f, indent=2) - - async def _run_async( - self, fn: Callable, path: str, file_data_path: str - ) -> Optional[PartitionStepResponse]: - path = Path(path) - file_data = FileData.from_file(path=file_data_path) - output_filepath = self.get_output_filepath(filename=Path(file_data_path)) - if not self.should_partition(filepath=output_filepath, file_data=file_data): - logger.debug(f"Skipping partitioning, output already exists: {output_filepath}") - return PartitionStepResponse(file_data_path=file_data_path, path=str(output_filepath)) - fn_kwargs = {"filename": path, "metadata": file_data.metadata} - if not asyncio.iscoroutinefunction(fn): - partitioned_content = fn(**fn_kwargs) - elif semaphore := self.context.semaphore: - async with semaphore: - partitioned_content = await fn(**fn_kwargs) - else: - partitioned_content = await fn(**fn_kwargs) - self._save_output( - output_filepath=str(output_filepath), partitioned_content=partitioned_content - ) - return PartitionStepResponse(file_data_path=file_data_path, path=str(output_filepath)) - - def get_hash(self, extras: Optional[list[str]]) -> str: - hashable_string = json.dumps( - self.process.config.to_dict(), sort_keys=True, ensure_ascii=True - ) - if extras: - hashable_string += "".join(extras) - return hashlib.sha256(hashable_string.encode()).hexdigest()[:12] diff --git a/unstructured/ingest/v2/pipeline/steps/stage.py b/unstructured/ingest/v2/pipeline/steps/stage.py deleted file mode 100644 index b4c6204ad..000000000 --- a/unstructured/ingest/v2/pipeline/steps/stage.py +++ /dev/null @@ -1,64 +0,0 @@ -import asyncio -import hashlib -import json -from dataclasses import dataclass -from pathlib import Path -from typing import Callable, Optional, TypedDict - -from unstructured.ingest.v2.interfaces.file_data import FileData -from unstructured.ingest.v2.interfaces.upload_stager import UploadStager -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.interfaces import PipelineStep -from unstructured.ingest.v2.pipeline.utils import sterilize_dict - -STEP_ID = "upload_stage" - - -class UploadStageStepResponse(TypedDict): - file_data_path: str - path: str - - -@dataclass -class 
UploadStageStep(PipelineStep): - process: UploadStager - identifier: str = STEP_ID - - def __str__(self): - return f"{self.identifier} ({self.process.__class__.__name__})" - - def __post_init__(self): - config = ( - sterilize_dict(self.process.upload_stager_config.to_dict(redact_sensitive=True)) - if self.process.upload_stager_config - else None - ) - self.cache_dir.mkdir(parents=True, exist_ok=True) - logger.info(f"Created {self.identifier} with configs: {config}") - - async def _run_async( - self, fn: Callable, path: str, file_data_path: str - ) -> UploadStageStepResponse: - path = Path(path) - fn_kwargs = { - "elements_filepath": path, - "file_data": FileData.from_file(path=file_data_path), - "output_dir": self.cache_dir, - "output_filename": self.get_hash(extras=[path.name]), - } - if not asyncio.iscoroutinefunction(fn): - staged_output_path = fn(**fn_kwargs) - elif semaphore := self.context.semaphore: - async with semaphore: - staged_output_path = await fn(**fn_kwargs) - else: - staged_output_path = await fn(**fn_kwargs) - return UploadStageStepResponse(file_data_path=file_data_path, path=str(staged_output_path)) - - def get_hash(self, extras: Optional[list[str]]) -> str: - hashable_string = json.dumps( - self.process.upload_stager_config.to_dict(), sort_keys=True, ensure_ascii=True - ) - if extras: - hashable_string += "".join(extras) - return hashlib.sha256(hashable_string.encode()).hexdigest()[:12] diff --git a/unstructured/ingest/v2/pipeline/steps/uncompress.py b/unstructured/ingest/v2/pipeline/steps/uncompress.py deleted file mode 100644 index 987c9d5f6..000000000 --- a/unstructured/ingest/v2/pipeline/steps/uncompress.py +++ /dev/null @@ -1,68 +0,0 @@ -import asyncio -from pathlib import Path -from typing import Callable, TypedDict - -from unstructured.ingest.v2.interfaces.file_data import FileData -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.interfaces import PipelineStep -from unstructured.ingest.v2.pipeline.utils import sterilize_dict -from unstructured.ingest.v2.processes.uncompress import Uncompressor - -STEP_ID = "uncompress" - - -class UncompressStepResponse(TypedDict): - file_data_path: str - path: str - - -class UncompressStep(PipelineStep): - process: Uncompressor - identifier: str = STEP_ID - - def __post_init__(self): - config = ( - sterilize_dict(self.process.config.to_dict(redact_sensitive=True)) - if self.process.config - else None - ) - logger.info(f"Created {self.identifier} with configs: {config}") - - def _run(self, path: str, file_data_path: str) -> list[UncompressStepResponse]: - file_data = FileData.from_file(path=file_data_path) - new_file_data = self.process.run(file_data=file_data) - responses = [] - for new_file in new_file_data: - new_file_data_path = Path(file_data_path).parent / f"{new_file.identifier}.json" - new_file.to_file(path=str(new_file_data_path.resolve())) - responses.append( - UncompressStepResponse( - path=new_file.source_identifiers.fullpath, - file_data_path=str(new_file_data_path), - ) - ) - return responses - - async def _run_async( - self, fn: Callable, path: str, file_data_path: str - ) -> list[UncompressStepResponse]: - file_data = FileData.from_file(path=file_data_path) - fn_kwargs = {"file_data": file_data} - if not asyncio.iscoroutinefunction(fn): - new_file_data = fn(**fn_kwargs) - elif semaphore := self.context.semaphore: - async with semaphore: - new_file_data = await fn(**fn_kwargs) - else: - new_file_data = await fn(**fn_kwargs) - responses = [] - for new_file in new_file_data: - 
new_file_data_path = Path(file_data_path).parent / f"{new_file.identifier}.json" - new_file.to_file(path=str(new_file_data_path.resolve())) - responses.append( - UncompressStepResponse( - path=new_file.source_identifiers.fullpath, - file_data_path=str(new_file_data_path), - ) - ) - return responses diff --git a/unstructured/ingest/v2/pipeline/steps/upload.py b/unstructured/ingest/v2/pipeline/steps/upload.py deleted file mode 100644 index dc58d46ac..000000000 --- a/unstructured/ingest/v2/pipeline/steps/upload.py +++ /dev/null @@ -1,73 +0,0 @@ -import asyncio -from dataclasses import dataclass -from pathlib import Path -from typing import Callable, Optional, TypedDict - -from unstructured.ingest.v2.interfaces import FileData -from unstructured.ingest.v2.interfaces.uploader import UploadContent, Uploader -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.interfaces import PipelineStep, iterable_input, timed -from unstructured.ingest.v2.pipeline.utils import sterilize_dict - -STEP_ID = "upload" - - -class UploadStepContent(TypedDict): - path: str - file_data_path: str - - -@dataclass -class UploadStep(PipelineStep): - process: Uploader - identifier: str = STEP_ID - - def __str__(self): - return f"{self.identifier} ({self.process.__class__.__name__})" - - def __post_init__(self): - config = ( - sterilize_dict(self.process.upload_config.to_dict(redact_sensitive=True)) - if self.process.upload_config - else None - ) - connection_config = ( - sterilize_dict(self.process.connection_config.to_dict(redact_sensitive=True)) - if self.process.connection_config - else None - ) - logger.info( - f"Created {self.identifier} with configs: {config}, " - f"connection configs: {connection_config}" - ) - - def process_whole(self, iterable: iterable_input): - self.run(contents=iterable) - - @timed - def __call__(self, iterable: iterable_input): - logger.info( - f"Calling {self.__class__.__name__} " f"with {len(iterable)} docs", # type: ignore - ) - if self.process.is_async(): - self.process_async(iterable=iterable) - else: - self.process_whole(iterable=iterable) - - def _run(self, fn: Callable, contents: list[UploadStepContent]): - upload_contents = [ - UploadContent(path=Path(c["path"]), file_data=FileData.from_file(c["file_data_path"])) - for c in contents - ] - fn(contents=upload_contents) - - async def _run_async(self, path: str, file_data_path: str, fn: Optional[Callable] = None): - fn = fn or self.process.run_async - fn_kwargs = {"path": Path(path), "file_data": FileData.from_file(path=file_data_path)} - if not asyncio.iscoroutinefunction(fn): - fn(**fn_kwargs) - elif semaphore := self.context.semaphore: - async with semaphore: - await fn(**fn_kwargs) - else: - await fn(**fn_kwargs) diff --git a/unstructured/ingest/v2/pipeline/utils.py b/unstructured/ingest/v2/pipeline/utils.py deleted file mode 100644 index e684ebb10..000000000 --- a/unstructured/ingest/v2/pipeline/utils.py +++ /dev/null @@ -1,16 +0,0 @@ -import json -from datetime import datetime -from pathlib import Path -from typing import Any - - -def sterilize_dict(data: dict[str, Any]) -> dict[str, Any]: - def json_serial(obj: Any) -> str: - if isinstance(obj, Path): - return obj.as_posix() - if isinstance(obj, datetime): - return obj.isoformat() - raise TypeError("Type %s not serializable" % type(obj)) - - data_s = json.dumps(data, default=json_serial) - return json.loads(data_s) diff --git a/unstructured/ingest/v2/processes/__init__.py b/unstructured/ingest/v2/processes/__init__.py deleted file mode 100644 index 
e69de29bb..000000000 diff --git a/unstructured/ingest/v2/processes/chunker.py b/unstructured/ingest/v2/processes/chunker.py deleted file mode 100644 index 11dffb073..000000000 --- a/unstructured/ingest/v2/processes/chunker.py +++ /dev/null @@ -1,96 +0,0 @@ -from abc import ABC -from dataclasses import dataclass, fields -from pathlib import Path -from typing import Any, Optional - -from unstructured.chunking import dispatch -from unstructured.documents.elements import Element, assign_and_map_hash_ids -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin, enhanced_field -from unstructured.ingest.v2.interfaces.process import BaseProcess -from unstructured.ingest.v2.logger import logger -from unstructured.staging.base import dict_to_elements, elements_from_json - - -@dataclass -class ChunkerConfig(EnhancedDataClassJsonMixin): - chunking_strategy: Optional[str] = None - chunking_endpoint: Optional[str] = "https://api.unstructured.io/general/v0/general" - chunk_by_api: bool = False - chunk_api_key: Optional[str] = enhanced_field(default=None, sensitive=True) - - chunk_combine_text_under_n_chars: Optional[int] = None - chunk_include_orig_elements: Optional[bool] = None - chunk_max_characters: Optional[int] = None - chunk_multipage_sections: Optional[bool] = None - chunk_new_after_n_chars: Optional[int] = None - chunk_overlap: Optional[int] = None - chunk_overlap_all: Optional[bool] = None - - def to_chunking_kwargs(self) -> dict[str, Any]: - return { - "chunking_strategy": self.chunking_strategy, - "combine_under_n_chars": self.chunk_combine_text_under_n_chars, - "max_characters": self.chunk_max_characters, - "include_orig_elements": self.chunk_include_orig_elements, - "multipage_sections": self.chunk_multipage_sections, - "new_after_n_chars": self.chunk_new_after_n_chars, - "overlap": self.chunk_overlap, - "overlap_all": self.chunk_overlap_all, - } - - -@dataclass -class Chunker(BaseProcess, ABC): - config: ChunkerConfig - - def is_async(self) -> bool: - return self.config.chunk_by_api - - def run(self, elements_filepath: Path, **kwargs: Any) -> list[Element]: - elements = elements_from_json(filename=str(elements_filepath)) - if not elements: - return elements - local_chunking_strategies = ("basic", "by_title") - if self.config.chunking_strategy not in local_chunking_strategies: - logger.warning( - "chunking strategy not supported for local chunking: {}, must be one of: {}".format( - self.config.chunking_strategy, ", ".join(local_chunking_strategies) - ) - ) - return elements - chunked_elements = dispatch.chunk(elements=elements, **self.config.to_chunking_kwargs()) - assign_and_map_hash_ids(chunked_elements) - return chunked_elements - - async def run_async(self, elements_filepath: Path, **kwargs: Any) -> list[Element]: - from unstructured_client import UnstructuredClient - from unstructured_client.models.shared import Files, PartitionParameters - - client = UnstructuredClient( - api_key_auth=self.config.chunk_api_key, - server_url=self.config.chunking_endpoint, - ) - partition_request = self.config.to_chunking_kwargs() - possible_fields = [f.name for f in fields(PartitionParameters)] - filtered_partition_request = { - k: v for k, v in partition_request.items() if k in possible_fields - } - if len(filtered_partition_request) != len(partition_request): - logger.debug( - "Following fields were omitted due to not being " - "supported by the currently used unstructured client: {}".format( - ", ".join([v for v in partition_request if v not in filtered_partition_request]) - ) 
- ) - with open(elements_filepath, "rb") as f: - files = Files( - content=f.read(), - file_name=str(elements_filepath.resolve()), - ) - filtered_partition_request["files"] = files - partition_params = PartitionParameters(**filtered_partition_request) - resp = client.general.partition(partition_params) - elements_raw = resp.elements or [] - elements = dict_to_elements(elements_raw) - assign_and_map_hash_ids(elements) - return elements diff --git a/unstructured/ingest/v2/processes/connector_registry.py b/unstructured/ingest/v2/processes/connector_registry.py deleted file mode 100644 index 41abdd4c8..000000000 --- a/unstructured/ingest/v2/processes/connector_registry.py +++ /dev/null @@ -1,63 +0,0 @@ -from dataclasses import dataclass -from typing import Optional, Type, TypeVar - -from unstructured.ingest.v2.interfaces import ( - ConnectionConfig, - Downloader, - DownloaderConfig, - Indexer, - IndexerConfig, - Uploader, - UploaderConfig, - UploadStager, - UploadStagerConfig, -) - -IndexerT = TypeVar("IndexerT", bound=Indexer) -IndexerConfigT = TypeVar("IndexerConfigT", bound=IndexerConfig) -DownloaderT = TypeVar("DownloaderT", bound=Downloader) -DownloaderConfigT = TypeVar("DownloaderConfigT", bound=DownloaderConfig) -ConnectionConfigT = TypeVar("ConnectionConfigT", bound=ConnectionConfig) -UploadStagerConfigT = TypeVar("UploadStagerConfigT", bound=UploadStagerConfig) -UploadStagerT = TypeVar("UploadStagerT", bound=UploadStager) -UploaderConfigT = TypeVar("UploaderConfigT", bound=UploaderConfig) -UploaderT = TypeVar("UploaderT", bound=Uploader) - - -@dataclass -class SourceRegistryEntry: - indexer: Type[IndexerT] - downloader: Type[DownloaderT] - - downloader_config: Optional[Type[DownloaderConfigT]] = None - indexer_config: Optional[Type[IndexerConfigT]] = None - connection_config: Optional[Type[ConnectionConfigT]] = None - - -source_registry: dict[str, SourceRegistryEntry] = {} - - -def add_source_entry(source_type: str, entry: SourceRegistryEntry): - if source_type in source_registry: - raise ValueError(f"source {source_type} has already been registered") - source_registry[source_type] = entry - - -@dataclass -class DestinationRegistryEntry: - uploader: Type[UploaderT] - upload_stager: Optional[Type[UploadStagerT]] = None - - upload_stager_config: Optional[Type[UploadStagerConfigT]] = None - uploader_config: Optional[Type[UploaderConfigT]] = None - - connection_config: Optional[Type[ConnectionConfigT]] = None - - -destination_registry: dict[str, DestinationRegistryEntry] = {} - - -def add_destination_entry(destination_type: str, entry: DestinationRegistryEntry): - if destination_type in destination_registry: - raise ValueError(f"destination {destination_type} has already been registered") - destination_registry[destination_type] = entry diff --git a/unstructured/ingest/v2/processes/connectors/__init__.py b/unstructured/ingest/v2/processes/connectors/__init__.py deleted file mode 100644 index 5e4e2cf13..000000000 --- a/unstructured/ingest/v2/processes/connectors/__init__.py +++ /dev/null @@ -1,76 +0,0 @@ -from __future__ import annotations - -import unstructured.ingest.v2.processes.connectors.fsspec # noqa: F401 -from unstructured.ingest.v2.processes.connector_registry import ( - add_destination_entry, - add_source_entry, -) - -from .astradb import CONNECTOR_TYPE as ASTRADB_CONNECTOR_TYPE -from .astradb import astradb_destination_entry -from .chroma import CONNECTOR_TYPE as CHROMA_CONNECTOR_TYPE -from .chroma import chroma_destination_entry -from .databricks_volumes import CONNECTOR_TYPE as 
DATABRICKS_VOLUMES_CONNECTOR_TYPE -from .databricks_volumes import databricks_volumes_destination_entry -from .elasticsearch import CONNECTOR_TYPE as ELASTICSEARCH_CONNECTOR_TYPE -from .elasticsearch import elasticsearch_destination_entry, elasticsearch_source_entry -from .google_drive import CONNECTOR_TYPE as GOOGLE_DRIVE_CONNECTOR_TYPE -from .google_drive import google_drive_source_entry -from .local import CONNECTOR_TYPE as LOCAL_CONNECTOR_TYPE -from .local import local_destination_entry, local_source_entry -from .mongodb import CONNECTOR_TYPE as MONGODB_CONNECTOR_TYPE -from .mongodb import mongodb_destination_entry -from .onedrive import CONNECTOR_TYPE as ONEDRIVE_CONNECTOR_TYPE -from .onedrive import onedrive_source_entry -from .opensearch import CONNECTOR_TYPE as OPENSEARCH_CONNECTOR_TYPE -from .opensearch import opensearch_destination_entry, opensearch_source_entry -from .pinecone import CONNECTOR_TYPE as PINECONE_CONNECTOR_TYPE -from .pinecone import pinecone_destination_entry -from .salesforce import CONNECTOR_TYPE as SALESFORCE_CONNECTOR_TYPE -from .salesforce import salesforce_source_entry -from .sharepoint import CONNECTOR_TYPE as SHAREPOINT_CONNECTOR_TYPE -from .sharepoint import sharepoint_source_entry -from .singlestore import CONNECTOR_TYPE as SINGLESTORE_CONNECTOR_TYPE -from .singlestore import singlestore_destination_entry -from .sql import CONNECTOR_TYPE as SQL_CONNECTOR_TYPE -from .sql import sql_destination_entry -from .weaviate import CONNECTOR_TYPE as WEAVIATE_CONNECTOR_TYPE -from .weaviate import weaviate_destination_entry - -add_destination_entry(destination_type=ASTRADB_CONNECTOR_TYPE, entry=astradb_destination_entry) - -add_destination_entry(destination_type=CHROMA_CONNECTOR_TYPE, entry=chroma_destination_entry) - -add_source_entry(source_type=ELASTICSEARCH_CONNECTOR_TYPE, entry=elasticsearch_source_entry) -add_destination_entry( - destination_type=ELASTICSEARCH_CONNECTOR_TYPE, entry=elasticsearch_destination_entry -) - -add_source_entry(source_type=GOOGLE_DRIVE_CONNECTOR_TYPE, entry=google_drive_source_entry) - -add_source_entry(source_type=LOCAL_CONNECTOR_TYPE, entry=local_source_entry) -add_destination_entry(destination_type=LOCAL_CONNECTOR_TYPE, entry=local_destination_entry) - -add_source_entry(source_type=ONEDRIVE_CONNECTOR_TYPE, entry=onedrive_source_entry) - -add_source_entry(source_type=OPENSEARCH_CONNECTOR_TYPE, entry=opensearch_source_entry) -add_destination_entry( - destination_type=OPENSEARCH_CONNECTOR_TYPE, entry=opensearch_destination_entry -) - -add_source_entry(source_type=SALESFORCE_CONNECTOR_TYPE, entry=salesforce_source_entry) - -add_destination_entry(destination_type=WEAVIATE_CONNECTOR_TYPE, entry=weaviate_destination_entry) - -add_destination_entry( - destination_type=DATABRICKS_VOLUMES_CONNECTOR_TYPE, entry=databricks_volumes_destination_entry -) - -add_destination_entry(destination_type=SQL_CONNECTOR_TYPE, entry=sql_destination_entry) - -add_destination_entry(destination_type=MONGODB_CONNECTOR_TYPE, entry=mongodb_destination_entry) -add_destination_entry(destination_type=PINECONE_CONNECTOR_TYPE, entry=pinecone_destination_entry) -add_source_entry(source_type=SHAREPOINT_CONNECTOR_TYPE, entry=sharepoint_source_entry) -add_destination_entry( - destination_type=SINGLESTORE_CONNECTOR_TYPE, entry=singlestore_destination_entry -) diff --git a/unstructured/ingest/v2/processes/connectors/astradb.py b/unstructured/ingest/v2/processes/connectors/astradb.py deleted file mode 100644 index dc10862e8..000000000 --- 
a/unstructured/ingest/v2/processes/connectors/astradb.py +++ /dev/null @@ -1,151 +0,0 @@ -import json -from dataclasses import dataclass, field -from pathlib import Path -from typing import TYPE_CHECKING, Any, Optional - -from unstructured import __name__ as integration_name -from unstructured.__version__ import __version__ as integration_version -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.utils.data_prep import batch_generator -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - FileData, - UploadContent, - Uploader, - UploaderConfig, - UploadStager, - UploadStagerConfig, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, -) -from unstructured.utils import requires_dependencies - -if TYPE_CHECKING: - from astrapy.db import AstraDBCollection - -CONNECTOR_TYPE = "astradb" - - -@dataclass -class AstraDBAccessConfig(AccessConfig): - token: str - api_endpoint: str - - -@dataclass -class AstraDBConnectionConfig(ConnectionConfig): - connection_type: str = CONNECTOR_TYPE - access_config: AstraDBAccessConfig = enhanced_field(sensitive=True) - - -@dataclass -class AstraDBUploadStagerConfig(UploadStagerConfig): - pass - - -@dataclass -class AstraDBUploadStager(UploadStager): - upload_stager_config: AstraDBUploadStagerConfig = field( - default_factory=lambda: AstraDBUploadStagerConfig() - ) - - def conform_dict(self, element_dict: dict) -> dict: - return { - "$vector": element_dict.pop("embeddings", None), - "content": element_dict.pop("text", None), - "metadata": element_dict, - } - - def run( - self, - elements_filepath: Path, - file_data: FileData, - output_dir: Path, - output_filename: str, - **kwargs: Any, - ) -> Path: - with open(elements_filepath) as elements_file: - elements_contents = json.load(elements_file) - conformed_elements = [] - for element in elements_contents: - conformed_elements.append(self.conform_dict(element_dict=element)) - output_path = Path(output_dir) / Path(f"{output_filename}.json") - with open(output_path, "w") as output_file: - json.dump(conformed_elements, output_file) - return output_path - - -@dataclass -class AstraDBUploaderConfig(UploaderConfig): - collection_name: str - embedding_dimension: int - namespace: Optional[str] = None - requested_indexing_policy: Optional[dict[str, Any]] = None - batch_size: int = 20 - - -@dataclass -class AstraDBUploader(Uploader): - connection_config: AstraDBConnectionConfig - upload_config: AstraDBUploaderConfig - connector_type: str = CONNECTOR_TYPE - - @requires_dependencies(["astrapy"], extras="astradb") - def get_collection(self) -> "AstraDBCollection": - from astrapy.db import AstraDB - - # Get the collection_name and embedding dimension - collection_name = self.upload_config.collection_name - embedding_dimension = self.upload_config.embedding_dimension - requested_indexing_policy = self.upload_config.requested_indexing_policy - - # If the user has requested an indexing policy, pass it to the Astra DB - options = {"indexing": requested_indexing_policy} if requested_indexing_policy else None - - # Build the Astra DB object. 
- # caller_name/version for Astra DB tracking - astra_db = AstraDB( - api_endpoint=self.connection_config.access_config.api_endpoint, - token=self.connection_config.access_config.token, - namespace=self.upload_config.namespace, - caller_name=integration_name, - caller_version=integration_version, - ) - - # Create and connect to the newly created collection - astra_db_collection = astra_db.create_collection( - collection_name=collection_name, - dimension=embedding_dimension, - options=options, - ) - return astra_db_collection - - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - elements_dict = [] - for content in contents: - with open(content.path) as elements_file: - elements = json.load(elements_file) - elements_dict.extend(elements) - - logger.info( - f"writing {len(elements_dict)} objects to destination " - f"collection {self.upload_config.collection_name}" - ) - - astra_batch_size = self.upload_config.batch_size - collection = self.get_collection() - - for chunk in batch_generator(elements_dict, astra_batch_size): - collection.insert_many(chunk) - - -astradb_destination_entry = DestinationRegistryEntry( - connection_config=AstraDBConnectionConfig, - upload_stager_config=AstraDBUploadStagerConfig, - upload_stager=AstraDBUploadStager, - uploader_config=AstraDBUploaderConfig, - uploader=AstraDBUploader, -) diff --git a/unstructured/ingest/v2/processes/connectors/azure_cognitive_search.py b/unstructured/ingest/v2/processes/connectors/azure_cognitive_search.py deleted file mode 100644 index aab7cfba4..000000000 --- a/unstructured/ingest/v2/processes/connectors/azure_cognitive_search.py +++ /dev/null @@ -1,208 +0,0 @@ -import json -import typing as t -import uuid -from dataclasses import dataclass, field -from pathlib import Path - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import DestinationConnectionError, WriteError -from unstructured.ingest.utils.data_prep import batch_generator -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - UploadContent, - Uploader, - UploaderConfig, - UploadStager, - UploadStagerConfig, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, - add_destination_entry, -) -from unstructured.ingest.v2.processes.connectors.utils import parse_datetime -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from azure.search.documents import SearchClient - - -CONNECTOR_TYPE = "azure_cognitive_search" - - -@dataclass -class AzureCognitiveSearchAccessConfig(AccessConfig): - key: t.Optional[str] = enhanced_field(default=None, overload_name="azure_cognitive_search_key") - - -@dataclass -class AzureCognitiveSearchConnectionConfig(ConnectionConfig): - endpoint: str - index: str - access_config: AzureCognitiveSearchAccessConfig = enhanced_field(sensitive=True) - - @requires_dependencies(["azure.search", "azure.core"], extras="azure-cognitive-search") - def generate_client(self) -> "SearchClient": - from azure.core.credentials import AzureKeyCredential - from azure.search.documents import SearchClient - - return SearchClient( - endpoint=self.endpoint, - index_name=self.index, - credential=AzureKeyCredential(self.access_config.key), - ) - - -@dataclass -class AzureCognitiveSearchUploadStagerConfig(UploadStagerConfig): - pass - - -@dataclass -class AzureCognitiveSearchUploaderConfig(UploaderConfig): - batch_size: int = 100 - - -@dataclass -class 
AzureCognitiveSearchUploadStager(UploadStager): - upload_stager_config: AzureCognitiveSearchUploadStagerConfig = field( - default_factory=lambda: AzureCognitiveSearchUploadStagerConfig() - ) - - @staticmethod - def conform_dict(data: dict) -> dict: - """ - updates the dictionary that is from each Element being converted into a dict/json - into a dictionary that conforms to the schema expected by the - Azure Cognitive Search index - """ - - data["id"] = str(uuid.uuid4()) - - if points := data.get("metadata", {}).get("coordinates", {}).get("points"): - data["metadata"]["coordinates"]["points"] = json.dumps(points) - if version := data.get("metadata", {}).get("data_source", {}).get("version"): - data["metadata"]["data_source"]["version"] = str(version) - if record_locator := data.get("metadata", {}).get("data_source", {}).get("record_locator"): - data["metadata"]["data_source"]["record_locator"] = json.dumps(record_locator) - if permissions_data := ( - data.get("metadata", {}).get("data_source", {}).get("permissions_data") - ): - data["metadata"]["data_source"]["permissions_data"] = json.dumps(permissions_data) - if links := data.get("metadata", {}).get("links"): - data["metadata"]["links"] = [json.dumps(link) for link in links] - if last_modified := data.get("metadata", {}).get("last_modified"): - data["metadata"]["last_modified"] = parse_datetime(last_modified).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ" - ) - if date_created := data.get("metadata", {}).get("data_source", {}).get("date_created"): - data["metadata"]["data_source"]["date_created"] = parse_datetime(date_created).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ" - ) - - if date_modified := data.get("metadata", {}).get("data_source", {}).get("date_modified"): - data["metadata"]["data_source"]["date_modified"] = parse_datetime( - date_modified - ).strftime("%Y-%m-%dT%H:%M:%S.%fZ") - - if date_processed := data.get("metadata", {}).get("data_source", {}).get("date_processed"): - data["metadata"]["data_source"]["date_processed"] = parse_datetime( - date_processed - ).strftime("%Y-%m-%dT%H:%M:%S.%fZ") - - if page_number := data.get("metadata", {}).get("page_number"): - data["metadata"]["page_number"] = str(page_number) - return data - - def run( - self, - elements_filepath: Path, - output_dir: Path, - output_filename: str, - **kwargs: t.Any, - ) -> Path: - with open(elements_filepath) as elements_file: - elements_contents = json.load(elements_file) - - conformed_elements = [self.conform_dict(data=element) for element in elements_contents] - - output_path = Path(output_dir) / Path(f"{output_filename}.json") - with open(output_path, "w") as output_file: - json.dump(conformed_elements, output_file) - return output_path - - -@dataclass -class AzureCognitiveSearchUploader(Uploader): - upload_config: AzureCognitiveSearchUploaderConfig - connection_config: AzureCognitiveSearchConnectionConfig - connector_type: str = CONNECTOR_TYPE - - @DestinationConnectionError.wrap - @requires_dependencies(["azure"], extras="azure-cognitive-search") - def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None: - import azure.core.exceptions - - logger.info( - f"writing {len(elements_dict)} documents to destination " - f"index at {self.connection_config.index}", - ) - try: - results = self.connection_config.generate_client().upload_documents( - documents=elements_dict - ) - - except azure.core.exceptions.HttpResponseError as http_error: - raise WriteError(f"http error: {http_error}") from http_error - errors = [] - success = [] - for result in 
results: - if result.succeeded: - success.append(result) - else: - errors.append(result) - logger.debug(f"results: {len(success)} successes, {len(errors)} failures") - if errors: - raise WriteError( - ", ".join( - [ - f"{error.key}: [{error.status_code}] {error.error_message}" - for error in errors - ], - ), - ) - - def write_dict_wrapper(self, elements_dict): - return self.write_dict(elements_dict=elements_dict) - - def run(self, contents: list[UploadContent], **kwargs: t.Any) -> None: - elements_dict = [] - for content in contents: - with open(content.path) as elements_file: - elements = json.load(elements_file) - elements_dict.extend(elements) - - logger.info( - f"writing document batches to destination" - f" endpoint at {str(self.connection_config.endpoint)}" - f" index at {str(self.connection_config.index)}" - f" with batch size {str(self.upload_config.batch_size)}" - ) - - batch_size = self.upload_config.batch_size - - for chunk in batch_generator(elements_dict, batch_size): - self.write_dict(elements_dict=chunk) # noqa: E203 - - -add_destination_entry( - destination_type=CONNECTOR_TYPE, - entry=DestinationRegistryEntry( - connection_config=AzureCognitiveSearchConnectionConfig, - uploader=AzureCognitiveSearchUploader, - uploader_config=AzureCognitiveSearchUploaderConfig, - upload_stager=AzureCognitiveSearchUploadStager, - upload_stager_config=AzureCognitiveSearchUploadStagerConfig, - ), -) diff --git a/unstructured/ingest/v2/processes/connectors/chroma.py b/unstructured/ingest/v2/processes/connectors/chroma.py deleted file mode 100644 index e28e3d7f7..000000000 --- a/unstructured/ingest/v2/processes/connectors/chroma.py +++ /dev/null @@ -1,208 +0,0 @@ -from __future__ import annotations - -import json -import uuid -from dataclasses import dataclass, field -from datetime import date, datetime -from pathlib import Path -from typing import TYPE_CHECKING, Any, Dict, Optional - -from chromadb.config import Settings -from dateutil import parser - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import DestinationConnectionError -from unstructured.ingest.utils.data_prep import batch_generator -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - FileData, - UploadContent, - Uploader, - UploaderConfig, - UploadStager, - UploadStagerConfig, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, -) -from unstructured.staging.base import flatten_dict -from unstructured.utils import requires_dependencies - -if TYPE_CHECKING: - from chromadb.api import ClientAPI - -CONNECTOR_TYPE = "chroma" - - -@dataclass -class ChromaAccessConfig(AccessConfig): - settings: Optional[Settings] = None - headers: Optional[Dict[str, str]] = None - - -@dataclass -class ChromaConnectionConfig(ConnectionConfig): - collection_name: str - access_config: ChromaAccessConfig = enhanced_field(sensitive=True) - path: Optional[str] = None - tenant: str = "default_tenant" - database: str = "default_database" - host: Optional[str] = None - port: Optional[int] = None - ssl: bool = False - connector_type: str = CONNECTOR_TYPE - - -@dataclass -class ChromaUploadStagerConfig(UploadStagerConfig): - pass - - -@dataclass -class ChromaUploadStager(UploadStager): - upload_stager_config: ChromaUploadStagerConfig = field( - default_factory=lambda: ChromaUploadStagerConfig() - ) - - @staticmethod - def parse_date_string(date_string: str) -> date: - try: - timestamp = 
float(date_string) - return datetime.fromtimestamp(timestamp) - except Exception as e: - logger.debug(f"date {date_string} string not a timestamp: {e}") - return parser.parse(date_string) - - @staticmethod - def conform_dict(data: dict) -> dict: - """ - Prepares dictionary in the format that Chroma requires - """ - element_id = data.get("element_id", str(uuid.uuid4())) - return { - "id": element_id, - "embedding": data.pop("embeddings", None), - "document": data.pop("text", None), - "metadata": flatten_dict(data, separator="-", flatten_lists=True, remove_none=True), - } - - def run( - self, - elements_filepath: Path, - file_data: FileData, - output_dir: Path, - output_filename: str, - **kwargs: Any, - ) -> Path: - with open(elements_filepath) as elements_file: - elements_contents = json.load(elements_file) - conformed_elements = [self.conform_dict(data=element) for element in elements_contents] - output_path = Path(output_dir) / Path(f"{output_filename}.json") - with open(output_path, "w") as output_file: - json.dump(conformed_elements, output_file) - return output_path - - -@dataclass -class ChromaUploaderConfig(UploaderConfig): - batch_size: int = 100 - - -@dataclass -class ChromaUploader(Uploader): - connector_type: str = CONNECTOR_TYPE - upload_config: ChromaUploaderConfig - connection_config: ChromaConnectionConfig - client: Optional[ClientAPI] = field(init=False) - - def __post_init__(self): - self.client = self.create_client() - - @requires_dependencies(["chromadb"], extras="chroma") - def create_client(self) -> ClientAPI: - import chromadb - - if self.connection_config.path: - return chromadb.PersistentClient( - path=self.connection_config.path, - settings=self.connection_config.access_config.settings, - tenant=self.connection_config.tenant, - database=self.connection_config.database, - ) - - elif self.connection_config.host and self.connection_config.port: - return chromadb.HttpClient( - host=self.connection_config.host, - port=self.connection_config.port, - ssl=self.connection_config.ssl, - headers=self.connection_config.access_config.headers, - settings=self.connection_config.access_config.settings, - tenant=self.connection_config.tenant, - database=self.connection_config.database, - ) - else: - raise ValueError("Chroma connector requires either path or host and port to be set.") - - @DestinationConnectionError.wrap - def upsert_batch(self, collection, batch): - - try: - # Chroma wants lists even if there is only one element - # Upserting to prevent duplicates - collection.upsert( - ids=batch["ids"], - documents=batch["documents"], - embeddings=batch["embeddings"], - metadatas=batch["metadatas"], - ) - except Exception as e: - raise ValueError(f"chroma error: {e}") from e - - @staticmethod - def prepare_chroma_list(chunk: tuple[dict[str, Any]]) -> dict[str, list[Any]]: - """Helper function to break a tuple of dicts into list of parallel lists for ChromaDb. 
- ({'id':1}, {'id':2}, {'id':3}) -> {'ids':[1,2,3]}""" - chroma_dict = {} - chroma_dict["ids"] = [x.get("id") for x in chunk] - chroma_dict["documents"] = [x.get("document") for x in chunk] - chroma_dict["embeddings"] = [x.get("embedding") for x in chunk] - chroma_dict["metadatas"] = [x.get("metadata") for x in chunk] - # Make sure all lists are of the same length - assert ( - len(chroma_dict["ids"]) - == len(chroma_dict["documents"]) - == len(chroma_dict["embeddings"]) - == len(chroma_dict["metadatas"]) - ) - return chroma_dict - - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - - elements_dict = [] - for content in contents: - with open(content.path) as elements_file: - elements = json.load(elements_file) - elements_dict.extend(elements) - - logger.info( - f"writing {len(elements_dict)} objects to destination " - f"collection {self.connection_config.collection_name} " - f"at {self.connection_config.host}", - ) - - collection = self.client.get_or_create_collection( - name=self.connection_config.collection_name - ) - for chunk in batch_generator(elements_dict, self.upload_config.batch_size): - self.upsert_batch(collection, self.prepare_chroma_list(chunk)) - - -chroma_destination_entry = DestinationRegistryEntry( - connection_config=ChromaConnectionConfig, - uploader=ChromaUploader, - uploader_config=ChromaUploaderConfig, - upload_stager=ChromaUploadStager, - upload_stager_config=ChromaUploadStagerConfig, -) diff --git a/unstructured/ingest/v2/processes/connectors/databricks_volumes.py b/unstructured/ingest/v2/processes/connectors/databricks_volumes.py deleted file mode 100644 index e875535c2..000000000 --- a/unstructured/ingest/v2/processes/connectors/databricks_volumes.py +++ /dev/null @@ -1,96 +0,0 @@ -import os -from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Any, Optional - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - UploadContent, - Uploader, - UploaderConfig, -) -from unstructured.ingest.v2.processes.connector_registry import DestinationRegistryEntry -from unstructured.utils import requires_dependencies - -if TYPE_CHECKING: - from databricks.sdk import WorkspaceClient - -CONNECTOR_TYPE = "databricks_volumes" - - -@dataclass -class DatabricksVolumesAccessConfig(AccessConfig): - account_id: Optional[str] = None - username: Optional[str] = None - password: Optional[str] = None - client_id: Optional[str] = None - client_secret: Optional[str] = None - token: Optional[str] = None - profile: Optional[str] = None - azure_workspace_resource_id: Optional[str] = None - azure_client_secret: Optional[str] = None - azure_client_id: Optional[str] = None - azure_tenant_id: Optional[str] = None - azure_environment: Optional[str] = None - auth_type: Optional[str] = None - cluster_id: Optional[str] = None - google_credentials: Optional[str] = None - google_service_account: Optional[str] = None - - -@dataclass -class DatabricksVolumesConnectionConfig(ConnectionConfig): - access_config: DatabricksVolumesAccessConfig = enhanced_field( - default_factory=DatabricksVolumesAccessConfig, sensitive=True - ) - host: Optional[str] = None - - -@dataclass -class DatabricksVolumesUploaderConfig(UploaderConfig): - volume: str - catalog: str - volume_path: Optional[str] = None - overwrite: bool = False - schema: str = "default" - - @property - def path(self) -> str: - path = f"/Volumes/{self.catalog}/{self.schema}/{self.volume}" - if self.volume_path: - path = 
f"{path}/{self.volume_path}" - return path - - -@dataclass -class DatabricksVolumesUploader(Uploader): - connector_type: str = CONNECTOR_TYPE - upload_config: DatabricksVolumesUploaderConfig - connection_config: DatabricksVolumesConnectionConfig - client: Optional["WorkspaceClient"] = field(init=False, default=None) - - @requires_dependencies(dependencies=["databricks.sdk"], extras="databricks-volumes") - def __post_init__(self) -> "WorkspaceClient": - from databricks.sdk import WorkspaceClient - - self.client = WorkspaceClient( - host=self.connection_config.host, **self.connection_config.access_config.to_dict() - ) - - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - for content in contents: - with open(content.path, "rb") as elements_file: - output_path = os.path.join(self.upload_config.path, content.path.name) - self.client.files.upload( - file_path=output_path, - contents=elements_file, - overwrite=self.upload_config.overwrite, - ) - - -databricks_volumes_destination_entry = DestinationRegistryEntry( - connection_config=DatabricksVolumesConnectionConfig, - uploader=DatabricksVolumesUploader, - uploader_config=DatabricksVolumesUploaderConfig, -) diff --git a/unstructured/ingest/v2/processes/connectors/elasticsearch.py b/unstructured/ingest/v2/processes/connectors/elasticsearch.py deleted file mode 100644 index 4a45bae1b..000000000 --- a/unstructured/ingest/v2/processes/connectors/elasticsearch.py +++ /dev/null @@ -1,401 +0,0 @@ -import hashlib -import json -import sys -import uuid -from dataclasses import dataclass, field -from pathlib import Path -from time import time -from typing import TYPE_CHECKING, Any, Generator, Optional - -from unstructured.documents.elements import DataSourceMetadata -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin, enhanced_field -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.utils.data_prep import generator_batching_wbytes -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - Downloader, - DownloaderConfig, - DownloadResponse, - FileData, - Indexer, - IndexerConfig, - UploadContent, - Uploader, - UploaderConfig, - UploadStager, - UploadStagerConfig, - download_responses, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, - SourceRegistryEntry, -) -from unstructured.staging.base import flatten_dict -from unstructured.utils import requires_dependencies - -if TYPE_CHECKING: - from elasticsearch import Elasticsearch as ElasticsearchClient - -CONNECTOR_TYPE = "elasticsearch" - - -@dataclass -class ElasticsearchAccessConfig(AccessConfig): - password: Optional[str] = None - api_key: Optional[str] = enhanced_field(default=None, overload_name="es_api_key") - bearer_auth: Optional[str] = None - ssl_assert_fingerprint: Optional[str] = None - - -@dataclass -class ElasticsearchClientInput(EnhancedDataClassJsonMixin): - hosts: Optional[list[str]] = None - cloud_id: Optional[str] = None - ca_certs: Optional[str] = None - basic_auth: Optional[tuple[str, str]] = enhanced_field(sensitive=True, default=None) - api_key: Optional[str] = enhanced_field(sensitive=True, default=None) - - -@dataclass -class ElasticsearchConnectionConfig(ConnectionConfig): - hosts: Optional[list[str]] = None - username: Optional[str] = None - cloud_id: Optional[str] = None - api_key_id: Optional[str] = None - ca_certs: Optional[str] = None - 
access_config: ElasticsearchAccessConfig = enhanced_field(sensitive=True) - - def get_client_kwargs(self) -> dict: - # Update auth related fields to conform to what the SDK expects based on the - # supported methods: - # https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/connecting.html - client_input = ElasticsearchClientInput() - if self.hosts: - client_input.hosts = self.hosts - if self.cloud_id: - client_input.cloud_id = self.cloud_id - if self.ca_certs: - client_input.ca_certs = self.ca_certs - if self.access_config.password and ( - self.cloud_id or self.ca_certs or self.access_config.ssl_assert_fingerprint - ): - client_input.basic_auth = ("elastic", self.access_config.password) - elif not self.cloud_id and self.username and self.access_config.password: - client_input.basic_auth = (self.username, self.access_config.password) - elif self.access_config.api_key and self.api_key_id: - client_input.api_key = (self.api_key_id, self.access_config.api_key) - elif self.access_config.api_key: - client_input.api_key = self.access_config.api_key - logger.debug( - f"Elasticsearch client inputs mapped to: {client_input.to_dict(redact_sensitive=True)}" - ) - client_kwargs = client_input.to_dict(redact_sensitive=False) - client_kwargs = {k: v for k, v in client_kwargs.items() if v is not None} - return client_kwargs - - @requires_dependencies(["elasticsearch"], extras="elasticsearch") - def get_client(self) -> "ElasticsearchClient": - from elasticsearch import Elasticsearch as ElasticsearchClient - - client = ElasticsearchClient(**self.get_client_kwargs()) - self.check_connection(client=client) - return client - - def check_connection(self, client: "ElasticsearchClient"): - try: - client.perform_request("HEAD", "/", headers={"accept": "application/json"}) - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - -@dataclass -class ElasticsearchIndexerConfig(IndexerConfig): - index_name: str - batch_size: int = 100 - - -@dataclass -class ElasticsearchIndexer(Indexer): - connection_config: ElasticsearchConnectionConfig - index_config: ElasticsearchIndexerConfig - client: "ElasticsearchClient" = field(init=False) - connector_type: str = CONNECTOR_TYPE - - def __post_init__(self): - self.client = self.connection_config.get_client() - - @requires_dependencies(["elasticsearch"], extras="elasticsearch") - def load_scan(self): - from elasticsearch.helpers import scan - - return scan - - def _get_doc_ids(self) -> set[str]: - """Fetches all document ids in an index""" - scan = self.load_scan() - - scan_query: dict = {"stored_fields": [], "query": {"match_all": {}}} - hits = scan( - self.client, - query=scan_query, - scroll="1m", - index=self.index_config.index_name, - ) - - return {hit["_id"] for hit in hits} - - def run(self, **kwargs: Any) -> Generator[FileData, None, None]: - all_ids = self._get_doc_ids() - ids = list(all_ids) - id_batches: list[frozenset[str]] = [ - frozenset( - ids[ - i - * self.index_config.batch_size : (i + 1) # noqa - * self.index_config.batch_size - ] - ) - for i in range( - (len(ids) + self.index_config.batch_size - 1) // self.index_config.batch_size - ) - ] - for batch in id_batches: - # Make sure the hash is always a positive number to create identified - identified = str(hash(batch) + sys.maxsize + 1) - yield FileData( - identifier=identified, - connector_type=CONNECTOR_TYPE, - metadata=DataSourceMetadata( - 
url=f"{self.connection_config.hosts[0]}/{self.index_config.index_name}", - date_processed=str(time()), - ), - additional_metadata={ - "ids": list(batch), - "index_name": self.index_config.index_name, - }, - ) - - -@dataclass -class ElasticsearchDownloaderConfig(DownloaderConfig): - fields: list[str] = field(default_factory=list) - - -@dataclass -class ElasticsearchDownloader(Downloader): - connection_config: ElasticsearchConnectionConfig - download_config: ElasticsearchDownloaderConfig - connector_type: str = CONNECTOR_TYPE - - def is_async(self) -> bool: - return True - - def get_identifier(self, index_name: str, record_id: str) -> str: - f = f"{index_name}-{record_id}" - if self.download_config.fields: - f = "{}-{}".format( - f, - hashlib.sha256(",".join(self.download_config.fields).encode()).hexdigest()[:8], - ) - return f - - def map_es_results(self, es_results: dict) -> str: - doc_body = es_results["_source"] - flattened_dict = flatten_dict(dictionary=doc_body) - str_values = [str(value) for value in flattened_dict.values()] - concatenated_values = "\n".join(str_values) - return concatenated_values - - def generate_download_response( - self, result: dict, index_name: str, file_data: FileData - ) -> DownloadResponse: - record_id = result["_id"] - filename_id = self.get_identifier(index_name=index_name, record_id=record_id) - filename = f"{filename_id}.txt" - download_path = self.download_dir / Path(filename) - logger.debug( - f"Downloading results from index {index_name} and id {record_id} to {download_path}" - ) - download_path.parent.mkdir(parents=True, exist_ok=True) - try: - with open(download_path, "w", encoding="utf8") as f: - f.write(self.map_es_results(es_results=result)) - except Exception as e: - logger.error( - f"failed to download from index {index_name} " - f"and id {record_id} to {download_path}: {e}", - exc_info=True, - ) - raise SourceConnectionNetworkError(f"failed to download file {file_data.identifier}") - return DownloadResponse( - file_data=FileData( - identifier=filename_id, - connector_type=CONNECTOR_TYPE, - metadata=DataSourceMetadata( - version=str(result["_version"]) if "_version" in result else None, - date_processed=str(time()), - record_locator={ - "hosts": self.connection_config.hosts, - "index_name": index_name, - "document_id": record_id, - }, - ), - ), - path=download_path, - ) - - def run(self, file_data: FileData, **kwargs: Any) -> download_responses: - raise NotImplementedError() - - @requires_dependencies(["elasticsearch"], extras="elasticsearch") - def load_async(self): - from elasticsearch import AsyncElasticsearch - from elasticsearch.helpers import async_scan - - return AsyncElasticsearch, async_scan - - async def run_async(self, file_data: FileData, **kwargs: Any) -> download_responses: - AsyncClient, async_scan = self.load_async() - - index_name: str = file_data.additional_metadata["index_name"] - ids: list[str] = file_data.additional_metadata["ids"] - - scan_query = { - "_source": self.download_config.fields, - "version": True, - "query": {"ids": {"values": ids}}, - } - - download_responses = [] - async with AsyncClient(**self.connection_config.get_client_kwargs()) as client: - async for result in async_scan( - client, - query=scan_query, - scroll="1m", - index=index_name, - ): - download_responses.append( - self.generate_download_response( - result=result, index_name=index_name, file_data=file_data - ) - ) - return download_responses - - -@dataclass -class ElasticsearchUploadStagerConfig(UploadStagerConfig): - index_name: str - - -@dataclass 
-class ElasticsearchUploadStager(UploadStager): - upload_stager_config: ElasticsearchUploadStagerConfig - - def conform_dict(self, data: dict) -> dict: - resp = { - "_index": self.upload_stager_config.index_name, - "_id": str(uuid.uuid4()), - "_source": { - "element_id": data.pop("element_id", None), - "embeddings": data.pop("embeddings", None), - "text": data.pop("text", None), - "type": data.pop("type", None), - }, - } - if "metadata" in data and isinstance(data["metadata"], dict): - resp["_source"]["metadata"] = flatten_dict(data["metadata"], separator="-") - return resp - - def run( - self, - elements_filepath: Path, - file_data: FileData, - output_dir: Path, - output_filename: str, - **kwargs: Any, - ) -> Path: - with open(elements_filepath) as elements_file: - elements_contents = json.load(elements_file) - conformed_elements = [self.conform_dict(data=element) for element in elements_contents] - output_path = Path(output_dir) / Path(f"{output_filename}.json") - with open(output_path, "w") as output_file: - json.dump(conformed_elements, output_file) - return output_path - - -@dataclass -class ElasticsearchUploaderConfig(UploaderConfig): - index_name: str - batch_size_bytes: int = 15_000_000 - num_threads: int = 4 - - -@dataclass -class ElasticsearchUploader(Uploader): - connector_type: str = CONNECTOR_TYPE - upload_config: ElasticsearchUploaderConfig - connection_config: ElasticsearchConnectionConfig - - @requires_dependencies(["elasticsearch"], extras="elasticsearch") - def load_parallel_bulk(self): - from elasticsearch.helpers import parallel_bulk - - return parallel_bulk - - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - parallel_bulk = self.load_parallel_bulk() - elements_dict = [] - for content in contents: - with open(content.path) as elements_file: - elements = json.load(elements_file) - elements_dict.extend(elements) - upload_destination = self.connection_config.hosts or self.connection_config.cloud_id - logger.info( - f"writing {len(elements_dict)} elements via document batches to destination " - f"index named {self.upload_config.index_name} at {upload_destination} with " - f"batch size (in bytes) {self.upload_config.batch_size_bytes} with " - f"{self.upload_config.num_threads} (number of) threads" - ) - - client = self.connection_config.get_client() - if not client.indices.exists(index=self.upload_config.index_name): - logger.warning( - f"{(self.__class__.__name__).replace('Uploader', '')} index does not exist: " - f"{self.upload_config.index_name}. " - f"This may cause issues when uploading." 
- ) - for batch in generator_batching_wbytes( - elements_dict, batch_size_limit_bytes=self.upload_config.batch_size_bytes - ): - for success, info in parallel_bulk( - client=client, - actions=batch, - thread_count=self.upload_config.num_threads, - ): - if not success: - logger.error( - "upload failed for a batch in " - f"{(self.__class__.__name__).replace('Uploader', '')} " - "destination connector:", - info, - ) - - -elasticsearch_source_entry = SourceRegistryEntry( - connection_config=ElasticsearchConnectionConfig, - indexer=ElasticsearchIndexer, - indexer_config=ElasticsearchIndexerConfig, - downloader=ElasticsearchDownloader, - downloader_config=ElasticsearchDownloaderConfig, -) - -elasticsearch_destination_entry = DestinationRegistryEntry( - connection_config=ElasticsearchConnectionConfig, - upload_stager_config=ElasticsearchUploadStagerConfig, - upload_stager=ElasticsearchUploadStager, - uploader_config=ElasticsearchUploaderConfig, - uploader=ElasticsearchUploader, -) diff --git a/unstructured/ingest/v2/processes/connectors/fsspec/__init__.py b/unstructured/ingest/v2/processes/connectors/fsspec/__init__.py deleted file mode 100644 index eacc0df96..000000000 --- a/unstructured/ingest/v2/processes/connectors/fsspec/__init__.py +++ /dev/null @@ -1,37 +0,0 @@ -from __future__ import annotations - -from unstructured.ingest.v2.processes.connector_registry import ( - add_destination_entry, - add_source_entry, -) - -from .azure import CONNECTOR_TYPE as AZURE_CONNECTOR_TYPE -from .azure import azure_destination_entry, azure_source_entry -from .box import CONNECTOR_TYPE as BOX_CONNECTOR_TYPE -from .box import box_destination_entry, box_source_entry -from .dropbox import CONNECTOR_TYPE as DROPBOX_CONNECTOR_TYPE -from .dropbox import dropbox_destination_entry, dropbox_source_entry -from .gcs import CONNECTOR_TYPE as GCS_CONNECTOR_TYPE -from .gcs import gcs_destination_entry, gcs_source_entry -from .s3 import CONNECTOR_TYPE as S3_CONNECTOR_TYPE -from .s3 import s3_destination_entry, s3_source_entry -from .sftp import CONNECTOR_TYPE as SFTP_CONNECTOR_TYPE -from .sftp import sftp_destination_entry, sftp_source_entry - -add_source_entry(source_type=AZURE_CONNECTOR_TYPE, entry=azure_source_entry) -add_destination_entry(destination_type=AZURE_CONNECTOR_TYPE, entry=azure_destination_entry) - -add_source_entry(source_type=BOX_CONNECTOR_TYPE, entry=box_source_entry) -add_destination_entry(destination_type=BOX_CONNECTOR_TYPE, entry=box_destination_entry) - -add_source_entry(source_type=DROPBOX_CONNECTOR_TYPE, entry=dropbox_source_entry) -add_destination_entry(destination_type=DROPBOX_CONNECTOR_TYPE, entry=dropbox_destination_entry) - -add_source_entry(source_type=GCS_CONNECTOR_TYPE, entry=gcs_source_entry) -add_destination_entry(destination_type=GCS_CONNECTOR_TYPE, entry=gcs_destination_entry) - -add_source_entry(source_type=S3_CONNECTOR_TYPE, entry=s3_source_entry) -add_destination_entry(destination_type=S3_CONNECTOR_TYPE, entry=s3_destination_entry) - -add_source_entry(source_type=SFTP_CONNECTOR_TYPE, entry=sftp_source_entry) -add_destination_entry(destination_type=SFTP_CONNECTOR_TYPE, entry=sftp_destination_entry) diff --git a/unstructured/ingest/v2/processes/connectors/fsspec/azure.py b/unstructured/ingest/v2/processes/connectors/fsspec/azure.py deleted file mode 100644 index 8dd756600..000000000 --- a/unstructured/ingest/v2/processes/connectors/fsspec/azure.py +++ /dev/null @@ -1,144 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass, field -from pathlib import Path 
-from typing import Any, Generator, Optional - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.v2.interfaces import DownloadResponse, FileData, UploadContent -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, - SourceRegistryEntry, -) -from unstructured.ingest.v2.processes.connectors.fsspec.fsspec import ( - FsspecAccessConfig, - FsspecConnectionConfig, - FsspecDownloader, - FsspecDownloaderConfig, - FsspecIndexer, - FsspecIndexerConfig, - FsspecUploader, - FsspecUploaderConfig, -) -from unstructured.ingest.v2.processes.connectors.fsspec.utils import json_serial, sterilize_dict -from unstructured.utils import requires_dependencies - -CONNECTOR_TYPE = "azure" - - -def azure_json_serial(obj): - from azure.storage.blob._models import ContentSettings - - if isinstance(obj, ContentSettings): - return dict(obj) - if isinstance(obj, bytearray): - return str(obj) - return json_serial(obj) - - -@dataclass -class AzureIndexerConfig(FsspecIndexerConfig): - pass - - -@dataclass -class AzureAccessConfig(FsspecAccessConfig): - account_name: Optional[str] = None - account_key: Optional[str] = None - connection_string: Optional[str] = None - sas_token: Optional[str] = None - - def __post_init__(self): - if self.connection_string is None and self.account_name is None: - raise ValueError("either connection_string or account_name must be set") - - -@dataclass -class AzureConnectionConfig(FsspecConnectionConfig): - supported_protocols: list[str] = field(default_factory=lambda: ["az"]) - access_config: AzureAccessConfig = enhanced_field( - sensitive=True, default_factory=lambda: AzureAccessConfig() - ) - connector_type: str = CONNECTOR_TYPE - - def get_access_config(self) -> dict[str, Any]: - # Avoid injecting None by filtering out k,v pairs where the value is None - access_configs: dict[str, Any] = { - k: v for k, v in self.access_config.to_dict().items() if v - } - return access_configs - - -@dataclass -class AzureIndexer(FsspecIndexer): - connection_config: AzureConnectionConfig - index_config: AzureIndexerConfig - connector_type: str = CONNECTOR_TYPE - - def sterilize_info(self, path) -> dict: - info = self.fs.info(path=path) - return sterilize_dict(data=info, default=azure_json_serial) - - @requires_dependencies(["adlfs", "fsspec"], extras="azure") - def run(self, **kwargs: Any) -> Generator[FileData, None, None]: - return super().run(**kwargs) - - -@dataclass -class AzureDownloaderConfig(FsspecDownloaderConfig): - pass - - -@dataclass -class AzureDownloader(FsspecDownloader): - protocol: str = "az" - connection_config: AzureConnectionConfig - connector_type: str = CONNECTOR_TYPE - download_config: Optional[AzureDownloaderConfig] = field(default_factory=AzureDownloaderConfig) - - @requires_dependencies(["adlfs", "fsspec"], extras="azure") - def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - return super().run(file_data=file_data, **kwargs) - - @requires_dependencies(["adlfs", "fsspec"], extras="azure") - async def run_async(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - return await super().run_async(file_data=file_data, **kwargs) - - -@dataclass -class AzureUploaderConfig(FsspecUploaderConfig): - pass - - -@dataclass -class AzureUploader(FsspecUploader): - connector_type: str = CONNECTOR_TYPE - connection_config: AzureConnectionConfig - upload_config: AzureUploaderConfig = field(default=None) - - @requires_dependencies(["adlfs", "fsspec"], extras="azure") - def 
__post_init__(self): - super().__post_init__() - - @requires_dependencies(["adlfs", "fsspec"], extras="azure") - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - return super().run(contents=contents, **kwargs) - - @requires_dependencies(["adlfs", "fsspec"], extras="azure") - async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> None: - return await super().run_async(path=path, file_data=file_data, **kwargs) - - -azure_source_entry = SourceRegistryEntry( - indexer=AzureIndexer, - indexer_config=AzureIndexerConfig, - downloader=AzureDownloader, - downloader_config=AzureDownloaderConfig, - connection_config=AzureConnectionConfig, -) - -azure_destination_entry = DestinationRegistryEntry( - uploader=AzureUploader, - uploader_config=AzureUploaderConfig, - connection_config=AzureConnectionConfig, -) diff --git a/unstructured/ingest/v2/processes/connectors/fsspec/box.py b/unstructured/ingest/v2/processes/connectors/fsspec/box.py deleted file mode 100644 index 77d60c79e..000000000 --- a/unstructured/ingest/v2/processes/connectors/fsspec/box.py +++ /dev/null @@ -1,131 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass, field -from pathlib import Path -from typing import Any, Generator, Optional - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.v2.interfaces import DownloadResponse, FileData, UploadContent -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, - SourceRegistryEntry, -) -from unstructured.ingest.v2.processes.connectors.fsspec.fsspec import ( - FsspecAccessConfig, - FsspecConnectionConfig, - FsspecDownloader, - FsspecDownloaderConfig, - FsspecIndexer, - FsspecIndexerConfig, - FsspecUploader, - FsspecUploaderConfig, -) -from unstructured.utils import requires_dependencies - -CONNECTOR_TYPE = "box" - - -@dataclass -class BoxIndexerConfig(FsspecIndexerConfig): - pass - - -@dataclass -class BoxAccessConfig(FsspecAccessConfig): - box_app_config: Optional[str] = None - - -@dataclass -class BoxConnectionConfig(FsspecConnectionConfig): - supported_protocols: list[str] = field(default_factory=lambda: ["box"]) - access_config: BoxAccessConfig = enhanced_field( - sensitive=True, default_factory=lambda: BoxAccessConfig() - ) - connector_type: str = CONNECTOR_TYPE - - def get_access_config(self) -> dict[str, Any]: - # Return access_kwargs with oauth. The oauth object can not be stored directly in the config - # because it is not serializable. 
- from boxsdk import JWTAuth - - access_kwargs_with_oauth: dict[str, Any] = { - "oauth": JWTAuth.from_settings_file( - self.access_config.box_app_config, - ), - } - access_config: dict[str, Any] = self.access_config.to_dict() - access_config.pop("box_app_config", None) - access_kwargs_with_oauth.update(access_config) - - return access_kwargs_with_oauth - - -@dataclass -class BoxIndexer(FsspecIndexer): - connection_config: BoxConnectionConfig - index_config: BoxIndexerConfig - connector_type: str = CONNECTOR_TYPE - - @requires_dependencies(["boxfs"], extras="box") - def run(self, **kwargs: Any) -> Generator[FileData, None, None]: - return super().run(**kwargs) - - -@dataclass -class BoxDownloaderConfig(FsspecDownloaderConfig): - pass - - -@dataclass -class BoxDownloader(FsspecDownloader): - protocol: str = "box" - connection_config: BoxConnectionConfig - connector_type: str = CONNECTOR_TYPE - download_config: Optional[BoxDownloaderConfig] = field(default_factory=BoxDownloaderConfig) - - @requires_dependencies(["boxfs"], extras="box") - def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - return super().run(file_data=file_data, **kwargs) - - @requires_dependencies(["boxfs"], extras="box") - async def run_async(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - return await super().run_async(file_data=file_data, **kwargs) - - -@dataclass -class BoxUploaderConfig(FsspecUploaderConfig): - pass - - -@dataclass -class BoxUploader(FsspecUploader): - connector_type: str = CONNECTOR_TYPE - connection_config: BoxConnectionConfig - upload_config: BoxUploaderConfig = field(default=None) - - @requires_dependencies(["boxfs"], extras="box") - def __post_init__(self): - super().__post_init__() - - @requires_dependencies(["boxfs"], extras="box") - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - return super().run(contents=contents, **kwargs) - - @requires_dependencies(["boxfs"], extras="box") - async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> None: - return await super().run_async(path=path, file_data=file_data, **kwargs) - - -box_source_entry = SourceRegistryEntry( - indexer=BoxIndexer, - indexer_config=BoxIndexerConfig, - downloader=BoxDownloader, - downloader_config=BoxDownloaderConfig, - connection_config=BoxConnectionConfig, -) - -box_destination_entry = DestinationRegistryEntry( - uploader=BoxUploader, - uploader_config=BoxUploaderConfig, - connection_config=BoxConnectionConfig, -) diff --git a/unstructured/ingest/v2/processes/connectors/fsspec/dropbox.py b/unstructured/ingest/v2/processes/connectors/fsspec/dropbox.py deleted file mode 100644 index 96dc3ba71..000000000 --- a/unstructured/ingest/v2/processes/connectors/fsspec/dropbox.py +++ /dev/null @@ -1,130 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass, field -from pathlib import Path -from typing import Any, Generator, Optional - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.v2.interfaces import DownloadResponse, FileData, UploadContent -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, - SourceRegistryEntry, -) -from unstructured.ingest.v2.processes.connectors.fsspec.fsspec import ( - FsspecAccessConfig, - FsspecConnectionConfig, - FsspecDownloader, - FsspecDownloaderConfig, - FsspecIndexer, - FsspecIndexerConfig, - FsspecUploader, - FsspecUploaderConfig, -) -from unstructured.ingest.v2.processes.connectors.fsspec.utils import 
sterilize_dict -from unstructured.utils import requires_dependencies - -CONNECTOR_TYPE = "dropbox" - - -@dataclass -class DropboxIndexerConfig(FsspecIndexerConfig): - pass - - -@dataclass -class DropboxAccessConfig(FsspecAccessConfig): - token: Optional[str] = None - - -@dataclass -class DropboxConnectionConfig(FsspecConnectionConfig): - supported_protocols: list[str] = field(default_factory=lambda: ["dropbox"]) - access_config: DropboxAccessConfig = enhanced_field( - sensitive=True, default_factory=lambda: DropboxAccessConfig() - ) - connector_type: str = CONNECTOR_TYPE - - -@dataclass -class DropboxIndexer(FsspecIndexer): - connection_config: DropboxConnectionConfig - index_config: DropboxIndexerConfig - connector_type: str = CONNECTOR_TYPE - - @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox") - def __post_init__(self): - # dropbox expects the path to start with a / - if not self.index_config.path_without_protocol.startswith("/"): - self.index_config.path_without_protocol = "/" + self.index_config.path_without_protocol - - @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox") - def run(self, **kwargs: Any) -> Generator[FileData, None, None]: - return super().run(**kwargs) - - def sterilize_info(self, path) -> dict: - # the fs.info method defined in the dropboxdrivefs library expects a "url" - # kwarg rather than "path"; though both refer to the same thing - info = self.fs.info(url=path) - return sterilize_dict(data=info) - - -@dataclass -class DropboxDownloaderConfig(FsspecDownloaderConfig): - pass - - -@dataclass -class DropboxDownloader(FsspecDownloader): - protocol: str = "dropbox" - connection_config: DropboxConnectionConfig - connector_type: str = CONNECTOR_TYPE - download_config: Optional[DropboxDownloaderConfig] = field( - default_factory=DropboxDownloaderConfig - ) - - @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox") - def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - return super().run(file_data=file_data, **kwargs) - - @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox") - async def run_async(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - return await super().run_async(file_data=file_data, **kwargs) - - -@dataclass -class DropboxUploaderConfig(FsspecUploaderConfig): - pass - - -@dataclass -class DropboxUploader(FsspecUploader): - connector_type: str = CONNECTOR_TYPE - connection_config: DropboxConnectionConfig - upload_config: DropboxUploaderConfig = field(default=None) - - @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox") - def __post_init__(self): - super().__post_init__() - - @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox") - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - return super().run(contents=contents, **kwargs) - - @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox") - async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> None: - return await super().run_async(path=path, file_data=file_data, **kwargs) - - -dropbox_source_entry = SourceRegistryEntry( - indexer=DropboxIndexer, - indexer_config=DropboxIndexerConfig, - downloader=DropboxDownloader, - downloader_config=DropboxDownloaderConfig, - connection_config=DropboxConnectionConfig, -) - -dropbox_destination_entry = DestinationRegistryEntry( - uploader=DropboxUploader, - uploader_config=DropboxUploaderConfig, - connection_config=DropboxConnectionConfig, -) diff --git 
a/unstructured/ingest/v2/processes/connectors/fsspec/fsspec.py b/unstructured/ingest/v2/processes/connectors/fsspec/fsspec.py deleted file mode 100644 index 2adfa99b0..000000000 --- a/unstructured/ingest/v2/processes/connectors/fsspec/fsspec.py +++ /dev/null @@ -1,344 +0,0 @@ -from __future__ import annotations - -import contextlib -import fnmatch -from dataclasses import dataclass, field -from datetime import datetime -from pathlib import Path -from time import time -from typing import TYPE_CHECKING, Any, Generator, Optional, TypeVar - -from unstructured.documents.elements import DataSourceMetadata -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - Downloader, - DownloaderConfig, - DownloadResponse, - FileData, - Indexer, - IndexerConfig, - SourceIdentifiers, - UploadContent, - Uploader, - UploaderConfig, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connectors.fsspec.utils import sterilize_dict - -if TYPE_CHECKING: - from fsspec import AbstractFileSystem - -CONNECTOR_TYPE = "fsspec" - - -class Base(object): - def __post_init__(self): - pass - - -@dataclass -class FileConfig(Base): - remote_url: str - protocol: str = field(init=False) - path_without_protocol: str = field(init=False) - supported_protocols: list[str] = field( - default_factory=lambda: [ - "s3", - "s3a", - "abfs", - "az", - "gs", - "gcs", - "box", - "dropbox", - "sftp", - ] - ) - - def __post_init__(self): - super().__post_init__() - self.protocol, self.path_without_protocol = self.remote_url.split("://") - if self.protocol not in self.supported_protocols: - raise ValueError( - "Protocol {} not supported yet, only {} are supported.".format( - self.protocol, ", ".join(self.supported_protocols) - ), - ) - - -@dataclass -class FsspecIndexerConfig(FileConfig, IndexerConfig): - recursive: bool = False - file_glob: Optional[list[str]] = None - - -@dataclass -class FsspecAccessConfig(AccessConfig): - pass - - -FsspecAccessConfigT = TypeVar("FsspecAccessConfigT", bound=FsspecAccessConfig) - - -@dataclass -class FsspecConnectionConfig(ConnectionConfig): - access_config: FsspecAccessConfigT = enhanced_field(sensitive=True, default=None) - connector_type: str = CONNECTOR_TYPE - - -FsspecIndexerConfigT = TypeVar("FsspecIndexerConfigT", bound=FsspecIndexerConfig) -FsspecConnectionConfigT = TypeVar("FsspecConnectionConfigT", bound=FsspecConnectionConfig) - - -@dataclass -class FsspecIndexer(Indexer): - connection_config: FsspecConnectionConfigT - index_config: FsspecIndexerConfigT - connector_type: str = CONNECTOR_TYPE - - @property - def fs(self) -> "AbstractFileSystem": - from fsspec import get_filesystem_class - - return get_filesystem_class(self.index_config.protocol)( - **self.connection_config.get_access_config(), - ) - - def does_path_match_glob(self, path: str) -> bool: - if self.index_config.file_glob is None: - return True - patterns = self.index_config.file_glob - for pattern in patterns: - if fnmatch.filter([path], pattern): - return True - logger.debug(f"The file {path!r} is discarded as it does not match any given glob.") - return False - - def check_connection(self): - from fsspec import get_filesystem_class - - try: - fs = get_filesystem_class(self.index_config.protocol)( - **self.connection_config.get_access_config(), - ) - fs.ls(path=self.index_config.path_without_protocol, 
detail=False) - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - def list_files(self) -> list[str]: - if not self.index_config.recursive: - # fs.ls does not walk directories - # directories that are listed in cloud storage can cause problems - # because they are seen as 0 byte files - found = self.fs.ls(self.index_config.path_without_protocol, detail=True) - if isinstance(found, list): - return [ - x.get("name") for x in found if x.get("size") > 0 and x.get("type") == "file" - ] - else: - raise TypeError(f"unhandled response type from ls: {type(found)}") - else: - # fs.find will recursively walk directories - # "size" is a common key for all the cloud protocols with fs - found = self.fs.find( - self.index_config.path_without_protocol, - detail=True, - ) - if isinstance(found, dict): - return [ - k for k, v in found.items() if v.get("size") > 0 and v.get("type") == "file" - ] - else: - raise TypeError(f"unhandled response type from find: {type(found)}") - - def get_metadata(self, path: str) -> DataSourceMetadata: - date_created = None - date_modified = None - - try: - created: Optional[Any] = self.fs.created(path) - if created: - if isinstance(created, datetime): - date_created = str(created.timestamp()) - else: - date_created = str(created) - except NotImplementedError: - pass - - try: - modified: Optional[Any] = self.fs.modified(path) - if modified: - if isinstance(modified, datetime): - date_modified = str(modified.timestamp()) - else: - date_modified = str(modified) - except NotImplementedError: - pass - - version = self.fs.checksum(path) - metadata: dict[str, str] = {} - with contextlib.suppress(AttributeError): - metadata = self.fs.metadata(path) - record_locator = { - "protocol": self.index_config.protocol, - "remote_file_path": self.index_config.remote_url, - } - file_stat = self.fs.stat(path=path) - if file_id := file_stat.get("id"): - record_locator["file_id"] = file_id - if metadata: - record_locator["metadata"] = metadata - return DataSourceMetadata( - date_created=date_created, - date_modified=date_modified, - date_processed=str(time()), - version=str(version), - url=f"{self.index_config.protocol}://{path}", - record_locator=record_locator, - ) - - def sterilize_info(self, path) -> dict: - info = self.fs.info(path=path) - return sterilize_dict(data=info) - - def run(self, **kwargs: Any) -> Generator[FileData, None, None]: - raw_files = self.list_files() - files = [f for f in raw_files if self.does_path_match_glob(f)] - for file in files: - # Note: we remove any remaining leading slashes (Box introduces these) - # to get a valid relative path - rel_path = file.replace(self.index_config.path_without_protocol, "").lstrip("/") - yield FileData( - identifier=file, - connector_type=self.connector_type, - source_identifiers=SourceIdentifiers( - filename=Path(file).name, - rel_path=rel_path or None, - fullpath=file, - ), - metadata=self.get_metadata(path=file), - additional_metadata=self.sterilize_info(path=file), - ) - - -@dataclass -class FsspecDownloaderConfig(DownloaderConfig): - pass - - -FsspecDownloaderConfigT = TypeVar("FsspecDownloaderConfigT", bound=FsspecDownloaderConfig) - - -@dataclass -class FsspecDownloader(Downloader): - protocol: str - connection_config: FsspecConnectionConfigT - connector_type: str = CONNECTOR_TYPE - download_config: Optional[FsspecDownloaderConfigT] = field( - default_factory=lambda: FsspecDownloaderConfig() - ) - - def is_async(self) 
-> bool: - return self.fs.async_impl - - @property - def fs(self) -> "AbstractFileSystem": - from fsspec import get_filesystem_class - - return get_filesystem_class(self.protocol)( - **self.connection_config.get_access_config(), - ) - - def get_download_path(self, file_data: FileData) -> Path: - return ( - self.download_dir / Path(file_data.source_identifiers.relative_path) - if self.download_config - else Path(file_data.source_identifiers.rel_path) - ) - - def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - download_path = self.get_download_path(file_data=file_data) - download_path.parent.mkdir(parents=True, exist_ok=True) - try: - self.fs.get(rpath=file_data.identifier, lpath=download_path.as_posix()) - except Exception as e: - logger.error(f"failed to download file {file_data.identifier}: {e}", exc_info=True) - raise SourceConnectionNetworkError(f"failed to download file {file_data.identifier}") - return self.generate_download_response(file_data=file_data, download_path=download_path) - - async def async_run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - download_path = self.get_download_path(file_data=file_data) - download_path.parent.mkdir(parents=True, exist_ok=True) - try: - await self.fs.get(rpath=file_data.identifier, lpath=download_path.as_posix()) - except Exception as e: - logger.error(f"failed to download file {file_data.identifier}: {e}", exc_info=True) - raise SourceConnectionNetworkError(f"failed to download file {file_data.identifier}") - return self.generate_download_response(file_data=file_data, download_path=download_path) - - -@dataclass -class FsspecUploaderConfig(FileConfig, UploaderConfig): - overwrite: bool = False - - -FsspecUploaderConfigT = TypeVar("FsspecUploaderConfigT", bound=FsspecUploaderConfig) - - -@dataclass -class FsspecUploader(Uploader): - connector_type: str = CONNECTOR_TYPE - upload_config: FsspecUploaderConfigT = field(default=None) - - @property - def fs(self) -> "AbstractFileSystem": - from fsspec import get_filesystem_class - - fs_kwargs = self.connection_config.get_access_config() if self.connection_config else {} - return get_filesystem_class(self.upload_config.protocol)( - **fs_kwargs, - ) - - def __post_init__(self): - # TODO once python3.9 no longer supported and kw_only is allowed in dataclasses, remove: - if not self.upload_config: - raise TypeError( - f"{self.__class__.__name__}.__init__() " - f"missing 1 required positional argument: 'upload_config'" - ) - - def get_upload_path(self, file_data: FileData) -> Path: - upload_path = ( - Path(self.upload_config.path_without_protocol) - / file_data.source_identifiers.relative_path - ) - updated_upload_path = upload_path.parent / f"{upload_path.name}.json" - return updated_upload_path - - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - for content in contents: - self._run(path=content.path, file_data=content.file_data) - - def _run(self, path: Path, file_data: FileData) -> None: - path_str = str(path.resolve()) - upload_path = self.get_upload_path(file_data=file_data) - if self.fs.exists(path=str(upload_path)) and not self.upload_config.overwrite: - logger.debug(f"Skipping upload of {path} to {upload_path}, file already exists") - return - logger.debug(f"Writing local file {path_str} to {upload_path}") - self.fs.upload(lpath=path_str, rpath=str(upload_path)) - - async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> None: - upload_path = self.get_upload_path(file_data=file_data) - path_str = str(path.resolve()) 
- # Odd that fsspec doesn't run exists() as async even when client support async - already_exists = self.fs.exists(path=str(upload_path)) - if already_exists and not self.upload_config.overwrite: - logger.debug(f"Skipping upload of {path} to {upload_path}, file already exists") - return - logger.debug(f"Writing local file {path_str} to {upload_path}") - self.fs.upload(lpath=path_str, rpath=str(upload_path)) diff --git a/unstructured/ingest/v2/processes/connectors/fsspec/gcs.py b/unstructured/ingest/v2/processes/connectors/fsspec/gcs.py deleted file mode 100644 index 2c51f1c12..000000000 --- a/unstructured/ingest/v2/processes/connectors/fsspec/gcs.py +++ /dev/null @@ -1,141 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass, field -from pathlib import Path -from typing import Any, Generator, Optional, Union - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.utils.string_and_date_utils import json_to_dict -from unstructured.ingest.v2.interfaces import DownloadResponse, FileData, UploadContent -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, - SourceRegistryEntry, -) -from unstructured.ingest.v2.processes.connectors.fsspec.fsspec import ( - FsspecAccessConfig, - FsspecConnectionConfig, - FsspecDownloader, - FsspecDownloaderConfig, - FsspecIndexer, - FsspecIndexerConfig, - FsspecUploader, - FsspecUploaderConfig, -) -from unstructured.utils import requires_dependencies - -CONNECTOR_TYPE = "gcs" - - -@dataclass -class GcsIndexerConfig(FsspecIndexerConfig): - pass - - -@dataclass -class GcsAccessConfig(FsspecAccessConfig): - service_account_key: Optional[str] = None - token: Union[str, dict, None] = field(init=False, default=None) - - def __post_init__(self): - ALLOWED_AUTH_VALUES = "google_default", "cache", "anon", "browser", "cloud" - - # Case: null value - if not self.service_account_key: - return - - # Case: one of auth constants - if self.service_account_key in ALLOWED_AUTH_VALUES: - self.token = self.service_account_key - return - - # Case: token as json - if isinstance(json_to_dict(self.service_account_key), dict): - self.token = json_to_dict(self.service_account_key) - return - - # Case: path to token - if Path(self.service_account_key).is_file(): - self.token = self.service_account_key - return - - raise ValueError("Invalid auth token value") - - -@dataclass -class GcsConnectionConfig(FsspecConnectionConfig): - supported_protocols: list[str] = field(default_factory=lambda: ["gs", "gcs"]) - access_config: GcsAccessConfig = enhanced_field( - sensitive=True, default_factory=lambda: GcsAccessConfig() - ) - connector_type: str = CONNECTOR_TYPE - - -@dataclass -class GcsIndexer(FsspecIndexer): - connection_config: GcsConnectionConfig - index_config: GcsIndexerConfig - connector_type: str = CONNECTOR_TYPE - - @requires_dependencies(["gcsfs", "fsspec"], extras="gcs") - def run(self, **kwargs: Any) -> Generator[FileData, None, None]: - return super().run(**kwargs) - - -@dataclass -class GcsDownloaderConfig(FsspecDownloaderConfig): - pass - - -@dataclass -class GcsDownloader(FsspecDownloader): - protocol: str = "gcs" - connection_config: GcsConnectionConfig - connector_type: str = CONNECTOR_TYPE - download_config: Optional[GcsDownloaderConfig] = field(default_factory=GcsDownloaderConfig) - - @requires_dependencies(["gcsfs", "fsspec"], extras="gcs") - def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - return super().run(file_data=file_data, **kwargs) - - 
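
    The GcsAccessConfig removed above accepts a single credential field and decides at init time whether it is an fsspec auth constant, inline service-account JSON, or a path to a key file. A self-contained sketch of that resolution logic (illustrative only; the helper name resolve_gcs_token is hypothetical, and a try/except stands in for the project's json_to_dict):

        import json
        from pathlib import Path
        from typing import Optional, Union

        AUTH_CONSTANTS = ("google_default", "cache", "anon", "browser", "cloud")

        def resolve_gcs_token(service_account_key: Optional[str]) -> Union[str, dict, None]:
            if not service_account_key:
                return None  # fall back to ambient credentials
            if service_account_key in AUTH_CONSTANTS:
                return service_account_key  # gcsfs understands these directly
            try:
                parsed = json.loads(service_account_key)
            except json.JSONDecodeError:
                parsed = None
            if isinstance(parsed, dict):
                return parsed  # inline service-account JSON
            if Path(service_account_key).is_file():
                return service_account_key  # path to a key file
            raise ValueError("Invalid auth token value")
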
@requires_dependencies(["gcsfs", "fsspec"], extras="gcs") - async def run_async(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - return await super().run_async(file_data=file_data, **kwargs) - - -@dataclass -class GcsUploaderConfig(FsspecUploaderConfig): - pass - - -@dataclass -class GcsUploader(FsspecUploader): - connector_type: str = CONNECTOR_TYPE - connection_config: GcsConnectionConfig - upload_config: GcsUploaderConfig = field(default=None) - - @requires_dependencies(["gcsfs", "fsspec"], extras="gcs") - def __post_init__(self): - super().__post_init__() - - @requires_dependencies(["gcsfs", "fsspec"], extras="gcs") - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - return super().run(contents=contents, **kwargs) - - @requires_dependencies(["gcsfs", "fsspec"], extras="gcs") - async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> None: - return await super().run_async(path=path, file_data=file_data, **kwargs) - - -gcs_source_entry = SourceRegistryEntry( - indexer=GcsIndexer, - indexer_config=GcsIndexerConfig, - downloader=GcsDownloader, - downloader_config=GcsDownloaderConfig, - connection_config=GcsConnectionConfig, -) - -gcs_destination_entry = DestinationRegistryEntry( - uploader=GcsUploader, - uploader_config=GcsUploaderConfig, - connection_config=GcsConnectionConfig, -) diff --git a/unstructured/ingest/v2/processes/connectors/fsspec/s3.py b/unstructured/ingest/v2/processes/connectors/fsspec/s3.py deleted file mode 100644 index 7f48bdc81..000000000 --- a/unstructured/ingest/v2/processes/connectors/fsspec/s3.py +++ /dev/null @@ -1,163 +0,0 @@ -import contextlib -from dataclasses import dataclass, field -from datetime import datetime -from pathlib import Path -from time import time -from typing import Any, Generator, Optional - -from unstructured.documents.elements import DataSourceMetadata -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.v2.interfaces import DownloadResponse, FileData, UploadContent -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, - SourceRegistryEntry, -) -from unstructured.ingest.v2.processes.connectors.fsspec.fsspec import ( - FsspecAccessConfig, - FsspecConnectionConfig, - FsspecDownloader, - FsspecDownloaderConfig, - FsspecIndexer, - FsspecIndexerConfig, - FsspecUploader, - FsspecUploaderConfig, -) -from unstructured.utils import requires_dependencies - -CONNECTOR_TYPE = "s3" - - -@dataclass -class S3IndexerConfig(FsspecIndexerConfig): - pass - - -@dataclass -class S3AccessConfig(FsspecAccessConfig): - key: Optional[str] = None - secret: Optional[str] = None - token: Optional[str] = None - - -@dataclass -class S3ConnectionConfig(FsspecConnectionConfig): - supported_protocols: list[str] = field(default_factory=lambda: ["s3", "s3a"]) - access_config: S3AccessConfig = enhanced_field( - sensitive=True, default_factory=lambda: S3AccessConfig() - ) - endpoint_url: Optional[str] = None - anonymous: bool = False - connector_type: str = CONNECTOR_TYPE - - def get_access_config(self) -> dict[str, Any]: - access_configs: dict[str, Any] = {"anon": self.anonymous} - if self.endpoint_url: - access_configs["endpoint_url"] = self.endpoint_url - - # Avoid injecting None by filtering out k,v pairs where the value is None - access_configs.update({k: v for k, v in self.access_config.to_dict().items() if v}) - return access_configs - - -@dataclass -class S3Indexer(FsspecIndexer): - connection_config: S3ConnectionConfig - index_config: 
S3IndexerConfig - connector_type: str = CONNECTOR_TYPE - - def get_metadata(self, path: str) -> DataSourceMetadata: - date_created = None - date_modified = None - try: - modified: Optional[datetime] = self.fs.modified(path) - if modified: - date_created = str(modified.timestamp()) - date_modified = str(modified.timestamp()) - except NotImplementedError: - pass - - version = None - info: dict[str, Any] = self.fs.info(path) - if etag := info.get("ETag"): - version = str(etag).rstrip('"').lstrip('"') - metadata: dict[str, str] = {} - with contextlib.suppress(AttributeError): - metadata = self.fs.metadata(path) - record_locator = { - "protocol": self.index_config.protocol, - "remote_file_path": self.index_config.remote_url, - } - if metadata: - record_locator["metadata"] = metadata - return DataSourceMetadata( - date_created=date_created, - date_modified=date_modified, - date_processed=str(time()), - version=version, - url=f"{self.index_config.protocol}://{path}", - record_locator=record_locator, - ) - - @requires_dependencies(["s3fs", "fsspec"], extras="s3") - def run(self, **kwargs: Any) -> Generator[FileData, None, None]: - return super().run(**kwargs) - - -@dataclass -class S3DownloaderConfig(FsspecDownloaderConfig): - pass - - -@dataclass -class S3Downloader(FsspecDownloader): - protocol: str = "s3" - connection_config: S3ConnectionConfig - connector_type: str = CONNECTOR_TYPE - download_config: Optional[S3DownloaderConfig] = field(default_factory=S3DownloaderConfig) - - @requires_dependencies(["s3fs", "fsspec"], extras="s3") - def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - return super().run(file_data=file_data, **kwargs) - - @requires_dependencies(["s3fs", "fsspec"], extras="s3") - async def run_async(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - return await super().run_async(file_data=file_data, **kwargs) - - -@dataclass -class S3UploaderConfig(FsspecUploaderConfig): - pass - - -@dataclass -class S3Uploader(FsspecUploader): - connector_type: str = CONNECTOR_TYPE - connection_config: S3ConnectionConfig - upload_config: S3UploaderConfig = field(default=None) - - @requires_dependencies(["s3fs", "fsspec"], extras="s3") - def __post_init__(self): - super().__post_init__() - - @requires_dependencies(["s3fs", "fsspec"], extras="s3") - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - return super().run(contents=contents, **kwargs) - - @requires_dependencies(["s3fs", "fsspec"], extras="s3") - async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> None: - return await super().run_async(path=path, file_data=file_data, **kwargs) - - -s3_source_entry = SourceRegistryEntry( - indexer=S3Indexer, - indexer_config=S3IndexerConfig, - downloader=S3Downloader, - downloader_config=S3DownloaderConfig, - connection_config=S3ConnectionConfig, -) - -s3_destination_entry = DestinationRegistryEntry( - uploader=S3Uploader, - uploader_config=S3UploaderConfig, - connection_config=S3ConnectionConfig, -) diff --git a/unstructured/ingest/v2/processes/connectors/fsspec/sftp.py b/unstructured/ingest/v2/processes/connectors/fsspec/sftp.py deleted file mode 100644 index d73a22195..000000000 --- a/unstructured/ingest/v2/processes/connectors/fsspec/sftp.py +++ /dev/null @@ -1,166 +0,0 @@ -from __future__ import annotations - -import os -from dataclasses import dataclass, field -from pathlib import Path -from typing import Any, Generator, Optional -from urllib.parse import urlparse - -from unstructured.ingest.enhanced_dataclass import 
enhanced_field -from unstructured.ingest.v2.interfaces import DownloadResponse, FileData, UploadContent -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, - SourceRegistryEntry, -) -from unstructured.ingest.v2.processes.connectors.fsspec.fsspec import ( - FsspecAccessConfig, - FsspecConnectionConfig, - FsspecDownloader, - FsspecDownloaderConfig, - FsspecIndexer, - FsspecIndexerConfig, - FsspecUploader, - FsspecUploaderConfig, -) -from unstructured.utils import requires_dependencies - -CONNECTOR_TYPE = "sftp" - - -@dataclass -class SftpIndexerConfig(FsspecIndexerConfig): - def __post_init__(self): - super().__post_init__() - _, ext = os.path.splitext(self.remote_url) - parsed_url = urlparse(self.remote_url) - if ext: - self.path_without_protocol = Path(parsed_url.path).parent.as_posix().lstrip("/") - else: - self.path_without_protocol = parsed_url.path.lstrip("/") - - -@dataclass -class SftpAccessConfig(FsspecAccessConfig): - password: str - - -@dataclass -class SftpConnectionConfig(FsspecConnectionConfig): - supported_protocols: list[str] = field(default_factory=lambda: ["sftp"]) - access_config: SftpAccessConfig = enhanced_field(sensitive=True) - connector_type: str = CONNECTOR_TYPE - username: Optional[str] = None - host: Optional[str] = None - port: int = 22 - look_for_keys: bool = False - allow_agent: bool = False - - def get_access_config(self) -> dict[str, Any]: - access_config = { - "username": self.username, - "host": self.host, - "port": self.port, - "look_for_keys": self.look_for_keys, - "allow_agent": self.allow_agent, - "password": self.access_config.password, - } - return access_config - - -@dataclass -class SftpIndexer(FsspecIndexer): - connection_config: SftpConnectionConfig - index_config: SftpIndexerConfig - connector_type: str = CONNECTOR_TYPE - - @requires_dependencies(["paramiko", "fsspec"], extras="sftp") - def __post_init__(self): - parsed_url = urlparse(self.index_config.remote_url) - self.connection_config.host = parsed_url.hostname or self.connection_config.host - self.connection_config.port = parsed_url.port or self.connection_config.port - - @requires_dependencies(["paramiko", "fsspec"], extras="sftp") - def run(self, **kwargs: Any) -> Generator[FileData, None, None]: - for file in super().run(**kwargs): - new_identifier = ( - f"sftp://" - f"{self.connection_config.host}:" - f"{self.connection_config.port}/" - f"{file.identifier}" - ) - file.identifier = new_identifier - yield file - - -@dataclass -class SftpDownloaderConfig(FsspecDownloaderConfig): - remote_url: Optional[str] = None - - def __post_init__(self): - # TODO once python3.9 no longer supported and kw_only is allowed in dataclasses, remove: - if not self.remote_url: - raise TypeError( - f"{self.__class__.__name__}.__init__() " - f"missing 1 required positional argument: 'remote_url'" - ) - - -@dataclass -class SftpDownloader(FsspecDownloader): - protocol: str = "sftp" - connection_config: SftpConnectionConfig - connector_type: str = CONNECTOR_TYPE - download_config: Optional[SftpDownloaderConfig] = field(default_factory=SftpDownloaderConfig) - - @requires_dependencies(["paramiko", "fsspec"], extras="sftp") - def __post_init__(self): - parsed_url = urlparse(self.download_config.remote_url) - self.connection_config.host = parsed_url.hostname or self.connection_config.host - self.connection_config.port = parsed_url.port or self.connection_config.port - - @requires_dependencies(["paramiko", "fsspec"], extras="sftp") - def run(self, file_data: FileData, **kwargs: 
Any) -> DownloadResponse: - return super().run(file_data=file_data, **kwargs) - - @requires_dependencies(["paramiko", "fsspec"], extras="sftp") - async def run_async(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - return await super().run_async(file_data=file_data, **kwargs) - - -@dataclass -class SftpUploaderConfig(FsspecUploaderConfig): - pass - - -@dataclass -class SftpUploader(FsspecUploader): - connector_type: str = CONNECTOR_TYPE - connection_config: SftpConnectionConfig - upload_config: SftpUploaderConfig = field(default=None) - - @requires_dependencies(["paramiko", "fsspec"], extras="sftp") - def __post_init__(self): - super().__post_init__() - - @requires_dependencies(["paramiko", "fsspec"], extras="sftp") - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - return super().run(contents=contents, **kwargs) - - @requires_dependencies(["paramiko", "fsspec"], extras="sftp") - async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> None: - return await super().run_async(path=path, file_data=file_data, **kwargs) - - -sftp_source_entry = SourceRegistryEntry( - indexer=SftpIndexer, - indexer_config=SftpIndexerConfig, - downloader=SftpDownloader, - downloader_config=SftpDownloaderConfig, - connection_config=SftpConnectionConfig, -) - -sftp_destination_entry = DestinationRegistryEntry( - uploader=SftpUploader, - uploader_config=SftpUploaderConfig, - connection_config=SftpConnectionConfig, -) diff --git a/unstructured/ingest/v2/processes/connectors/fsspec/utils.py b/unstructured/ingest/v2/processes/connectors/fsspec/utils.py deleted file mode 100644 index e852e21dd..000000000 --- a/unstructured/ingest/v2/processes/connectors/fsspec/utils.py +++ /dev/null @@ -1,17 +0,0 @@ -import json -from datetime import datetime -from pathlib import Path -from typing import Callable - - -def json_serial(obj): - if isinstance(obj, Path): - return obj.as_posix() - if isinstance(obj, datetime): - return obj.isoformat() - raise TypeError("Type %s not serializable" % type(obj)) - - -def sterilize_dict(data: dict, default: Callable = json_serial) -> dict: - data_s = json.dumps(data, default=default) - return json.loads(data_s) diff --git a/unstructured/ingest/v2/processes/connectors/google_drive.py b/unstructured/ingest/v2/processes/connectors/google_drive.py deleted file mode 100644 index 8d61671cf..000000000 --- a/unstructured/ingest/v2/processes/connectors/google_drive.py +++ /dev/null @@ -1,335 +0,0 @@ -import io -import os -from dataclasses import dataclass, field -from pathlib import Path -from typing import TYPE_CHECKING, Any, Generator, Optional, Union - -from dateutil import parser - -from unstructured.documents.elements import DataSourceMetadata -from unstructured.file_utils.google_filetype import GOOGLE_DRIVE_EXPORT_TYPES -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionNetworkError -from unstructured.ingest.utils.string_and_date_utils import json_to_dict -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - Downloader, - DownloaderConfig, - FileData, - Indexer, - IndexerConfig, - SourceIdentifiers, - download_responses, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import ( - SourceRegistryEntry, -) -from unstructured.utils import requires_dependencies - -CONNECTOR_TYPE = "google_drive" - -if TYPE_CHECKING: - from googleapiclient.discovery import Resource as GoogleAPIResource 
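
    The fsspec/utils.py helpers removed above exist because raw filesystem info dicts can carry values (Path, datetime, provider-specific objects) that are not JSON-serializable, yet they get attached to FileData.additional_metadata, which must serialize cleanly. A self-contained sketch of the same round-trip trick, with an inline stand-in for json_serial:

        import json
        from datetime import datetime, timezone
        from pathlib import Path

        def _default(obj):
            # Same idea as the removed json_serial: coerce non-JSON types to strings.
            if isinstance(obj, Path):
                return obj.as_posix()
            if isinstance(obj, datetime):
                return obj.isoformat()
            raise TypeError(f"Type {type(obj)} not serializable")

        info = {
            "name": Path("bucket/prefix/report.pdf"),
            "size": 1024,
            "LastModified": datetime(2024, 5, 1, tzinfo=timezone.utc),
        }
        clean = json.loads(json.dumps(info, default=_default))
        # -> {'name': 'bucket/prefix/report.pdf', 'size': 1024,
        #     'LastModified': '2024-05-01T00:00:00+00:00'}
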
- from googleapiclient.http import MediaIoBaseDownload - - -@dataclass -class GoogleDriveAccessConfig(AccessConfig): - service_account_key: Union[str, dict] - - -@dataclass -class GoogleDriveConnectionConfig(ConnectionConfig): - drive_id: str - access_config: GoogleDriveAccessConfig = enhanced_field(sensitive=True) - - @requires_dependencies(["googleapiclient"], extras="google-drive") - def get_files_service(self) -> "GoogleAPIResource": - from google.auth import default, exceptions - from google.oauth2 import service_account - from googleapiclient.discovery import build - from googleapiclient.errors import HttpError - - # Service account key can be a dict or a file path(str) - # But the dict may come in as a string - if isinstance(self.access_config.service_account_key, str): - key_path = json_to_dict(self.access_config.service_account_key) - elif isinstance(self.access_config.service_account_key, dict): - key_path = self.access_config.service_account_key - else: - raise TypeError( - f"access_config.service_account_key must be " - f"str or dict, got: {type(self.access_config.service_account_key)}" - ) - - try: - if isinstance(key_path, dict): - creds = service_account.Credentials.from_service_account_info(key_path) - elif isinstance(key_path, str): - os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path - creds, _ = default() - else: - raise ValueError( - f"key path not recognized as a dictionary or a file path: " - f"[{type(key_path)}] {key_path}", - ) - service = build("drive", "v3", credentials=creds) - return service.files() - - except HttpError as exc: - raise ValueError(f"{exc.reason}") - except exceptions.DefaultCredentialsError: - raise ValueError("The provided API key is invalid.") - - -@dataclass -class GoogleDriveIndexerConfig(IndexerConfig): - extensions: Optional[list[str]] = None - recursive: bool = False - - def __post_init__(self): - # Strip leading period of extension - if self.extensions is not None: - self.extensions = [e[1:] if e.startswith(".") else e for e in self.extensions] - - -@dataclass -class GoogleDriveIndexer(Indexer): - connection_config: GoogleDriveConnectionConfig - index_config: GoogleDriveIndexerConfig - fields: list[str] = field( - default_factory=lambda: [ - "id", - "name", - "mimeType", - "fileExtension", - "md5Checksum", - "sha1Checksum", - "sha256Checksum", - "headRevisionId", - "permissions", - "createdTime", - "modifiedTime", - "version", - "originalFilename", - "capabilities", - "permissionIds", - "webViewLink", - "webContentLink", - ] - ) - - @staticmethod - def is_dir(record: dict) -> bool: - return record.get("mimeType") == "application/vnd.google-apps.folder" - - @staticmethod - def map_file_data(f: dict) -> FileData: - file_id = f["id"] - filename = f.pop("name") - url = f.pop("webContentLink", None) - version = f.pop("version", None) - permissions = f.pop("permissions", None) - date_created_str = f.pop("createdTime", None) - date_created_dt = parser.parse(date_created_str) if date_created_str else None - date_modified_str = f.pop("modifiedTime", None) - parent_path = f.pop("parent_path", None) - parent_root_path = f.pop("parent_root_path", None) - date_modified_dt = parser.parse(date_modified_str) if date_modified_str else None - if ( - parent_path - and isinstance(parent_path, str) - and parent_root_path - and isinstance(parent_root_path, str) - ): - fullpath = f"{parent_path}/{filename}" - rel_path = fullpath.replace(parent_root_path, "") - source_identifiers = SourceIdentifiers( - filename=filename, fullpath=fullpath, rel_path=rel_path 
- ) - else: - source_identifiers = SourceIdentifiers(fullpath=filename, filename=filename) - return FileData( - connector_type=CONNECTOR_TYPE, - identifier=file_id, - source_identifiers=source_identifiers, - metadata=DataSourceMetadata( - url=url, - version=version, - date_created=str(date_created_dt.timestamp()), - date_modified=str(date_modified_dt.timestamp()), - permissions_data=permissions, - record_locator={ - "file_id": file_id, - }, - ), - additional_metadata=f, - ) - - def get_paginated_results( - self, - files_client, - object_id: str, - extensions: Optional[list[str]] = None, - recursive: bool = False, - previous_path: Optional[str] = None, - ) -> list[dict]: - - fields_input = "nextPageToken, files({})".format(",".join(self.fields)) - q = f"'{object_id}' in parents" - # Filter by extension but still include any directories - if extensions: - ext_filter = " or ".join([f"fileExtension = '{e}'" for e in extensions]) - q = f"{q} and ({ext_filter} or mimeType = 'application/vnd.google-apps.folder')" - logger.debug(f"Query used when indexing: {q}") - logger.debug("response fields limited to: {}".format(", ".join(self.fields))) - done = False - page_token = None - files_response = [] - while not done: - response: dict = files_client.list( - spaces="drive", - fields=fields_input, - corpora="user", - pageToken=page_token, - q=q, - ).execute() - if files := response.get("files", []): - fs = [f for f in files if not self.is_dir(record=f)] - for r in fs: - r["parent_path"] = previous_path - dirs = [f for f in files if self.is_dir(record=f)] - files_response.extend(fs) - if recursive: - for d in dirs: - dir_id = d["id"] - dir_name = d["name"] - files_response.extend( - self.get_paginated_results( - files_client=files_client, - object_id=dir_id, - extensions=extensions, - recursive=recursive, - previous_path=f"{previous_path}/{dir_name}", - ) - ) - page_token = response.get("nextPageToken") - if page_token is None: - done = True - for r in files_response: - r["parent_root_path"] = previous_path - return files_response - - def get_root_info(self, files_client, object_id: str) -> dict: - return files_client.get(fileId=object_id, fields=",".join(self.fields)).execute() - - def get_files( - self, - files_client, - object_id: str, - recursive: bool = False, - extensions: Optional[list[str]] = None, - ) -> list[FileData]: - root_info = self.get_root_info(files_client=files_client, object_id=object_id) - if not self.is_dir(root_info): - data = [self.map_file_data(root_info)] - else: - - file_contents = self.get_paginated_results( - files_client=files_client, - object_id=object_id, - extensions=extensions, - recursive=recursive, - previous_path=root_info["name"], - ) - data = [self.map_file_data(f=f) for f in file_contents] - for d in data: - d.metadata.record_locator["drive_id"]: object_id - return data - - def run(self, **kwargs: Any) -> Generator[FileData, None, None]: - for f in self.get_files( - files_client=self.connection_config.get_files_service(), - object_id=self.connection_config.drive_id, - recursive=self.index_config.recursive, - extensions=self.index_config.extensions, - ): - yield f - - -@dataclass -class GoogleDriveDownloaderConfig(DownloaderConfig): - pass - - -@dataclass -class GoogleDriveDownloader(Downloader): - connection_config: GoogleDriveConnectionConfig - download_config: GoogleDriveDownloaderConfig = field( - default_factory=lambda: GoogleDriveDownloaderConfig() - ) - connector_type: str = CONNECTOR_TYPE - - def get_download_path(self, file_data: FileData) -> Path: - 
rel_path = file_data.source_identifiers.relative_path - rel_path = rel_path[1:] if rel_path.startswith("/") else rel_path - return self.download_dir / Path(rel_path) - - @SourceConnectionNetworkError.wrap - def _get_content(self, downloader: "MediaIoBaseDownload") -> bool: - downloaded = False - while downloaded is False: - _, downloaded = downloader.next_chunk() - return downloaded - - def _write_file(self, file_data: FileData, file_contents: io.BytesIO): - download_path = self.get_download_path(file_data=file_data) - download_path.parent.mkdir(parents=True, exist_ok=True) - logger.debug(f"writing {file_data.source_identifiers.fullpath} to {download_path}") - with open(download_path, "wb") as handler: - handler.write(file_contents.getbuffer()) - return self.generate_download_response(file_data=file_data, download_path=download_path) - - @requires_dependencies(["googleapiclient"], extras="google-drive") - def run(self, file_data: FileData, **kwargs: Any) -> download_responses: - from googleapiclient.http import MediaIoBaseDownload - - logger.debug(f"fetching file: {file_data.source_identifiers.fullpath}") - mime_type = file_data.additional_metadata["mimeType"] - record_id = file_data.identifier - files_client = self.connection_config.get_files_service() - if mime_type.startswith("application/vnd.google-apps"): - export_mime = GOOGLE_DRIVE_EXPORT_TYPES.get( - self.meta.get("mimeType"), # type: ignore - ) - if not export_mime: - raise TypeError( - f"File not supported. Name: {file_data.source_identifiers.filename} " - f"ID: {record_id} " - f"MimeType: {mime_type}" - ) - - request = files_client.export_media( - fileId=record_id, - mimeType=export_mime, - ) - else: - request = files_client.get_media(fileId=record_id) - - file_contents = io.BytesIO() - downloader = MediaIoBaseDownload(file_contents, request) - downloaded = self._get_content(downloader=downloader) - if not downloaded or not file_contents: - return [] - return self._write_file(file_data=file_data, file_contents=file_contents) - - -google_drive_source_entry = SourceRegistryEntry( - connection_config=GoogleDriveConnectionConfig, - indexer_config=GoogleDriveIndexerConfig, - indexer=GoogleDriveIndexer, - downloader_config=GoogleDriveDownloaderConfig, - downloader=GoogleDriveDownloader, -) diff --git a/unstructured/ingest/v2/processes/connectors/local.py b/unstructured/ingest/v2/processes/connectors/local.py deleted file mode 100644 index 811606d79..000000000 --- a/unstructured/ingest/v2/processes/connectors/local.py +++ /dev/null @@ -1,203 +0,0 @@ -import glob -import itertools -import shutil -from dataclasses import dataclass, field -from pathlib import Path -from time import time -from typing import Any, Generator, Optional - -from unstructured.documents.elements import DataSourceMetadata -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - Downloader, - DownloaderConfig, - DownloadResponse, - FileData, - Indexer, - IndexerConfig, - SourceIdentifiers, - UploadContent, - Uploader, - UploaderConfig, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, - SourceRegistryEntry, -) - -CONNECTOR_TYPE = "local" - - -@dataclass -class LocalAccessConfig(AccessConfig): - pass - - -@dataclass -class LocalConnectionConfig(ConnectionConfig): - access_config: LocalAccessConfig = field(default_factory=lambda: LocalAccessConfig()) - - -@dataclass -class LocalIndexerConfig(IndexerConfig): - input_path: str - recursive: bool 
= False - file_glob: Optional[list[str]] = None - - @property - def path(self) -> Path: - return Path(self.input_path).resolve() - - -@dataclass -class LocalIndexer(Indexer): - index_config: LocalIndexerConfig - connection_config: LocalConnectionConfig = field( - default_factory=lambda: LocalConnectionConfig() - ) - connector_type: str = CONNECTOR_TYPE - - def list_files(self) -> list[Path]: - input_path = self.index_config.path - if input_path.is_file(): - return [Path(s) for s in glob.glob(f"{self.index_config.path}")] - glob_fn = input_path.rglob if self.index_config.recursive else input_path.glob - if not self.index_config.file_glob: - return list(glob_fn("*")) - return list( - itertools.chain.from_iterable( - glob_fn(pattern) for pattern in self.index_config.file_glob - ) - ) - - def get_file_metadata(self, path: Path) -> DataSourceMetadata: - stats = path.stat() - try: - date_modified = str(stats.st_mtime) - except Exception as e: - logger.warning(f"Couldn't detect date modified: {e}") - date_modified = None - - try: - date_created = str(stats.st_birthtime) - except Exception as e: - logger.warning(f"Couldn't detect date created: {e}") - date_created = None - - try: - mode = stats.st_mode - permissions_data = [{"mode": mode}] - except Exception as e: - logger.warning(f"Couldn't detect file mode: {e}") - permissions_data = None - return DataSourceMetadata( - date_modified=date_modified, - date_created=date_created, - date_processed=str(time()), - permissions_data=permissions_data, - record_locator={"path": str(path.resolve())}, - ) - - def run(self, **kwargs: Any) -> Generator[FileData, None, None]: - for file_path in self.list_files(): - file_data = FileData( - identifier=str(file_path.resolve()), - connector_type=CONNECTOR_TYPE, - source_identifiers=SourceIdentifiers( - fullpath=str(file_path.resolve()), - filename=file_path.name, - rel_path=( - str(file_path.resolve()).replace(str(self.index_config.path.resolve()), "")[ - 1: - ] - if not self.index_config.path.is_file() - else self.index_config.path.name - ), - ), - metadata=self.get_file_metadata(path=file_path), - ) - yield file_data - - -@dataclass -class LocalDownloaderConfig(DownloaderConfig): - pass - - -@dataclass -class LocalDownloader(Downloader): - connector_type: str = CONNECTOR_TYPE - connection_config: LocalConnectionConfig = field( - default_factory=lambda: LocalConnectionConfig() - ) - download_config: LocalDownloaderConfig = field(default_factory=lambda: LocalDownloaderConfig()) - - def get_download_path(self, file_data: FileData) -> Path: - return Path(file_data.source_identifiers.fullpath) - - def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - return DownloadResponse( - file_data=file_data, path=Path(file_data.source_identifiers.fullpath) - ) - - -@dataclass -class LocalUploaderConfig(UploaderConfig): - output_dir: str = field(default="structured-output") - - @property - def output_path(self) -> Path: - return Path(self.output_dir).resolve() - - def __post_init__(self): - if self.output_path.exists() and self.output_path.is_file(): - raise ValueError("output path already exists as a file") - - -@dataclass -class LocalUploader(Uploader): - connector_type: str = CONNECTOR_TYPE - upload_config: LocalUploaderConfig = field(default_factory=lambda: LocalUploaderConfig()) - connection_config: LocalConnectionConfig = field( - default_factory=lambda: LocalConnectionConfig() - ) - - def is_async(self) -> bool: - return False - - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - 
self.upload_config.output_path.mkdir(parents=True, exist_ok=True) - for content in contents: - if source_identifiers := content.file_data.source_identifiers: - identifiers = source_identifiers - rel_path = ( - identifiers.relative_path[1:] - if identifiers.relative_path.startswith("/") - else identifiers.relative_path - ) - new_path = self.upload_config.output_path / Path(rel_path) - final_path = str(new_path).replace( - identifiers.filename, f"{identifiers.filename}.json" - ) - else: - final_path = self.upload_config.output_path / Path( - f"{content.file_data.identifier}.json" - ) - Path(final_path).parent.mkdir(parents=True, exist_ok=True) - logger.debug(f"copying file from {content.path} to {final_path}") - shutil.copy(src=str(content.path), dst=str(final_path)) - - -local_source_entry = SourceRegistryEntry( - indexer=LocalIndexer, - indexer_config=LocalIndexerConfig, - downloader=LocalDownloader, - downloader_config=LocalDownloaderConfig, - connection_config=LocalConnectionConfig, -) - -local_destination_entry = DestinationRegistryEntry( - uploader=LocalUploader, uploader_config=LocalUploaderConfig -) diff --git a/unstructured/ingest/v2/processes/connectors/mongodb.py b/unstructured/ingest/v2/processes/connectors/mongodb.py deleted file mode 100644 index f5003911c..000000000 --- a/unstructured/ingest/v2/processes/connectors/mongodb.py +++ /dev/null @@ -1,137 +0,0 @@ -import json -from dataclasses import dataclass, field -from pathlib import Path -from typing import TYPE_CHECKING, Any, Optional - -from unstructured.__version__ import __version__ as unstructured_version -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.utils.data_prep import batch_generator -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - FileData, - UploadContent, - Uploader, - UploaderConfig, - UploadStager, - UploadStagerConfig, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, -) -from unstructured.utils import requires_dependencies - -if TYPE_CHECKING: - from pymongo import MongoClient - -CONNECTOR_TYPE = "mongodb" -SERVER_API_VERSION = "1" - - -@dataclass -class MongoDBAccessConfig(AccessConfig): - uri: Optional[str] = None - - -@dataclass -class MongoDBConnectionConfig(ConnectionConfig): - access_config: MongoDBAccessConfig = enhanced_field( - sensitive=True, default_factory=MongoDBAccessConfig - ) - host: Optional[str] = None - database: Optional[str] = None - collection: Optional[str] = None - port: int = 27017 - batch_size: int = 100 - connector_type: str = CONNECTOR_TYPE - - -@dataclass -class MongoDBUploadStagerConfig(UploadStagerConfig): - pass - - -@dataclass -class MongoDBUploadStager(UploadStager): - upload_stager_config: MongoDBUploadStagerConfig = field( - default_factory=lambda: MongoDBUploadStagerConfig() - ) - - def run( - self, - elements_filepath: Path, - file_data: FileData, - output_dir: Path, - output_filename: str, - **kwargs: Any, - ) -> Path: - with open(elements_filepath) as elements_file: - elements_contents = json.load(elements_file) - - output_path = Path(output_dir) / Path(f"{output_filename}.json") - with open(output_path, "w") as output_file: - json.dump(elements_contents, output_file) - return output_path - - -@dataclass -class MongoDBUploaderConfig(UploaderConfig): - batch_size: int = 100 - - -@dataclass -class MongoDBUploader(Uploader): - upload_config: MongoDBUploaderConfig - connection_config: 
MongoDBConnectionConfig - client: Optional["MongoClient"] = field(init=False) - connector_type: str = CONNECTOR_TYPE - - def __post_init__(self): - self.client = self.create_client() - - @requires_dependencies(["pymongo"], extras="mongodb") - def create_client(self) -> "MongoClient": - from pymongo import MongoClient - from pymongo.driver_info import DriverInfo - from pymongo.server_api import ServerApi - - if self.connection_config.access_config.uri: - return MongoClient( - self.connection_config.access_config.uri, - server_api=ServerApi(version=SERVER_API_VERSION), - driver=DriverInfo(name="unstructured", version=unstructured_version), - ) - else: - return MongoClient( - host=self.connection_config.host, - port=self.connection_config.port, - server_api=ServerApi(version=SERVER_API_VERSION), - ) - - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - elements_dict = [] - for content in contents: - with open(content.path) as elements_file: - elements = json.load(elements_file) - elements_dict.extend(elements) - - logger.info( - f"writing {len(elements_dict)} objects to destination " - f"db, {self.connection_config.database}, " - f"collection {self.connection_config.collection} " - f"at {self.connection_config.host}", - ) - db = self.client[self.connection_config.database] - collection = db[self.connection_config.collection] - for chunk in batch_generator(elements_dict, self.upload_config.batch_size): - collection.insert_many(chunk) - - -mongodb_destination_entry = DestinationRegistryEntry( - connection_config=MongoDBConnectionConfig, - uploader=MongoDBUploader, - uploader_config=MongoDBUploaderConfig, - upload_stager=MongoDBUploadStager, - upload_stager_config=MongoDBUploadStagerConfig, -) diff --git a/unstructured/ingest/v2/processes/connectors/onedrive.py b/unstructured/ingest/v2/processes/connectors/onedrive.py deleted file mode 100644 index 4769cf626..000000000 --- a/unstructured/ingest/v2/processes/connectors/onedrive.py +++ /dev/null @@ -1,218 +0,0 @@ -from __future__ import annotations - -import json -from dataclasses import dataclass, field -from pathlib import Path -from time import time -from typing import TYPE_CHECKING, Any, Generator, Optional - -from dateutil import parser - -from unstructured.documents.elements import DataSourceMetadata -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - Downloader, - DownloaderConfig, - DownloadResponse, - FileData, - Indexer, - IndexerConfig, - SourceIdentifiers, - download_responses, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import ( - SourceRegistryEntry, -) -from unstructured.utils import requires_dependencies - -if TYPE_CHECKING: - from office365.graph_client import GraphClient - from office365.onedrive.driveitems.driveItem import DriveItem - -CONNECTOR_TYPE = "onedrive" -MAX_MB_SIZE = 512_000_000 - - -@dataclass -class OnedriveAccessConfig(AccessConfig): - client_cred: str - - -@dataclass -class OnedriveConnectionConfig(ConnectionConfig): - client_id: str - user_pname: str - tenant: str = field(repr=False) - authority_url: Optional[str] = field(repr=False, default="https://login.microsoftonline.com") - access_config: OnedriveAccessConfig = enhanced_field(sensitive=True) - - @requires_dependencies(["msal"], extras="onedrive") - def get_token(self): - from msal 
import ConfidentialClientApplication - - try: - app = ConfidentialClientApplication( - authority=f"{self.authority_url}/{self.tenant}", - client_id=self.client_id, - client_credential=self.access_config.client_cred, - ) - token = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"]) - except ValueError as exc: - logger.error("Couldn't set up credentials for OneDrive") - raise exc - if "error" in token: - raise SourceConnectionNetworkError( - "failed to fetch token, {}: {}".format(token["error"], token["error_description"]) - ) - return token - - @requires_dependencies(["office365"], extras="onedrive") - def get_client(self) -> "GraphClient": - from office365.graph_client import GraphClient - - client = GraphClient(self.get_token) - return client - - -@dataclass -class OnedriveIndexerConfig(IndexerConfig): - path: Optional[str] = field(default="") - recursive: bool = False - - -@dataclass -class OnedriveIndexer(Indexer): - connection_config: OnedriveConnectionConfig - index_config: OnedriveIndexerConfig - - def list_objects(self, folder: DriveItem, recursive: bool) -> list[DriveItem]: - drive_items: list[DriveItem] = list(folder.children.get().execute_query()) - files = [d for d in drive_items if d.is_file] - if not recursive: - return files - folders = [d for d in drive_items if d.is_folder] - for f in folders: - files.extend(self.list_objects(f, recursive)) - return files - - def get_root(self, client: "GraphClient") -> "DriveItem": - root = client.users[self.connection_config.user_pname].drive.get().execute_query().root - if fpath := self.index_config.path: - root = root.get_by_path(fpath).get().execute_query() - if root is None or not root.is_folder: - raise ValueError(f"Unable to find directory, given: {fpath}") - return root - - def get_properties(self, drive_item: "DriveItem") -> dict: - properties = drive_item.properties - filtered_properties = {} - for k, v in properties.items(): - try: - json.dumps(v) - filtered_properties[k] = v - except TypeError: - pass - return filtered_properties - - def drive_item_to_file_data(self, drive_item: "DriveItem") -> FileData: - file_path = drive_item.parent_reference.path.split(":")[-1] - file_path = file_path[1:] if file_path and file_path[0] == "/" else file_path - filename = drive_item.name - server_path = file_path + "/" + filename - rel_path = server_path.replace(self.index_config.path, "").lstrip("/") - date_modified_dt = ( - parser.parse(str(drive_item.last_modified_datetime)) - if drive_item.last_modified_datetime - else None - ) - date_created_at = ( - parser.parse(str(drive_item.created_datetime)) if drive_item.created_datetime else None - ) - return FileData( - identifier=drive_item.id, - connector_type=CONNECTOR_TYPE, - source_identifiers=SourceIdentifiers( - fullpath=server_path, filename=drive_item.name, rel_path=rel_path - ), - metadata=DataSourceMetadata( - url=drive_item.parent_reference.path + "/" + drive_item.name, - version=drive_item.etag, - date_modified=str(date_modified_dt.timestamp()) if date_modified_dt else None, - date_created=str(date_created_at.timestamp()) if date_created_at else None, - date_processed=str(time()), - record_locator={ - "user_pname": self.connection_config.user_pname, - "server_relative_path": server_path, - }, - ), - additional_metadata=self.get_properties(drive_item=drive_item), - ) - - def run(self, **kwargs: Any) -> Generator[FileData, None, None]: - client = self.connection_config.get_client() - root = self.get_root(client=client) - drive_items = 
self.list_objects(folder=root, recursive=self.index_config.recursive) - for drive_item in drive_items: - file_data = self.drive_item_to_file_data(drive_item=drive_item) - yield file_data - - -@dataclass -class OnedriveDownloaderConfig(DownloaderConfig): - pass - - -@dataclass -class OnedriveDownloader(Downloader): - connection_config: OnedriveConnectionConfig - download_config: OnedriveDownloaderConfig - - @SourceConnectionNetworkError.wrap - def _fetch_file(self, file_data: FileData): - if file_data.source_identifiers is None or not file_data.source_identifiers.fullpath: - raise ValueError( - f"file data doesn't have enough information to get " - f"file content: {file_data.to_dict()}" - ) - - server_relative_path = file_data.source_identifiers.fullpath - client = self.connection_config.get_client() - root = client.users[self.connection_config.user_pname].drive.get().execute_query().root - file = root.get_by_path(server_relative_path).get().execute_query() - if not file: - raise FileNotFoundError(f"file not found: {server_relative_path}") - return file - - def get_download_path(self, file_data: FileData) -> Optional[Path]: - rel_path = file_data.source_identifiers.relative_path - rel_path = rel_path[1:] if rel_path.startswith("/") else rel_path - return self.download_dir / Path(rel_path) - - @SourceConnectionError.wrap - def run(self, file_data: FileData, **kwargs: Any) -> download_responses: - file = self._fetch_file(file_data=file_data) - fsize = file.get_property("size", 0) - download_path = self.get_download_path(file_data=file_data) - download_path.parent.mkdir(parents=True, exist_ok=True) - logger.info(f"Downloading {file_data.source_identifiers.fullpath} to {download_path}") - if fsize > MAX_MB_SIZE: - logger.info(f"Downloading file with size: {fsize} bytes in chunks") - with download_path.open(mode="wb") as f: - file.download_session(f, chunk_size=1024 * 1024 * 100).execute_query() - else: - with download_path.open(mode="wb") as f: - file.download(f).execute_query() - return DownloadResponse(file_data=file_data, path=download_path) - - -onedrive_source_entry = SourceRegistryEntry( - connection_config=OnedriveConnectionConfig, - indexer_config=OnedriveIndexerConfig, - indexer=OnedriveIndexer, - downloader_config=OnedriveDownloaderConfig, - downloader=OnedriveDownloader, -) diff --git a/unstructured/ingest/v2/processes/connectors/opensearch.py b/unstructured/ingest/v2/processes/connectors/opensearch.py deleted file mode 100644 index 0933cd1fa..000000000 --- a/unstructured/ingest/v2/processes/connectors/opensearch.py +++ /dev/null @@ -1,155 +0,0 @@ -from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Optional - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin, enhanced_field -from unstructured.ingest.error import ( - DestinationConnectionError, -) -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, - SourceRegistryEntry, -) -from unstructured.ingest.v2.processes.connectors.elasticsearch import ( - ElasticsearchDownloader, - ElasticsearchDownloaderConfig, - ElasticsearchIndexer, - ElasticsearchIndexerConfig, - ElasticsearchUploader, - ElasticsearchUploaderConfig, - ElasticsearchUploadStager, - ElasticsearchUploadStagerConfig, -) -from unstructured.utils import requires_dependencies - -if TYPE_CHECKING: - from opensearchpy import OpenSearch - 
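# Hedged illustration (example values assumed, not from the original module):
# the connection config defined below maps its fields onto opensearch-py client
# kwargs, dropping None-valued fields and passing credentials as an http_auth
# tuple, so the resulting client construction looks roughly like:
#
#     OpenSearch(
#         hosts=["https://localhost:9200"],   # assumed example host
#         http_auth=("admin", "<password>"),  # only when username and password are set
#         use_ssl=True,
#         verify_certs=False,
#     )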
-CONNECTOR_TYPE = "opensearch" - -"""Since the actual OpenSearch project is a fork of Elasticsearch, we are relying -heavily on the Elasticsearch connector code, inheriting the functionality as much as possible.""" - - -@dataclass -class OpenSearchAccessConfig(AccessConfig): - password: Optional[str] = enhanced_field(default=None, sensitive=True) - use_ssl: bool = False - verify_certs: bool = False - ssl_show_warn: bool = False - ca_certs: Optional[str] = None - client_cert: Optional[str] = None - client_key: Optional[str] = None - - -@dataclass -class OpenSearchClientInput(EnhancedDataClassJsonMixin): - http_auth: Optional[tuple[str, str]] = enhanced_field(sensitive=True, default=None) - hosts: Optional[list[str]] = None - use_ssl: bool = False - verify_certs: bool = False - ssl_show_warn: bool = False - ca_certs: Optional[str] = None - client_cert: Optional[str] = None - client_key: Optional[str] = None - - -@dataclass -class OpenSearchConnectionConfig(ConnectionConfig): - hosts: Optional[list[str]] = None - username: Optional[str] = None - access_config: OpenSearchAccessConfig = enhanced_field(sensitive=True) - - def get_client_kwargs(self) -> dict: - # Update auth related fields to conform to what the SDK expects based on the - # supported methods: - # https://github.com/opensearch-project/opensearch-py/blob/main/opensearchpy/client/__init__.py - client_input = OpenSearchClientInput() - if self.hosts: - client_input.hosts = self.hosts - if self.access_config.use_ssl: - client_input.use_ssl = self.access_config.use_ssl - if self.access_config.verify_certs: - client_input.verify_certs = self.access_config.verify_certs - if self.access_config.ssl_show_warn: - client_input.ssl_show_warn = self.access_config.ssl_show_warn - if self.access_config.ca_certs: - client_input.ca_certs = self.access_config.ca_certs - if self.access_config.client_cert: - client_input.client_cert = self.access_config.client_cert - if self.access_config.client_key: - client_input.client_key = self.access_config.client_key - if self.username and self.access_config.password: - client_input.http_auth = (self.username, self.access_config.password) - logger.debug( - f"OpenSearch client inputs mapped to: {client_input.to_dict(redact_sensitive=True)}" - ) - client_kwargs = client_input.to_dict(redact_sensitive=False) - client_kwargs = {k: v for k, v in client_kwargs.items() if v is not None} - return client_kwargs - - @DestinationConnectionError.wrap - @requires_dependencies(["opensearchpy"], extras="opensearch") - def get_client(self) -> "OpenSearch": - from opensearchpy import OpenSearch - - return OpenSearch(**self.get_client_kwargs()) - - -@dataclass -class OpenSearchIndexer(ElasticsearchIndexer): - connection_config: OpenSearchConnectionConfig - client: "OpenSearch" = field(init=False) - - @requires_dependencies(["opensearchpy"], extras="opensearch") - def load_scan(self): - from opensearchpy.helpers import scan - - return scan - - -@dataclass -class OpenSearchDownloader(ElasticsearchDownloader): - connection_config: OpenSearchConnectionConfig - connector_type: str = CONNECTOR_TYPE - - @requires_dependencies(["opensearchpy"], extras="opensearch") - def load_async(self): - from opensearchpy import AsyncOpenSearch - from opensearchpy.helpers import async_scan - - return AsyncOpenSearch, async_scan - - -@dataclass -class OpenSearchUploader(ElasticsearchUploader): - connection_config: OpenSearchConnectionConfig - connector_type: str = CONNECTOR_TYPE - - @requires_dependencies(["opensearchpy"], extras="opensearch") - def 
load_parallel_bulk(self): - from opensearchpy.helpers import parallel_bulk - - return parallel_bulk - - -opensearch_source_entry = SourceRegistryEntry( - connection_config=OpenSearchConnectionConfig, - indexer=OpenSearchIndexer, - indexer_config=ElasticsearchIndexerConfig, - downloader=OpenSearchDownloader, - downloader_config=ElasticsearchDownloaderConfig, -) - - -opensearch_destination_entry = DestinationRegistryEntry( - connection_config=OpenSearchConnectionConfig, - upload_stager_config=ElasticsearchUploadStagerConfig, - upload_stager=ElasticsearchUploadStager, - uploader_config=ElasticsearchUploaderConfig, - uploader=OpenSearchUploader, -) diff --git a/unstructured/ingest/v2/processes/connectors/pinecone.py b/unstructured/ingest/v2/processes/connectors/pinecone.py deleted file mode 100644 index 0cd087b9c..000000000 --- a/unstructured/ingest/v2/processes/connectors/pinecone.py +++ /dev/null @@ -1,178 +0,0 @@ -import json -import multiprocessing as mp -import uuid -from dataclasses import dataclass, field -from pathlib import Path -from typing import TYPE_CHECKING, Any, Optional - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import DestinationConnectionError -from unstructured.ingest.utils.data_prep import batch_generator -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - UploadContent, - Uploader, - UploaderConfig, - UploadStager, - UploadStagerConfig, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, -) -from unstructured.staging.base import flatten_dict -from unstructured.utils import requires_dependencies - -if TYPE_CHECKING: - from pinecone import Index as PineconeIndex - - -CONNECTOR_TYPE = "pinecone" - - -@dataclass -class PineconeAccessConfig(AccessConfig): - api_key: Optional[str] = enhanced_field(default=None, overload_name="pinecone_api_key") - - -@dataclass -class PineconeConnectionConfig(ConnectionConfig): - index_name: str - environment: str - access_config: PineconeAccessConfig = enhanced_field(sensitive=True) - - @requires_dependencies(["pinecone"], extras="pinecone") - def get_index(self) -> "PineconeIndex": - from pinecone import Pinecone - - from unstructured import __version__ as unstructured_version - - pc = Pinecone( - api_key=self.access_config.api_key, - source_tag=f"unstructured=={unstructured_version}", - ) - - index = pc.Index(self.index_name) - logger.debug(f"Connected to index: {pc.describe_index(self.index_name)}") - return index - - -@dataclass -class PineconeUploadStagerConfig(UploadStagerConfig): - pass - - -@dataclass -class PineconeUploaderConfig(UploaderConfig): - batch_size: int = 100 - num_of_processes: int = 4 - - -@dataclass -class PineconeUploadStager(UploadStager): - upload_stager_config: PineconeUploadStagerConfig = field( - default_factory=lambda: PineconeUploadStagerConfig() - ) - - @staticmethod - def conform_dict(element_dict: dict) -> dict: - # While flatten_dict enables indexing on various fields, - # element_serialized enables easily reloading the element object to memory. - # element_serialized is formed without text/embeddings to avoid data bloating. 
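        # Hedged illustration (assumed element, output abbreviated): an element like
        #     {"element_id": "abc", "text": "Hello world", "embeddings": [0.1, 0.2], "type": "Title"}
        # is staged roughly as
        #     {"id": "<uuid4>",
        #      "values": [0.1, 0.2],
        #      "metadata": {"text": "Hello world",
        #                   "element_serialized": '{"element_id": "abc", "type": "Title"}',
        #                   "element_id": "abc", "type": "Title"}}
        # where the trailing metadata keys come from flatten_dict over the remaining fields.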
- return { - "id": str(uuid.uuid4()), - "values": element_dict.pop("embeddings", None), - "metadata": { - "text": element_dict.pop("text", None), - "element_serialized": json.dumps(element_dict), - **flatten_dict( - element_dict, - separator="-", - flatten_lists=True, - remove_none=True, - ), - }, - } - - def run( - self, - elements_filepath: Path, - output_dir: Path, - output_filename: str, - **kwargs: Any, - ) -> Path: - with open(elements_filepath) as elements_file: - elements_contents = json.load(elements_file) - - conformed_elements = [ - self.conform_dict(element_dict=element) for element in elements_contents - ] - - output_path = Path(output_dir) / Path(f"{output_filename}.json") - output_path.parent.mkdir(parents=True, exist_ok=True) - - with open(output_path, "w") as output_file: - json.dump(conformed_elements, output_file) - return output_path - - -@dataclass -class PineconeUploader(Uploader): - upload_config: PineconeUploaderConfig - connection_config: PineconeConnectionConfig - connector_type: str = CONNECTOR_TYPE - - @DestinationConnectionError.wrap - def check_connection(self): - _ = self.connection_config.get_index() - - @requires_dependencies(["pinecone"], extras="pinecone") - def upsert_batch(self, batch): - from pinecone.exceptions import PineconeApiException - - try: - index = self.connection_config.get_index() - response = index.upsert(batch) - except PineconeApiException as api_error: - raise DestinationConnectionError(f"http error: {api_error}") from api_error - logger.debug(f"results: {response}") - - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - - elements_dict = [] - for content in contents: - with open(content.path) as elements_file: - elements = json.load(elements_file) - elements_dict.extend(elements) - - logger.info( - f"writing document batches to destination" - f" index named {self.connection_config.index_name}" - f" environment named {self.connection_config.environment}" - f" with batch size {self.upload_config.batch_size}" - f" with {self.upload_config.num_of_processes} (number of) processes" - ) - - pinecone_batch_size = self.upload_config.batch_size - - if self.upload_config.num_of_processes == 1: - for batch in batch_generator(elements_dict, pinecone_batch_size): - self.upsert_batch(batch) # noqa: E203 - - else: - with mp.Pool( - processes=self.upload_config.num_of_processes, - ) as pool: - pool.map( - self.upsert_batch, list(batch_generator(elements_dict, pinecone_batch_size)) - ) - - -pinecone_destination_entry = DestinationRegistryEntry( - connection_config=PineconeConnectionConfig, - uploader=PineconeUploader, - uploader_config=PineconeUploaderConfig, - upload_stager=PineconeUploadStager, - upload_stager_config=PineconeUploadStagerConfig, -) diff --git a/unstructured/ingest/v2/processes/connectors/salesforce.py b/unstructured/ingest/v2/processes/connectors/salesforce.py deleted file mode 100644 index e1f018895..000000000 --- a/unstructured/ingest/v2/processes/connectors/salesforce.py +++ /dev/null @@ -1,293 +0,0 @@ -""" -Salesforce Connector -Able to download Account, Case, Campaign, EmailMessage, Lead -Salesforce returns everything as a list of json. -This saves each entry as a separate file to be partitioned. 
-Using JWT authorization -https://developer.salesforce.com/docs/atlas.en-us.sfdx_dev.meta/sfdx_dev/sfdx_dev_auth_key_and_cert.htm -https://developer.salesforce.com/docs/atlas.en-us.sfdx_dev.meta/sfdx_dev/sfdx_dev_auth_connected_app.htm -""" - -import json -from collections import OrderedDict -from dataclasses import dataclass, field -from email.utils import formatdate -from pathlib import Path -from string import Template -from textwrap import dedent -from typing import TYPE_CHECKING, Any, Generator, Type - -from dateutil import parser - -from unstructured.documents.elements import DataSourceMetadata -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionNetworkError -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - Downloader, - DownloaderConfig, - DownloadResponse, - FileData, - Indexer, - IndexerConfig, - SourceIdentifiers, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import ( - SourceRegistryEntry, -) -from unstructured.utils import requires_dependencies - - -class MissingCategoryError(Exception): - """There are no categories with that name.""" - - -CONNECTOR_TYPE = "salesforce" - -if TYPE_CHECKING: - from simple_salesforce import Salesforce - -SALESFORCE_API_VERSION = "57.0" - -# TODO: Add more categories as needed -ACCEPTED_CATEGORIES: list[str] = ["Account", "Case", "Campaign", "EmailMessage", "Lead"] - -# Generic minimal email template used only -# to process EmailMessage records as .eml files -EMAIL_TEMPLATE = Template( - """MIME-Version: 1.0 -Date: $date -Message-ID: $message_identifier -Subject: $subject -From: $from_email -To: $to_email -Content-Type: multipart/alternative; boundary="00000000000095c9b205eff92630" ---00000000000095c9b205eff92630 -Content-Type: text/plain; charset="UTF-8" -$textbody ---00000000000095c9b205eff92630 -Content-Type: text/html; charset="UTF-8" -$htmlbody ---00000000000095c9b205eff92630-- -""", -) - - -@dataclass -class SalesforceAccessConfig(AccessConfig): - consumer_key: str - private_key: str - - @requires_dependencies(["cryptography"]) - def get_private_key_value_and_type(self) -> tuple[str, Type]: - from cryptography.hazmat.primitives import serialization - - try: - serialization.load_pem_private_key(data=self.private_key.encode("utf-8"), password=None) - except ValueError: - pass - else: - return self.private_key, str - - if Path(self.private_key).is_file(): - return self.private_key, Path - - raise ValueError("private_key does not contain PEM private key or path") - - -@dataclass -class SalesforceConnectionConfig(ConnectionConfig): - username: str - access_config: SalesforceAccessConfig = enhanced_field(sensitive=True) - - @requires_dependencies(["simple_salesforce"], extras="salesforce") - def get_client(self) -> "Salesforce": - from simple_salesforce import Salesforce - - pkey_value, pkey_type = self.access_config.get_private_key_value_and_type() - - return Salesforce( - username=self.username, - consumer_key=self.access_config.consumer_key, - privatekey_file=pkey_value if pkey_type is Path else None, - privatekey=pkey_value if pkey_type is str else None, - version=SALESFORCE_API_VERSION, - ) - - -@dataclass -class SalesforceIndexerConfig(IndexerConfig): - categories: list[str] - - -@dataclass -class SalesforceIndexer(Indexer): - connection_config: SalesforceConnectionConfig - index_config: SalesforceIndexerConfig - - def __post_init__(self): - for record_type in 
self.index_config.categories: - if record_type not in ACCEPTED_CATEGORIES: - raise ValueError(f"{record_type} not currently an accepted Salesforce category") - - def get_file_extension(self, record_type) -> str: - if record_type == "EmailMessage": - extension = ".eml" - elif record_type in ["Account", "Lead", "Case", "Campaign"]: - extension = ".xml" - else: - raise MissingCategoryError( - f"There are no categories with the name: {record_type}", - ) - return extension - - @requires_dependencies(["simple_salesforce"], extras="salesforce") - def list_files(self) -> list[FileData]: - """Get Salesforce Ids for the records. - Send them to next phase where each doc gets downloaded into the - appropriate format for partitioning. - """ - from simple_salesforce.exceptions import SalesforceMalformedRequest - - client = self.connection_config.get_client() - - files_list = [] - for record_type in self.index_config.categories: - try: - # Get ids from Salesforce - records = client.query_all_iter( - f"select Id, SystemModstamp, CreatedDate, LastModifiedDate from {record_type}", - ) - for record in records: - record_with_extension = record["Id"] + self.get_file_extension( - record["attributes"]["type"] - ) - files_list.append( - FileData( - connector_type=CONNECTOR_TYPE, - identifier=record["Id"], - source_identifiers=SourceIdentifiers( - filename=record_with_extension, - fullpath=f"{record['attributes']['type']}/{record_with_extension}", - ), - metadata=DataSourceMetadata( - url=record["attributes"]["url"], - version=str(parser.parse(record["SystemModstamp"]).timestamp()), - date_created=str(parser.parse(record["CreatedDate"]).timestamp()), - date_modified=str( - parser.parse(record["LastModifiedDate"]).timestamp() - ), - record_locator={"id": record["Id"]}, - ), - additional_metadata={"record_type": record["attributes"]["type"]}, - ) - ) - except SalesforceMalformedRequest as e: - raise SalesforceMalformedRequest(f"Problem with Salesforce query: {e}") - - return files_list - - def run(self, **kwargs: Any) -> Generator[FileData, None, None]: - for f in self.list_files(): - yield f - - -@dataclass -class SalesforceDownloaderConfig(DownloaderConfig): - pass - - -@dataclass -class SalesforceDownloader(Downloader): - connection_config: SalesforceConnectionConfig - download_config: SalesforceDownloaderConfig = field( - default_factory=lambda: SalesforceDownloaderConfig() - ) - connector_type: str = CONNECTOR_TYPE - - def get_download_path(self, file_data: FileData) -> Path: - rel_path = file_data.source_identifiers.relative_path - rel_path = rel_path[1:] if rel_path.startswith("/") else rel_path - return self.download_dir / Path(rel_path) - - def _xml_for_record(self, record: OrderedDict) -> str: - """Creates partitionable xml file from a record""" - import xml.etree.ElementTree as ET - - def create_xml_doc(data, parent, prefix=""): - for key, value in data.items(): - if isinstance(value, OrderedDict): - create_xml_doc(value, parent, prefix=f"{prefix}{key}.") - else: - item = ET.Element("item") - item.text = f"{prefix}{key}: {value}" - parent.append(item) - - root = ET.Element("root") - create_xml_doc(record, root) - - xml_string = ET.tostring(root, encoding="utf-8", xml_declaration=True).decode() - return xml_string - - def _eml_for_record(self, email_json: dict[str, Any]) -> str: - """Recreates standard expected .eml format using template.""" - eml = EMAIL_TEMPLATE.substitute( - date=formatdate(parser.parse(email_json.get("MessageDate")).timestamp()), - 
message_identifier=email_json.get("MessageIdentifier"), - subject=email_json.get("Subject"), - from_email=email_json.get("FromAddress"), - to_email=email_json.get("ToAddress"), - textbody=email_json.get("TextBody"), - htmlbody=email_json.get("HtmlBody"), - ) - return dedent(eml) - - @SourceConnectionNetworkError.wrap - def _get_response(self, file_data: FileData) -> OrderedDict: - client = self.connection_config.get_client() - return client.query( - f"select FIELDS(STANDARD) from {file_data.additional_metadata['record_type']} where Id='{file_data.identifier}'", # noqa: E501 - ) - - def get_record(self, file_data: FileData) -> OrderedDict: - # Get record from Salesforce based on id - response = self._get_response(file_data) - logger.debug(f"response was returned for salesforce record id: {file_data.identifier}") - records = response["records"] - if not records: - raise ValueError( - f"No record found with record id {file_data.identifier}: {json.dumps(response)}" - ) - record_json = records[0] - return record_json - - def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - record = self.get_record(file_data) - - try: - if file_data.additional_metadata["record_type"] == "EmailMessage": - document = self._eml_for_record(record) - else: - document = self._xml_for_record(record) - download_path = self.get_download_path(file_data=file_data) - download_path.parent.mkdir(parents=True, exist_ok=True) - - with open(download_path, "w") as page_file: - page_file.write(document) - - except Exception as e: - logger.error(f"failed to download file {file_data.identifier}: {e}", exc_info=True) - raise SourceConnectionNetworkError(f"failed to download file {file_data.identifier}") - - return self.generate_download_response(file_data=file_data, download_path=download_path) - - -salesforce_source_entry = SourceRegistryEntry( - connection_config=SalesforceConnectionConfig, - indexer_config=SalesforceIndexerConfig, - indexer=SalesforceIndexer, - downloader_config=SalesforceDownloaderConfig, - downloader=SalesforceDownloader, -) diff --git a/unstructured/ingest/v2/processes/connectors/sharepoint.py b/unstructured/ingest/v2/processes/connectors/sharepoint.py deleted file mode 100644 index 696d327ce..000000000 --- a/unstructured/ingest/v2/processes/connectors/sharepoint.py +++ /dev/null @@ -1,411 +0,0 @@ -import json -from dataclasses import dataclass, field -from enum import Enum -from pathlib import Path -from time import time -from typing import TYPE_CHECKING, Any, Generator, Optional -from urllib.parse import quote - -from unstructured.documents.elements import DataSourceMetadata -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin, enhanced_field -from unstructured.ingest.error import SourceConnectionNetworkError -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - Downloader, - DownloaderConfig, - DownloadResponse, - FileData, - Indexer, - IndexerConfig, - SourceIdentifiers, - download_responses, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import ( - SourceRegistryEntry, -) -from unstructured.utils import requires_dependencies - -from .utils import parse_datetime - -if TYPE_CHECKING: - from office365.graph_client import GraphClient - from office365.onedrive.driveitems.driveItem import DriveItem - from office365.onedrive.drives.drive import Drive - from office365.onedrive.permissions.permission import Permission - from office365.onedrive.sites.site import Site - from 
office365.sharepoint.client_context import ClientContext - from office365.sharepoint.files.file import File - from office365.sharepoint.folders.folder import Folder - from office365.sharepoint.publishing.pages.page import SitePage - -CONNECTOR_TYPE = "sharepoint" - -MAX_MB_SIZE = 512_000_000 - -# TODO handle other data types possible from Sharepoint -# exampled: https://github.com/vgrem/Office365-REST-Python-Client/tree/master/examples/sharepoint - - -class SharepointContentType(Enum): - DOCUMENT = "document" - SITEPAGE = "site_page" - LIST = "list" - - -@dataclass -class SharepointAccessConfig(AccessConfig): - client_cred: str - - -@dataclass -class SharepointPermissionsConfig(EnhancedDataClassJsonMixin): - permissions_application_id: str - permissions_tenant: str - permissions_client_cred: str = enhanced_field(sensitive=True) - authority_url: Optional[str] = field(repr=False, default="https://login.microsoftonline.com") - - -@dataclass -class SharepointConnectionConfig(ConnectionConfig): - client_id: str - site: str - access_config: SharepointAccessConfig = enhanced_field(sensitive=True) - permissions_config: Optional[SharepointPermissionsConfig] = None - - @requires_dependencies(["office365"], extras="sharepoint") - def get_client(self) -> "ClientContext": - from office365.runtime.auth.client_credential import ClientCredential - from office365.sharepoint.client_context import ClientContext - - try: - credentials = ClientCredential(self.client_id, self.access_config.client_cred) - site_client = ClientContext(self.site).with_credentials(credentials) - except Exception as e: - logger.error(f"Couldn't set Sharepoint client: {e}") - raise e - return site_client - - @requires_dependencies(["msal"], extras="sharepoint") - def get_permissions_token(self): - from msal import ConfidentialClientApplication - - try: - app = ConfidentialClientApplication( - authority=f"{self.permissions_config.authority_url}/" - f"{self.permissions_config.permissions_tenant}", - client_id=self.permissions_config.permissions_application_id, - client_credential=self.permissions_config.permissions_client_cred, - ) - token = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"]) - except ValueError as exc: - logger.error("Couldn't set up credentials for Sharepoint") - raise exc - if "error" in token: - raise SourceConnectionNetworkError( - "failed to fetch token, {}: {}".format(token["error"], token["error_description"]) - ) - return token - - @requires_dependencies(["office365"], extras="sharepoint") - def get_permissions_client(self) -> Optional["GraphClient"]: - from office365.graph_client import GraphClient - - if self.permissions_config is None: - return None - - client = GraphClient(self.get_permissions_token) - return client - - -@dataclass -class SharepointIndexerConfig(IndexerConfig): - path: Optional[str] = None - recursive: bool = False - omit_files: bool = False - omit_pages: bool = False - omit_lists: bool = False - - -@dataclass -class SharepointIndexer(Indexer): - connection_config: SharepointConnectionConfig - index_config: SharepointIndexerConfig = field(default_factory=lambda: SharepointIndexerConfig()) - - def list_files(self, folder: "Folder", recursive: bool = False) -> list["File"]: - if not recursive: - folder.expand(["Files"]).get().execute_query() - return folder.files - - folder.expand(["Files", "Folders"]).get().execute_query() - files: list["File"] = list(folder.files) - folders: list["Folder"] = list(folder.folders) - for f in folders: - if "/Forms" in 
f.serverRelativeUrl: - continue - files.extend(self.list_files(f, recursive)) - return files - - def get_properties(self, raw_properties: dict) -> dict: - raw_properties = {k: v for k, v in raw_properties.items() if v} - filtered_properties = {} - for k, v in raw_properties.items(): - try: - json.dumps(v) - filtered_properties[k] = v - except TypeError: - pass - return filtered_properties - - def list_pages(self, client: "ClientContext") -> list["SitePage"]: - pages = client.site_pages.pages.get().execute_query() - return pages - - def page_to_file_data(self, site_page: "SitePage") -> FileData: - site_page.expand(site_page.properties.keys()).get().execute_query() - version = site_page.properties.get("Version", None) - unique_id = site_page.properties.get("UniqueId", None) - modified_date = site_page.properties.get("Modified", None) - url = site_page.properties.get("AbsoluteUrl", None) - date_modified_dt = parse_datetime(modified_date) if modified_date else None - date_created_at = ( - parse_datetime(site_page.first_published) - if (site_page.first_published and site_page.first_published != "0001-01-01T08:00:00Z") - else None - ) - file_path = site_page.get_property("Url", "") - server_path = file_path if file_path[0] != "/" else file_path[1:] - additional_metadata = self.get_properties(raw_properties=site_page.properties) - additional_metadata["sharepoint_content_type"] = SharepointContentType.SITEPAGE.value - return FileData( - identifier=unique_id, - connector_type=CONNECTOR_TYPE, - source_identifiers=SourceIdentifiers( - filename=site_page.file_name, - fullpath=file_path, - rel_path=file_path.replace(self.index_config.path, ""), - ), - metadata=DataSourceMetadata( - url=url, - version=version, - date_modified=str(date_modified_dt.timestamp()) if date_modified_dt else None, - date_created=str(date_created_at.timestamp()) if date_created_at else None, - date_processed=str(time()), - record_locator={ - "server_path": server_path, - }, - ), - additional_metadata=additional_metadata, - ) - - def file_to_file_data(self, client: "ClientContext", file: "File") -> FileData: - file.expand(file.properties.keys()).get().execute_query() - absolute_url = f"{client.base_url}{quote(file.serverRelativeUrl)}" - date_modified_dt = ( - parse_datetime(file.time_last_modified) if file.time_last_modified else None - ) - - date_created_at = parse_datetime(file.time_created) if file.time_created else None - additional_metadata = self.get_properties(raw_properties=file.properties) - additional_metadata["sharepoint_content_type"] = SharepointContentType.DOCUMENT.value - fullpath = str(file.serverRelativeUrl) - rel_path = fullpath.replace(self.index_config.path, "") - while rel_path[0] == "/": - rel_path = rel_path[1:] - return FileData( - identifier=file.unique_id, - connector_type=CONNECTOR_TYPE, - source_identifiers=SourceIdentifiers( - filename=file.name, - fullpath=fullpath, - rel_path=rel_path, - ), - metadata=DataSourceMetadata( - url=absolute_url, - version=f"{file.major_version}.{file.minor_version}", - date_modified=str(date_modified_dt.timestamp()) if date_modified_dt else None, - date_created=str(date_created_at.timestamp()) if date_created_at else None, - date_processed=str(time()), - record_locator={"server_path": file.serverRelativeUrl, "site_url": client.base_url}, - ), - additional_metadata=additional_metadata, - ) - - def get_root(self, client: "ClientContext") -> "Folder": - if path := self.index_config.path: - return client.web.get_folder_by_server_relative_path(path) - default_document_library 
= client.web.default_document_library() - root_folder = default_document_library.root_folder - root_folder = root_folder.get().execute_query() - self.index_config.path = root_folder.name - return root_folder - - def get_site_url(self, client: "ClientContext") -> str: - res = client.web.get().execute_query() - return res.url - - def get_site(self, permissions_client: "GraphClient", site_url) -> "Site": - return permissions_client.sites.get_by_url(url=site_url).execute_query() - - def get_permissions_items(self, site: "Site") -> list["DriveItem"]: - # TODO find a way to narrow this search down by name of drive - items: list["DriveItem"] = [] - drives: list["Drive"] = site.drives.get_all().execute_query() - for drive in drives: - items.extend(drive.root.children.get_all().execute_query()) - return items - - def map_permission(self, permission: "Permission") -> dict: - return { - "id": permission.id, - "roles": list(permission.roles), - "share_id": permission.share_id, - "has_password": permission.has_password, - "link": permission.link.to_json(), - "granted_to_identities": permission.granted_to_identities.to_json(), - "granted_to": permission.granted_to.to_json(), - "granted_to_v2": permission.granted_to_v2.to_json(), - "granted_to_identities_v2": permission.granted_to_identities_v2.to_json(), - "invitation": permission.invitation.to_json(), - } - - def enrich_permissions_on_files(self, all_file_data: list[FileData], site_url: str) -> None: - logger.debug("Enriching permissions on files") - permission_client = self.connection_config.get_permissions_client() - if permission_client is None: - return - site = self.get_site(permissions_client=permission_client, site_url=site_url) - existing_items = self.get_permissions_items(site=site) - for file_data in all_file_data: - etag = file_data.additional_metadata.get("ETag") - if not etag: - continue - matching_items = list(filter(lambda x: x.etag == etag, existing_items)) - if not matching_items: - continue - if len(matching_items) > 1: - logger.warning( - "Found multiple drive items with etag matching {}, skipping: {}".format( - etag, ", ".join([i.name for i in matching_items]) - ) - ) - continue - matching_item = matching_items[0] - permissions: list["Permission"] = matching_item.permissions.get_all().execute_query() - permissions_data = [ - self.map_permission(permission=permission) for permission in permissions - ] - file_data.metadata.permissions_data = permissions_data - - @property - def process_permissions(self) -> bool: - return ( - self.connection_config.permissions_config.permissions_tenant - and self.connection_config.permissions_config.permissions_client_cred - and self.connection_config.permissions_config.permissions_application_id - ) - - def run(self, **kwargs: Any) -> Generator[FileData, None, None]: - client = self.connection_config.get_client() - root_folder = self.get_root(client=client) - logger.debug(f"processing content from path: {self.index_config.path}") - if not self.index_config.omit_files: - files = self.list_files(root_folder, recursive=self.index_config.recursive) - file_data = [self.file_to_file_data(file=file, client=client) for file in files] - if self.process_permissions: - self.enrich_permissions_on_files( - all_file_data=file_data, site_url=self.get_site_url(client=client) - ) - for file in file_data: - yield file - if not self.index_config.omit_pages: - pages = self.list_pages(client=client) - for page in pages: - file_data = self.page_to_file_data(site_page=page) - file_data.metadata.record_locator["site_url"] = 
client.base_url - yield file_data - - -@dataclass -class SharepointDownloaderConfig(DownloaderConfig): - pass - - -@dataclass -class SharepointDownloader(Downloader): - connection_config: SharepointConnectionConfig - download_config: SharepointDownloaderConfig - connector_type: str = CONNECTOR_TYPE - - def get_download_path(self, file_data: FileData) -> Path: - content_type = file_data.additional_metadata.get("sharepoint_content_type") - rel_path = file_data.source_identifiers.fullpath - rel_path = rel_path[1:] if rel_path.startswith("/") else rel_path - download_path = self.download_dir / Path(rel_path) - if content_type == SharepointContentType.SITEPAGE.value: - # Update output extension to html if site page - download_path = download_path.with_suffix(".html") - return download_path - - def get_document(self, file_data: FileData) -> DownloadResponse: - client: "ClientContext" = self.connection_config.get_client() - file: "File" = client.web.get_file_by_id(unique_id=file_data.identifier) - download_path = self.get_download_path(file_data=file_data) - download_path.parent.mkdir(parents=True, exist_ok=True) - logger.debug( - f"writing document content {file_data.source_identifiers.fullpath} to {download_path}" - ) - with download_path.open("wb") as f: - file.download(f).execute_query() - return self.generate_download_response(file_data=file_data, download_path=download_path) - - def get_site_page(self, file_data: FileData) -> DownloadResponse: - # TODO fetch comments for site page as well - from lxml import etree, html - - canvas_content_raw = file_data.additional_metadata.get("CanvasContent1") - layout_web_parts_content_raw = file_data.additional_metadata.get("LayoutWebpartsContent") - html_content = [] - if layout_web_parts_content_raw: - layout_web_parts_content = json.loads(layout_web_parts_content_raw) - for web_part in layout_web_parts_content: - properties = web_part.get("properties", {}) - if title := properties.get("title"): - html_content.append(f"{title}") - if canvas_content_raw: - canvas_content = json.loads(canvas_content_raw) - for content in canvas_content: - if inner_html := content.get("innerHTML"): - html_content.append(inner_html) - htmls = "".join(html_content) - content = f"
{htmls}
" - document = html.fromstring(content) - download_path = self.get_download_path(file_data=file_data) - download_path.parent.mkdir(parents=True, exist_ok=True) - logger.debug( - f"writing site page content {file_data.source_identifiers.filename} to {download_path}" - ) - with download_path.open("w") as f: - f.write(etree.tostring(document, encoding="unicode", pretty_print=True)) - return self.generate_download_response(file_data=file_data, download_path=download_path) - - def run(self, file_data: FileData, **kwargs: Any) -> download_responses: - content_type = file_data.additional_metadata.get("sharepoint_content_type") - if not content_type: - raise ValueError( - f"Missing sharepoint_content_type metadata: {file_data.additional_metadata}" - ) - if content_type == SharepointContentType.DOCUMENT.value: - return self.get_document(file_data=file_data) - elif content_type == SharepointContentType.SITEPAGE.value: - return self.get_site_page(file_data=file_data) - - -sharepoint_source_entry = SourceRegistryEntry( - connection_config=SharepointConnectionConfig, - indexer_config=SharepointIndexerConfig, - indexer=SharepointIndexer, - downloader_config=SharepointDownloaderConfig, - downloader=SharepointDownloader, -) diff --git a/unstructured/ingest/v2/processes/connectors/singlestore.py b/unstructured/ingest/v2/processes/connectors/singlestore.py deleted file mode 100644 index 3e2d534e2..000000000 --- a/unstructured/ingest/v2/processes/connectors/singlestore.py +++ /dev/null @@ -1,160 +0,0 @@ -import json -from dataclasses import dataclass -from datetime import date, datetime -from pathlib import Path -from typing import TYPE_CHECKING, Any, Optional - -import numpy as np -import pandas as pd -from dateutil import parser - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.utils.data_prep import batch_generator -from unstructured.ingest.utils.table import convert_to_pandas_dataframe -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - FileData, - UploadContent, - Uploader, - UploaderConfig, - UploadStager, - UploadStagerConfig, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, -) -from unstructured.utils import requires_dependencies - -if TYPE_CHECKING: - from singlestoredb.connection import Connection - -CONNECTOR_TYPE = "singlestore" - - -@dataclass -class SingleStoreAccessConfig(AccessConfig): - password: Optional[str] = None - - -@dataclass -class SingleStoreConnectionConfig(ConnectionConfig): - host: Optional[str] = None - port: Optional[int] = None - user: Optional[str] = None - database: Optional[str] = None - access_config: SingleStoreAccessConfig = enhanced_field(sensitive=True) - - @requires_dependencies(["singlestoredb"], extras="singlestore") - def get_connection(self) -> "Connection": - import singlestoredb as s2 - - conn = s2.connect( - host=self.host, - port=self.port, - database=self.database, - user=self.user, - password=self.access_config.password, - ) - return conn - - -@dataclass -class SingleStoreUploadStagerConfig(UploadStagerConfig): - drop_empty_cols: bool = False - - -@dataclass -class SingleStoreUploadStager(UploadStager): - upload_stager_config: SingleStoreUploadStagerConfig - - @staticmethod - def parse_date_string(date_string: str) -> date: - try: - timestamp = float(date_string) - return datetime.fromtimestamp(timestamp) - except Exception as e: - logger.debug(f"date {date_string} string not a 
timestamp: {e}") - return parser.parse(date_string) - - def run( - self, - elements_filepath: Path, - file_data: FileData, - output_dir: Path, - output_filename: str, - **kwargs: Any, - ) -> Path: - with open(elements_filepath) as elements_file: - elements_contents = json.load(elements_file) - output_path = Path(output_dir) / Path(f"{output_filename}.csv") - output_path.parent.mkdir(parents=True, exist_ok=True) - - df = convert_to_pandas_dataframe( - elements_dict=elements_contents, - drop_empty_cols=self.upload_stager_config.drop_empty_cols, - ) - datetime_columns = [ - "data_source_date_created", - "data_source_date_modified", - "data_source_date_processed", - ] - for column in filter(lambda x: x in df.columns, datetime_columns): - df[column] = df[column].apply(self.parse_date_string) - if "data_source_record_locator" in df.columns: - df["data_source_record_locator"] = df["data_source_record_locator"].apply( - lambda x: json.dumps(x) if x else None - ) - - with output_path.open("w") as output_file: - df.to_csv(output_file, index=False) - return output_path - - -@dataclass -class SingleStoreUploaderConfig(UploaderConfig): - table_name: str - batch_size: int = 100 - - -@dataclass -class SingleStoreUploader(Uploader): - connection_config: SingleStoreConnectionConfig - upload_config: SingleStoreUploaderConfig - connector_type: str = CONNECTOR_TYPE - - def upload_csv(self, content: UploadContent) -> None: - df = pd.read_csv(content.path) - logger.debug( - f"uploading {len(df)} entries to {self.connection_config.database} " - f"db in table {self.upload_config.table_name}" - ) - stmt = "INSERT INTO {} ({}) VALUES ({})".format( - self.upload_config.table_name, - ", ".join(df.columns), - ", ".join(["%s"] * len(df.columns)), - ) - logger.debug(f"sql statement: {stmt}") - df.replace({np.nan: None}, inplace=True) - data_as_tuples = list(df.itertuples(index=False, name=None)) - with self.connection_config.get_connection() as conn: - with conn.cursor() as cur: - for chunk in batch_generator( - data_as_tuples, batch_size=self.upload_config.batch_size - ): - cur.executemany(stmt, chunk) - conn.commit() - - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - for content in contents: - self.upload_csv(content=content) - - -singlestore_destination_entry = DestinationRegistryEntry( - connection_config=SingleStoreConnectionConfig, - uploader=SingleStoreUploader, - uploader_config=SingleStoreUploaderConfig, - upload_stager=SingleStoreUploadStager, - upload_stager_config=SingleStoreUploadStagerConfig, -) diff --git a/unstructured/ingest/v2/processes/connectors/sql.py b/unstructured/ingest/v2/processes/connectors/sql.py deleted file mode 100644 index cfec183a1..000000000 --- a/unstructured/ingest/v2/processes/connectors/sql.py +++ /dev/null @@ -1,265 +0,0 @@ -import enum -import json -import uuid -from dataclasses import dataclass, field -from datetime import date, datetime -from pathlib import Path -from typing import Any, Optional, Union - -import numpy as np -import pandas as pd -from dateutil import parser - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - FileData, - UploadContent, - Uploader, - UploaderConfig, - UploadStager, - UploadStagerConfig, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import DestinationRegistryEntry -from unstructured.utils import requires_dependencies - -CONNECTOR_TYPE = "sql" 
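# Overview (hedged summary of the code below): the stager flattens each element's
# metadata, data_source and coordinates sub-dicts into the fixed _COLUMNS set and
# writes JSON Lines; the uploader batches those rows into the "elements" table,
# using "?" placeholders for SQLite and "%s" for PostgreSQL. For an assumed
# three-column frame the generated statement is roughly:
#
#     INSERT INTO elements (id,text,type) VALUES(?,?,?)     -- sqlite
#     INSERT INTO elements (id,text,type) VALUES(%s,%s,%s)  -- postgresql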
-ELEMENTS_TABLE_NAME = "elements" - - -@dataclass -class SQLAccessConfig(AccessConfig): - username: Optional[str] = None - password: Optional[str] = None - - -class DatabaseType(str, enum.Enum): - SQLITE = "sqlite" - POSTGRESQL = "postgresql" - - -@dataclass -class SimpleSqlConfig(ConnectionConfig): - db_type: DatabaseType = ( - # required default value here because of parent class - DatabaseType.SQLITE - ) - database: Optional[str] = None - host: Optional[str] = None - port: Optional[int] = 5432 - access_config: Optional[SQLAccessConfig] = enhanced_field(default=None, sensitive=True) - connector_type: str = CONNECTOR_TYPE - - def __post_init__(self): - if (self.db_type == DatabaseType.SQLITE) and (self.database is None): - raise ValueError( - "A sqlite connection requires a path to a *.db file " - "through the `database` argument" - ) - - -@dataclass -class SQLUploadStagerConfig(UploadStagerConfig): - pass - - -_COLUMNS = ( - "id", - "element_id", - "text", - "embeddings", - "type", - "system", - "layout_width", - "layout_height", - "points", - "url", - "version", - "date_created", - "date_modified", - "date_processed", - "permissions_data", - "record_locator", - "category_depth", - "parent_id", - "attached_filename", - "filetype", - "last_modified", - "file_directory", - "filename", - "languages", - "page_number", - "links", - "page_name", - "link_urls", - "link_texts", - "sent_from", - "sent_to", - "subject", - "section", - "header_footer_type", - "emphasized_text_contents", - "emphasized_text_tags", - "text_as_html", - "detection_class_prob", -) - -_DATE_COLUMNS = ("date_created", "date_modified", "date_processed", "last_modified") - - -def parse_date_string(date_value: Union[str, int]) -> date: - try: - timestamp = float(date_value) / 1000 if isinstance(date_value, int) else float(date_value) - return datetime.fromtimestamp(timestamp) - except Exception as e: - logger.debug(f"date {date_value} string not a timestamp: {e}") - return parser.parse(date_value) - - -@dataclass -class SQLUploadStager(UploadStager): - upload_stager_config: SQLUploadStagerConfig = field( - default_factory=lambda: SQLUploadStagerConfig() - ) - - def run( - self, - elements_filepath: Path, - file_data: FileData, - output_dir: Path, - output_filename: str, - **kwargs: Any, - ) -> Path: - with open(elements_filepath) as elements_file: - elements_contents = json.load(elements_file) - output_path = Path(output_dir) / Path(f"{output_filename}.json") - output_path.parent.mkdir(parents=True, exist_ok=True) - - output = [] - for data in elements_contents: - metadata: dict[str, Any] = data.pop("metadata", {}) - data_source = metadata.pop("data_source", {}) - coordinates = metadata.pop("coordinates", {}) - - data.update(metadata) - data.update(data_source) - data.update(coordinates) - - data["id"] = str(uuid.uuid4()) - - # remove extraneous, not supported columns - [data.pop(column) for column in data if column not in _COLUMNS] - - output.append(data) - - df = pd.DataFrame.from_dict(output) - for column in filter(lambda x: x in df.columns, _DATE_COLUMNS): - df[column] = df[column].apply(parse_date_string) - for column in filter( - lambda x: x in df.columns, - ("permissions_data", "record_locator", "points", "links"), - ): - df[column] = df[column].apply( - lambda x: json.dumps(x) if isinstance(x, (list, dict)) else None - ) - for column in filter(lambda x: x in df.columns, ("version", "page_number")): - df[column] = df[column].apply(str) - - with output_path.open("w") as output_file: - df.to_json(output_file, 
orient="records", lines=True) - return output_path - - -@dataclass -class SQLUploaderConfig(UploaderConfig): - batch_size: int = 50 - - -@dataclass -class SQLUploader(Uploader): - connector_type: str = CONNECTOR_TYPE - upload_config: SQLUploaderConfig - connection_config: SimpleSqlConfig - - @property - def connection(self): - if self.connection_config.db_type == DatabaseType.POSTGRESQL: - return self._make_psycopg_connection - elif self.connection_config.db_type == DatabaseType.SQLITE: - return self._make_sqlite_connection - raise ValueError(f"Unsupported database {self.connection_config.db_type} connection.") - - def _make_sqlite_connection(self): - from sqlite3 import connect - - return connect(database=self.connection_config.database) - - @requires_dependencies(["psycopg2"], extras="postgres") - def _make_psycopg_connection(self): - from psycopg2 import connect - - return connect( - user=self.connection_config.access_config.username, - password=self.connection_config.access_config.password, - dbname=self.connection_config.database, - host=self.connection_config.host, - port=self.connection_config.port, - ) - - def prepare_data( - self, columns: list[str], data: tuple[tuple[Any, ...], ...] - ) -> list[tuple[Any, ...]]: - output = [] - for row in data: - parsed = [] - for column_name, value in zip(columns, row): - if self.connection_config.db_type == DatabaseType.SQLITE and isinstance( - value, (list, dict) - ): - value = json.dumps(value) - if column_name in _DATE_COLUMNS: - if value is None: - parsed.append(None) - else: - parsed.append(parse_date_string(value)) - else: - parsed.append(value) - output.append(tuple(parsed)) - return output - - def upload_contents(self, content: UploadContent) -> None: - df = pd.read_json(content.path, orient="records", lines=True) - logger.debug(f"uploading {len(df)} entries to {self.connection_config.database} ") - df.replace({np.nan: None}, inplace=True) - - columns = tuple(df.columns) - stmt = f"INSERT INTO {ELEMENTS_TABLE_NAME} ({','.join(columns)}) \ - VALUES({','.join(['?' 
if self.connection_config.db_type==DatabaseType.SQLITE else '%s' for x in columns])})" # noqa E501 - - for rows in pd.read_json( - content.path, orient="records", lines=True, chunksize=self.upload_config.batch_size - ): - with self.connection() as conn: - values = self.prepare_data(columns, tuple(rows.itertuples(index=False, name=None))) - if self.connection_config.db_type == DatabaseType.SQLITE: - conn.executemany(stmt, values) - else: - with conn.cursor() as cur: - cur.executemany(stmt, values) - - conn.commit() - - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - for content in contents: - self.upload_contents(content=content) - - -sql_destination_entry = DestinationRegistryEntry( - connection_config=SimpleSqlConfig, - uploader=SQLUploader, - uploader_config=SQLUploaderConfig, - upload_stager=SQLUploadStager, - upload_stager_config=SQLUploadStagerConfig, -) diff --git a/unstructured/ingest/v2/processes/connectors/utils.py b/unstructured/ingest/v2/processes/connectors/utils.py deleted file mode 100644 index 6e6a8e5fc..000000000 --- a/unstructured/ingest/v2/processes/connectors/utils.py +++ /dev/null @@ -1,19 +0,0 @@ -from datetime import datetime -from typing import Union - -from dateutil import parser - - -def parse_datetime(date_value: Union[int, str, float, datetime]) -> datetime: - if isinstance(date_value, datetime): - return date_value - elif isinstance(date_value, float): - return datetime.fromtimestamp(date_value) - elif isinstance(date_value, int): - return datetime.fromtimestamp(date_value / 1000) - - try: - timestamp = float(date_value) - return datetime.fromtimestamp(timestamp) - except ValueError: - return parser.parse(date_value) diff --git a/unstructured/ingest/v2/processes/connectors/weaviate.py b/unstructured/ingest/v2/processes/connectors/weaviate.py deleted file mode 100644 index 67a6c024c..000000000 --- a/unstructured/ingest/v2/processes/connectors/weaviate.py +++ /dev/null @@ -1,232 +0,0 @@ -import json -from dataclasses import dataclass, field -from datetime import date, datetime -from pathlib import Path -from typing import TYPE_CHECKING, Any, Optional - -from dateutil import parser - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - FileData, - UploadContent, - Uploader, - UploaderConfig, - UploadStager, - UploadStagerConfig, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, -) -from unstructured.utils import requires_dependencies - -if TYPE_CHECKING: - from weaviate import Client - -CONNECTOR_TYPE = "weaviate" - - -@dataclass -class WeaviateAccessConfig(AccessConfig): - access_token: Optional[str] = None - api_key: Optional[str] = None - client_secret: Optional[str] = None - password: Optional[str] = None - - -@dataclass -class WeaviateConnectionConfig(ConnectionConfig): - host_url: str - class_name: str - access_config: WeaviateAccessConfig = enhanced_field(sensitive=True) - username: Optional[str] = None - anonymous: bool = False - scope: Optional[list[str]] = None - refresh_token: Optional[str] = None - connector_type: str = CONNECTOR_TYPE - - -@dataclass -class WeaviateUploadStagerConfig(UploadStagerConfig): - pass - - -@dataclass -class WeaviateUploadStager(UploadStager): - upload_stager_config: WeaviateUploadStagerConfig = field( - default_factory=lambda: WeaviateUploadStagerConfig() - ) - - @staticmethod - def 
parse_date_string(date_string: str) -> date: - try: - timestamp = float(date_string) - return datetime.fromtimestamp(timestamp) - except Exception as e: - logger.debug(f"date {date_string} string not a timestamp: {e}") - return parser.parse(date_string) - - @classmethod - def conform_dict(cls, data: dict) -> None: - """ - Updates the element dictionary to conform to the Weaviate schema - """ - - # Dict as string formatting - if record_locator := data.get("metadata", {}).get("data_source", {}).get("record_locator"): - # Explicit casting otherwise fails schema type checking - data["metadata"]["data_source"]["record_locator"] = str(json.dumps(record_locator)) - - # Array of items as string formatting - if points := data.get("metadata", {}).get("coordinates", {}).get("points"): - data["metadata"]["coordinates"]["points"] = str(json.dumps(points)) - - if links := data.get("metadata", {}).get("links", {}): - data["metadata"]["links"] = str(json.dumps(links)) - - if permissions_data := ( - data.get("metadata", {}).get("data_source", {}).get("permissions_data") - ): - data["metadata"]["data_source"]["permissions_data"] = json.dumps(permissions_data) - - # Datetime formatting - if date_created := data.get("metadata", {}).get("data_source", {}).get("date_created"): - data["metadata"]["data_source"]["date_created"] = cls.parse_date_string( - date_created - ).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ", - ) - - if date_modified := data.get("metadata", {}).get("data_source", {}).get("date_modified"): - data["metadata"]["data_source"]["date_modified"] = cls.parse_date_string( - date_modified - ).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ", - ) - - if date_processed := data.get("metadata", {}).get("data_source", {}).get("date_processed"): - data["metadata"]["data_source"]["date_processed"] = cls.parse_date_string( - date_processed - ).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ", - ) - - if last_modified := data.get("metadata", {}).get("last_modified"): - data["metadata"]["last_modified"] = cls.parse_date_string(last_modified).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ", - ) - - # String casting - if version := data.get("metadata", {}).get("data_source", {}).get("version"): - data["metadata"]["data_source"]["version"] = str(version) - - if page_number := data.get("metadata", {}).get("page_number"): - data["metadata"]["page_number"] = str(page_number) - - def run( - self, - elements_filepath: Path, - file_data: FileData, - output_dir: Path, - output_filename: str, - **kwargs: Any, - ) -> Path: - with open(elements_filepath) as elements_file: - elements_contents = json.load(elements_file) - for element in elements_contents: - self.conform_dict(data=element) - output_path = Path(output_dir) / Path(f"{output_filename}.json") - with open(output_path, "w") as output_file: - json.dump(elements_contents, output_file) - return output_path - - -@dataclass -class WeaviateUploaderConfig(UploaderConfig): - batch_size: int = 100 - - -@dataclass -class WeaviateUploader(Uploader): - upload_config: WeaviateUploaderConfig - connection_config: WeaviateConnectionConfig - client: Optional["Client"] = field(init=False) - connector_type: str = CONNECTOR_TYPE - - @requires_dependencies(["weaviate"], extras="weaviate") - def __post_init__(self): - from weaviate import Client - - auth = self._resolve_auth_method() - self.client = Client(url=self.connection_config.host_url, auth_client_secret=auth) - - @requires_dependencies(["weaviate"], extras="weaviate") - def _resolve_auth_method(self): - access_configs = self.connection_config.access_config - 
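        # Auth resolution order in the branches below: anonymous -> no auth;
        # access_token -> AuthBearerToken (optionally with a refresh_token);
        # api_key -> AuthApiKey; client_secret -> AuthClientCredentials (with scope);
        # username + password -> AuthClientPassword; otherwise fall back to None.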
-        connection_config = self.connection_config
-        if connection_config.anonymous:
-            return None
-
-        if access_configs.access_token:
-            from weaviate.auth import AuthBearerToken
-
-            return AuthBearerToken(
-                access_token=access_configs.access_token,
-                refresh_token=connection_config.refresh_token,
-            )
-        elif access_configs.api_key:
-            from weaviate.auth import AuthApiKey
-
-            return AuthApiKey(api_key=access_configs.api_key)
-        elif access_configs.client_secret:
-            from weaviate.auth import AuthClientCredentials
-
-            return AuthClientCredentials(
-                client_secret=access_configs.client_secret, scope=connection_config.scope
-            )
-        elif connection_config.username and access_configs.password:
-            from weaviate.auth import AuthClientPassword
-
-            return AuthClientPassword(
-                username=connection_config.username,
-                password=access_configs.password,
-                scope=connection_config.scope,
-            )
-        return None
-
-    def run(self, contents: list[UploadContent], **kwargs: Any) -> None:
-        # TODO update to use async support in weaviate client
-        # once the version can be bumped to include it
-        elements_dict = []
-        for content in contents:
-            with open(content.path) as elements_file:
-                elements = json.load(elements_file)
-                elements_dict.extend(elements)
-
-        logger.info(
-            f"writing {len(elements_dict)} objects to destination "
-            f"class {self.connection_config.class_name} "
-            f"at {self.connection_config.host_url}",
-        )
-
-        self.client.batch.configure(batch_size=self.upload_config.batch_size)
-        with self.client.batch as b:
-            for e in elements_dict:
-                vector = e.pop("embeddings", None)
-                b.add_data_object(
-                    e,
-                    self.connection_config.class_name,
-                    vector=vector,
-                )
-
-
-weaviate_destination_entry = DestinationRegistryEntry(
-    connection_config=WeaviateConnectionConfig,
-    uploader=WeaviateUploader,
-    uploader_config=WeaviateUploaderConfig,
-    upload_stager=WeaviateUploadStager,
-    upload_stager_config=WeaviateUploadStagerConfig,
-)
diff --git a/unstructured/ingest/v2/processes/embedder.py b/unstructured/ingest/v2/processes/embedder.py
deleted file mode 100644
index 6ed1c560c..000000000
--- a/unstructured/ingest/v2/processes/embedder.py
+++ /dev/null
@@ -1,82 +0,0 @@
-from abc import ABC
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Any, Optional
-
-from unstructured.documents.elements import Element
-from unstructured.embed.interfaces import BaseEmbeddingEncoder
-from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin, enhanced_field
-from unstructured.ingest.v2.interfaces.process import BaseProcess
-from unstructured.staging.base import elements_from_json
-
-
-@dataclass
-class EmbedderConfig(EnhancedDataClassJsonMixin):
-    embedding_provider: Optional[str] = None
-    embedding_api_key: Optional[str] = enhanced_field(default=None, sensitive=True)
-    embedding_model_name: Optional[str] = None
-    embedding_aws_access_key_id: Optional[str] = None
-    embedding_aws_secret_access_key: Optional[str] = None
-    embedding_aws_region: Optional[str] = None
-
-    def get_embedder(self) -> BaseEmbeddingEncoder:
-        kwargs: dict[str, Any] = {}
-        if self.embedding_api_key:
-            kwargs["api_key"] = self.embedding_api_key
-        if self.embedding_model_name:
-            kwargs["model_name"] = self.embedding_model_name
-        # TODO make this more dynamic to map to encoder configs
-        if self.embedding_provider == "langchain-openai":
-            from unstructured.embed.openai import OpenAIEmbeddingConfig, OpenAIEmbeddingEncoder
-
-            return OpenAIEmbeddingEncoder(config=OpenAIEmbeddingConfig(**kwargs))
"langchain-huggingface": - from unstructured.embed.huggingface import ( - HuggingFaceEmbeddingConfig, - HuggingFaceEmbeddingEncoder, - ) - - return HuggingFaceEmbeddingEncoder(config=HuggingFaceEmbeddingConfig(**kwargs)) - elif self.embedding_provider == "octoai": - from unstructured.embed.octoai import OctoAiEmbeddingConfig, OctoAIEmbeddingEncoder - - return OctoAIEmbeddingEncoder(config=OctoAiEmbeddingConfig(**kwargs)) - elif self.embedding_provider == "langchain-aws-bedrock": - from unstructured.embed.bedrock import BedrockEmbeddingConfig, BedrockEmbeddingEncoder - - return BedrockEmbeddingEncoder( - config=BedrockEmbeddingConfig( - aws_access_key_id=self.embedding_aws_access_key_id, - aws_secret_access_key=self.embedding_aws_secret_access_key, - region_name=self.embedding_aws_region, - ) - ) - elif self.embedding_provider == "langchain-vertexai": - from unstructured.embed.vertexai import ( - VertexAIEmbeddingConfig, - VertexAIEmbeddingEncoder, - ) - - return VertexAIEmbeddingEncoder(config=VertexAIEmbeddingConfig(**kwargs)) - elif self.embedding_provider == "mixedbread-ai": - from unstructured.embed.mixedbreadai import ( - MixedbreadAIEmbeddingConfig, - MixedbreadAIEmbeddingEncoder, - ) - - return MixedbreadAIEmbeddingEncoder(config=MixedbreadAIEmbeddingConfig(**kwargs)) - else: - raise ValueError(f"{self.embedding_provider} not a recognized encoder") - - -@dataclass -class Embedder(BaseProcess, ABC): - config: EmbedderConfig - - def run(self, elements_filepath: Path, **kwargs: Any) -> list[Element]: - # TODO update base embedder classes to support async - embedder = self.config.get_embedder() - elements = elements_from_json(filename=str(elements_filepath)) - if not elements: - return elements - return embedder.embed_documents(elements=elements) diff --git a/unstructured/ingest/v2/processes/partitioner.py b/unstructured/ingest/v2/processes/partitioner.py deleted file mode 100644 index 71bcd5700..000000000 --- a/unstructured/ingest/v2/processes/partitioner.py +++ /dev/null @@ -1,165 +0,0 @@ -import asyncio -from abc import ABC -from dataclasses import dataclass, field, fields -from pathlib import Path -from typing import TYPE_CHECKING, Any, Optional - -from unstructured.documents.elements import DataSourceMetadata -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin -from unstructured.ingest.enhanced_dataclass.dataclasses import enhanced_field -from unstructured.ingest.v2.interfaces.process import BaseProcess -from unstructured.ingest.v2.logger import logger -from unstructured.staging.base import elements_to_dicts, flatten_dict - -if TYPE_CHECKING: - from unstructured_client import UnstructuredClient - from unstructured_client.models.shared import PartitionParameters - - -@dataclass -class PartitionerConfig(EnhancedDataClassJsonMixin): - strategy: str = "auto" - ocr_languages: Optional[list[str]] = None - encoding: Optional[str] = None - additional_partition_args: Optional[dict[str, Any]] = None - skip_infer_table_types: Optional[list[str]] = None - fields_include: list[str] = field( - default_factory=lambda: ["element_id", "text", "type", "metadata", "embeddings"], - ) - flatten_metadata: bool = False - metadata_exclude: list[str] = field(default_factory=list) - metadata_include: list[str] = field(default_factory=list) - partition_endpoint: Optional[str] = "https://api.unstructured.io/general/v0/general" - partition_by_api: bool = False - api_key: Optional[str] = enhanced_field(default=None, sensitive=True) - hi_res_model_name: Optional[str] = None - - def 
-    def __post_init__(self):
-        if self.metadata_exclude and self.metadata_include:
-            raise ValueError(
-                "metadata_exclude and metadata_include are "
-                "mutually exclusive with each other. Cannot specify both."
-            )
-
-    def to_partition_kwargs(self) -> dict[str, Any]:
-        partition_kwargs: dict[str, Any] = {
-            "strategy": self.strategy,
-            "languages": self.ocr_languages,
-            "hi_res_model_name": self.hi_res_model_name,
-            "skip_infer_table_types": self.skip_infer_table_types,
-        }
-        # Don't inject information if None and allow default values in method to be used
-        partition_kwargs = {k: v for k, v in partition_kwargs.items() if v is not None}
-        if self.additional_partition_args:
-            partition_kwargs.update(self.additional_partition_args)
-        return partition_kwargs
-
-
-@dataclass
-class Partitioner(BaseProcess, ABC):
-    config: PartitionerConfig
-
-    def is_async(self) -> bool:
-        return self.config.partition_by_api
-
-    def postprocess(self, elements: list[dict]) -> list[dict]:
-        element_dicts = [e.copy() for e in elements]
-        for elem in element_dicts:
-            if self.config.metadata_exclude:
-                ex_list = self.config.metadata_exclude
-                for ex in ex_list:
-                    if "." in ex:  # handle nested fields
-                        nested_fields = ex.split(".")
-                        current_elem = elem
-                        for f in nested_fields[:-1]:
-                            if f in current_elem:
-                                current_elem = current_elem[f]
-                        field_to_exclude = nested_fields[-1]
-                        if field_to_exclude in current_elem:
-                            current_elem.pop(field_to_exclude, None)
-                    else:  # handle top-level fields
-                        elem["metadata"].pop(ex, None)  # type: ignore[attr-defined]
-            elif self.config.metadata_include:
-                in_list = self.config.metadata_include
-                for k in list(elem["metadata"].keys()):  # type: ignore[attr-defined]
-                    if k not in in_list:
-                        elem["metadata"].pop(k, None)  # type: ignore[attr-defined]
-            in_list = self.config.fields_include
-            elem = {k: v for k, v in elem.items() if k in in_list}
-
-            if self.config.flatten_metadata and "metadata" in elem:
-                metadata = elem.pop("metadata")
-                elem.update(flatten_dict(metadata, keys_to_omit=["data_source_record_locator"]))
-        return element_dicts
-
-    def partition_locally(
-        self, filename: Path, metadata: Optional[DataSourceMetadata] = None, **kwargs
-    ) -> list[dict]:
-        from unstructured.partition.auto import partition
-
-        logger.debug(f"Using local partition with kwargs: {self.config.to_partition_kwargs()}")
-        logger.debug(f"partitioning file {filename} with metadata {metadata.to_dict()}")
-        elements = partition(
-            filename=str(filename.resolve()),
-            data_source_metadata=metadata,
-            **self.config.to_partition_kwargs(),
-        )
-        return self.postprocess(elements=elements_to_dicts(elements))
-
-    async def call_api(self, client: "UnstructuredClient", request: "PartitionParameters"):
-        # TODO when client supports async, run without using run_in_executor
-        # isolate the IO heavy call
-        loop = asyncio.get_event_loop()
-        return await loop.run_in_executor(None, client.general.partition, request)
-
-    def create_partition_parameters(self, filename: Path) -> "PartitionParameters":
-        from unstructured_client.models.shared import Files, PartitionParameters
-
-        partition_request = self.config.to_partition_kwargs()
-        possible_fields = [f.name for f in fields(PartitionParameters)]
-        filtered_partition_request = {
-            k: v for k, v in partition_request.items() if k in possible_fields
-        }
-        if len(filtered_partition_request) != len(partition_request):
-            logger.debug(
-                "Following fields were omitted due to not being "
-                "supported by the currently used unstructured client: {}".format(
-                    ", ".join([v for v in partition_request if v not in filtered_partition_request])
-                )
-            )
-        logger.debug(f"Using hosted partitioner with kwargs: {partition_request}")
-        with open(filename, "rb") as f:
-            files = Files(
-                content=f.read(),
-                file_name=str(filename.resolve()),
-            )
-        filtered_partition_request["files"] = files
-        partition_params = PartitionParameters(**filtered_partition_request)
-        return partition_params
-
-    async def partition_via_api(
-        self, filename: Path, metadata: Optional[DataSourceMetadata] = None, **kwargs
-    ) -> list[dict]:
-        from unstructured_client import UnstructuredClient
-
-        logger.debug(f"partitioning file {filename} with metadata: {metadata.to_dict()}")
-        client = UnstructuredClient(
-            server_url=self.config.partition_endpoint, api_key_auth=self.config.api_key
-        )
-        partition_params = self.create_partition_parameters(filename=filename)
-        resp = await self.call_api(client=client, request=partition_params)
-        elements = resp.elements or []
-        # Append the data source metadata the auto partition does for you
-        for element in elements:
-            element["metadata"]["data_source"] = metadata.to_dict()
-        return self.postprocess(elements=elements)
-
-    def run(
-        self, filename: Path, metadata: Optional[DataSourceMetadata] = None, **kwargs
-    ) -> list[dict]:
-        return self.partition_locally(filename, metadata=metadata, **kwargs)
-
-    async def run_async(
-        self, filename: Path, metadata: Optional[DataSourceMetadata] = None, **kwargs
-    ) -> list[dict]:
-        return await self.partition_via_api(filename, metadata=metadata, **kwargs)
diff --git a/unstructured/ingest/v2/processes/uncompress.py b/unstructured/ingest/v2/processes/uncompress.py
deleted file mode 100644
index e0b826461..000000000
--- a/unstructured/ingest/v2/processes/uncompress.py
+++ /dev/null
@@ -1,43 +0,0 @@
-from abc import ABC
-from copy import copy
-from dataclasses import dataclass, field
-from pathlib import Path
-from typing import Any
-
-from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin
-from unstructured.ingest.utils.compression import TAR_FILE_EXT, ZIP_FILE_EXT, uncompress_file
-from unstructured.ingest.v2.interfaces import FileData
-from unstructured.ingest.v2.interfaces.process import BaseProcess
-
-
-@dataclass
-class UncompressConfig(EnhancedDataClassJsonMixin):
-    pass
-
-
-@dataclass
-class Uncompressor(BaseProcess, ABC):
-    config: UncompressConfig = field(default_factory=UncompressConfig)
-
-    def is_async(self) -> bool:
-        return True
-
-    def run(self, file_data: FileData, **kwargs: Any) -> list[FileData]:
-        local_filepath = Path(file_data.source_identifiers.fullpath)
-        if local_filepath.suffix not in TAR_FILE_EXT + ZIP_FILE_EXT:
-            return [file_data]
-        new_path = uncompress_file(filename=str(local_filepath))
-        new_files = [i for i in Path(new_path).rglob("*") if i.is_file()]
-        responses = []
-        for f in new_files:
-            new_file_data = copy(file_data)
-            new_file_data.source_identifiers.fullpath = str(f)
-            if new_file_data.source_identifiers.rel_path:
-                new_file_data.source_identifiers.rel_path = str(f).replace(
-                    str(local_filepath.parent), ""
-                )[1:]
-            responses.append(new_file_data)
-        return responses
-
-    async def run_async(self, file_data: FileData, **kwargs: Any) -> list[FileData]:
-        return self.run(file_data=file_data, **kwargs)
diff --git a/unstructured/utils.py b/unstructured/utils.py
index 03632e37a..523fcd4a0 100644
--- a/unstructured/utils.py
+++ b/unstructured/utils.py
@@ -10,7 +10,6 @@ import platform
 import subprocess
 import tempfile
 import threading
-from datetime import datetime
 from functools import wraps
 from itertools import combinations
 from typing import (
@@ -238,36 +237,6 @@ def dependency_exists(dependency: str):
     return True
 
 
-def validate_date_args(date: Optional[str] = None) -> bool:
-    """Validate whether the provided date string satisfies any of the supported date formats.
-
-    Used by unstructured/ingest/connector/biomed.py
-
-    Returns `True` if the date string satisfies any of the supported formats, otherwise raises
-    `ValueError`.
-
-    Supported Date Formats:
-        - 'YYYY-MM-DD'
-        - 'YYYY-MM-DDTHH:MM:SS'
-        - 'YYYY-MM-DD+HH:MM:SS'
-        - 'YYYY-MM-DDTHH:MM:SS±HHMM'
-    """
-    if not date:
-        raise ValueError("The argument date is None.")
-
-    for format in DATE_FORMATS:
-        try:
-            datetime.strptime(date, format)
-            return True
-        except ValueError:
-            pass
-
-    raise ValueError(
-        f"The argument {date} does not satisfy the format:"
-        f" YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS or YYYY-MM-DD+HH:MM:SS or YYYY-MM-DDTHH:MM:SS±HHMM",
-    )
-
-
 def _first_and_remaining_iterator(it: Iterable[_T]) -> Tuple[_T, Iterator[_T]]:
     iterator = iter(it)
     try: