From 0c562d80503f6ef96504c6e38f27cfd9da8761df Mon Sep 17 00:00:00 2001
From: Steve Canny <stcanny@gmail.com>
Date: Tue, 9 Jul 2024 22:29:07 -0700
Subject: [PATCH] rfctr(auto): fix auto-partition test xfails and skips (#3367)

**Summary**
Improve expression in auto-partition tests and fix xfails and skips. Add
issues for the two hard-fails where xfail needed to stay.
---
 CHANGELOG.md                             |    2 +-
 example-docs/simple.json                 |  127 +++
 example-docs/spring-weather.html.json    | 1124 ++++++++++++++++++++--
 test_unstructured/partition/test_auto.py |  411 ++++----
 unstructured/__version__.py              |    2 +-
 5 files changed, 1360 insertions(+), 306 deletions(-)
 create mode 100644 example-docs/simple.json

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 31420ba82..851fc2c5e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,4 @@
-## 0.14.11-dev4
+## 0.14.11-dev5
 
 ### Enhancements
 
diff --git a/example-docs/simple.json b/example-docs/simple.json
new file mode 100644
index 000000000..cd47b9e73
--- /dev/null
+++ b/example-docs/simple.json
@@ -0,0 +1,127 @@
+[
+    {
+        "element_id": "a06d2d9e65212d4aa955c3ab32950ffa",
+        "metadata": {
+            "category_depth": 0,
+            "file_directory": "unstructured/example-docs",
+            "filename": "simple.docx",
+            "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+            "languages": [
+                "eng"
+            ],
+            "last_modified": "2024-07-06T16:44:51"
+        },
+        "text": "These are a few of my favorite things:",
+        "type": "Title"
+    },
+    {
+        "element_id": "b334c93e9b1cbca3b6f6d78ce8bc2484",
+        "metadata": {
+            "category_depth": 0,
+            "file_directory": "unstructured/example-docs",
+            "filename": "simple.docx",
+            "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+            "languages": [
+                "eng"
+            ],
+            "last_modified": "2024-07-06T16:44:51",
+            "parent_id": "a06d2d9e65212d4aa955c3ab32950ffa"
+        },
+        "text": "Parrots",
+        "type": "ListItem"
+    },
+    {
+        "element_id": "76469ecb9f1459943c8d8cca1a550b5a",
+        "metadata": {
+            "category_depth": 0,
+            "file_directory": "unstructured/example-docs",
+            "filename": "simple.docx",
+            "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+            "languages": [
+                "eng"
+            ],
+            "last_modified": "2024-07-06T16:44:51",
+            "parent_id": "a06d2d9e65212d4aa955c3ab32950ffa"
+        },
+        "text": "Hockey",
+        "type": "ListItem"
+    },
+    {
+        "element_id": "261fac731945a138415adc2dd4434b17",
+        "metadata": {
+            "category_depth": 0,
+            "file_directory": "unstructured/example-docs",
+            "filename": "simple.docx",
+            "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+            "languages": [
+                "eng"
+            ],
+            "last_modified": "2024-07-06T16:44:51"
+        },
+        "text": "Analysis",
+        "type": "Title"
+    },
+    {
+        "element_id": "95f392d32c5271bfdb30eaef45921e59",
+        "metadata": {
+            "category_depth": 0,
+            "file_directory": "unstructured/example-docs",
+            "filename": "simple.docx",
+            "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+            "languages": [
+                "eng"
+            ],
+            "last_modified": "2024-07-06T16:44:51",
+            "parent_id": "261fac731945a138415adc2dd4434b17"
+        },
+        "text": "This is my first thought. This is my second thought.",
+        "type": "NarrativeText"
+    },
+    {
+        "element_id": "0de25bd6f0d74bc4f909f2678f385736",
+        "metadata": {
+            "category_depth": 0,
+            "file_directory": "unstructured/example-docs",
+            "filename": "simple.docx",
+            "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+            "languages": [
+                "eng"
+            ],
+            "last_modified": "2024-07-06T16:44:51",
+            "parent_id": "261fac731945a138415adc2dd4434b17"
+        },
+        "text": "This is my third thought.",
+        "type": "NarrativeText"
+    },
+    {
+        "element_id": "f296a3bc8a901f19199fda1da92829b6",
+        "metadata": {
+            "category_depth": 0,
+            "file_directory": "unstructured/example-docs",
+            "filename": "simple.docx",
+            "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+            "languages": [
+                "eng"
+            ],
+            "last_modified": "2024-07-06T16:44:51",
+            "parent_id": "261fac731945a138415adc2dd4434b17"
+        },
+        "text": "2023",
+        "type": "UncategorizedText"
+    },
+    {
+        "element_id": "78c62edbc674fdca0f6a0e3ffb459f86",
+        "metadata": {
+            "category_depth": 0,
+            "file_directory": "unstructured/example-docs",
+            "filename": "simple.docx",
+            "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+            "languages": [
+                "eng"
+            ],
+            "last_modified": "2024-07-06T16:44:51"
+        },
+        "text": "DOYLESTOWN, PA 18901",
+        "type": "Address"
+    }
+]
\ No newline at end of file
diff --git a/example-docs/spring-weather.html.json b/example-docs/spring-weather.html.json
index 1b3b4a980..591a796c9 100644
--- a/example-docs/spring-weather.html.json
+++ b/example-docs/spring-weather.html.json
@@ -1,226 +1,1178 @@
 [
   {
-    "element_id": "41f6e17bf5e9a407fcca74e902f802a0",
+    "type": "Title",
+    "element_id": "fb902c5b26b38e2d35a70a55d43a5de6",
     "text": "News Around NOAA",
-    "type": "Title",
     "metadata": {
-      "page_number": 1
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   },
   {
-    "element_id": "aa589c25dc22dcc8a75baba1244e6c8f",
+    "type": "Title",
+    "element_id": "100233c72890df3d216e2bc2c36f7153",
     "text": "National Program",
-    "type": "Title",
     "metadata": {
-      "page_number": 1
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   },
   {
-    "element_id": "62c26d2e16774d2334bd804c7bb6a711",
+    "type": "Title",
+    "element_id": "88f0bebe7a9cca77675bd8a5db823092",
     "text": "Are You Weather-Ready for the Spring?",
-    "type": "Title",
     "metadata": {
-      "page_number": 1
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   },
   {
-    "element_id": "32709cd3bec72640bbbe32f58e6e23f6",
+    "type": "Title",
+    "element_id": "568c824acda361cfc270a75e2eca7a23",
     "text": "Weather.gov >",
-    "type": "Title",
     "metadata": {
-      "page_number": 1
+      "link_texts": [
+        "Weather.gov"
+      ],
+      "link_urls": [
+        "https://www.weather.gov"
+      ],
+      "link_start_indexes": [
+        -1
+      ],
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   },
   {
-    "element_id": "2661da76db570876b075083aaeeaee55",
+    "type": "Title",
+    "element_id": "767e68cdb3d891322eb8b65489f53b4c",
     "text": "News Around NOAA > Are You Weather-Ready for the Spring?",
-    "type": "Title",
     "metadata": {
-      "page_number": 1
+      "link_texts": [
+        "News Around NOAA"
+      ],
+      "link_urls": [
+        "https://www.weather.gov/news"
+      ],
+      "link_start_indexes": [
+        -1
+      ],
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   },
   {
-    "element_id": "fab6c4df083f0fb6f324fff65b652c86",
+    "type": "ListItem",
+    "element_id": "79fb885317b2666481d0a1c31970400d",
     "text": "Weather Safety                                                                                                        Air Quality                                                                            Beach Hazards                                                                            Cold                                                                            Cold Water                                                                            Drought                                                                            Floods                                                                            Fog                                                                            Heat                                                                             Hurricanes                                                                             Lightning Safety                                                                            Rip Currents                                                                            Safe Boating                                                                            Space Weather                                                                            Sun (Ultraviolet Radiation)                                                                             Thunderstorms & Tornadoes                                                                            Tornado                                                                            Tsunami                                                                            Wildfire                                                                            Wind                                                                            Winter",
-    "type": "ListItem",
     "metadata": {
-      "page_number": 1
+      "link_texts": [
+        "Weather Safety",
+        "Air Quality",
+        "Beach Hazards",
+        "Cold",
+        "Cold Water",
+        "Drought",
+        "Floods",
+        "Fog",
+        "Heat",
+        " Hurricanes",
+        " Lightning Safety",
+        "Rip Currents",
+        "Safe Boating",
+        "Space Weather",
+        "Sun (Ultraviolet Radiation)",
+        " Thunderstorms & Tornadoes",
+        "Tornado",
+        "Tsunami",
+        "Wildfire",
+        "Wind",
+        "Winter"
+      ],
+      "link_urls": [
+        "http://www.weather.gov/safetycampaign",
+        "https://www.weather.gov/safety/airquality",
+        "https://www.weather.gov/safety/beachhazards",
+        "https://www.weather.gov/safety/cold",
+        "https://www.weather.gov/safety/coldwater",
+        "https://www.weather.gov/safety/drought",
+        "https://www.weather.gov/safety/flood",
+        "https://www.weather.gov/safety/fog",
+        "https://www.weather.gov/safety/heat",
+        "https://www.weather.gov/safety/hurricane",
+        "https://www.weather.gov/safety/lightning",
+        "https://www.weather.gov/safety/ripcurrent",
+        "https://www.weather.gov/safety/safeboating",
+        "https://www.weather.gov/safety/space",
+        "https://www.weather.gov/safety/heat-uv",
+        "https://www.weather.gov/safety/thunderstorm",
+        "https://www.weather.gov/safety/tornado",
+        "https://www.weather.gov/safety/tsunami",
+        "https://www.weather.gov/safety/wildfire",
+        "https://www.weather.gov/safety/wind",
+        "https://www.weather.gov/safety/winter "
+      ],
+      "link_start_indexes": [
+        0,
+        14,
+        25,
+        38,
+        42,
+        52,
+        59,
+        65,
+        68,
+        72,
+        83,
+        100,
+        112,
+        124,
+        137,
+        164,
+        190,
+        197,
+        204,
+        212,
+        216
+      ],
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   },
   {
-    "element_id": "45c26cf3457e6d18985a435e2c0fcc65",
+    "type": "ListItem",
+    "element_id": "512e6a00cacb0ab139ede6b0145f441d",
     "text": "Safety Campaigns                                                                                                        Seasonal Safety Campaigns                                                                            #SafePlaceSelfie                                                                            Deaf & Hard of Hearing                                                                            Intellectual Disabilities                                                                            Spanish-language Content                                                                            The Great Outdoors",
-    "type": "ListItem",
     "metadata": {
-      "page_number": 1
+      "link_texts": [
+        "Safety Campaigns",
+        "Seasonal Safety Campaigns",
+        "#SafePlaceSelfie",
+        "Deaf & Hard of Hearing",
+        "Intellectual Disabilities",
+        "Spanish-language Content",
+        "The Great Outdoors"
+      ],
+      "link_urls": [
+        "https://www.weather.gov/safetycampaign",
+        "https://www.weather.gov/safetycampaign",
+        "https://www.weather.gov/wrn/safeplaceselfie",
+        "https://www.weather.gov/wrn/dhh-safety",
+        "https://www.weather.gov/wrn/intellectualdisabilities",
+        "https://www.weather.gov/wrn/fall2020-espanol-sm",
+        "https://www.noaa.gov/explainers/great-outdoors-weather-safety"
+      ],
+      "link_start_indexes": [
+        0,
+        16,
+        41,
+        57,
+        79,
+        104,
+        128
+      ],
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   },
   {
-    "element_id": "77f5acc603de9a165ed87a5c3fbaf14a",
-    "text": "Ambassador                                                                                                        About WRN Ambassadors                                                                            Become an Ambassador                                                                            Ambassadors of Excellence                                                                            People of WRN                                                                             FAQS                                                                            Tell Your Success Story                                                                             Success Stories                                                                            Tri-fold                                                                            Aviation                                                                             Current Ambassadors                                                                            Brochure                                                                            En Español",
     "type": "ListItem",
+    "element_id": "d4145282089e41261300a9bcf440edb9",
+    "text": "Ambassador                                                                                                        About WRN Ambassadors                                                                            Become an Ambassador                                                                            Ambassadors of Excellence                                                                            People of WRN                                                                             FAQS                                                                            Tell Your Success Story                                                                             Success Stories                                                                            Tri-fold                                                                            Aviation                                                                             Current Ambassadors                                                                            Brochure                                                                            En Espa\u00f1ol",
     "metadata": {
-      "page_number": 1
+      "link_texts": [
+        "Ambassador",
+        "About WRN Ambassadors",
+        "Become an Ambassador",
+        "Ambassadors of Excellence",
+        "People of WRN",
+        " FAQS",
+        "Tell Your Success Story",
+        " Success Stories",
+        "Tri-fold",
+        "Aviation",
+        " Current Ambassadors",
+        "Brochure",
+        "En Espa\u00f1ol"
+      ],
+      "link_urls": [
+        "https://www.weather.gov/wrn/ambassadors",
+        "https://www.weather.gov/wrn/ambassadors",
+        "https://www.weather.gov/wrn/amb-tou",
+        "https://www.weather.gov/wrn/ambassador_recognition",
+        "https://www.weather.gov/people/",
+        "https://www.weather.gov/wrn/amb-faqs",
+        "https://docs.google.com/forms/d/e/1FAIpQLScPHee5WAyC5K1LZ3pWLa2zjaM1HZSKN4_AxGUc6RaCy_gxLA/viewform",
+        " https://www.weather.gov/wrn/success-stories",
+        "http://www.weather.gov/media/wrn/WRN_Ambassador_Trifold.pdf",
+        "https://www.weather.gov/wrn/aviation",
+        " http://www.weather.gov/wrn/current-ambassadors",
+        "http://www.weather.gov/media/wrn/WRN_Ambassador_Flyer.pdf",
+        "https://www.weather.gov/wrn/en-espanol"
+      ],
+      "link_start_indexes": [
+        0,
+        10,
+        31,
+        51,
+        76,
+        89,
+        94,
+        117,
+        133,
+        141,
+        149,
+        169,
+        177
+      ],
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   },
   {
-    "element_id": "8f19bcaabbd1bafa5e9826ac69766c8b",
+    "type": "ListItem",
+    "element_id": "aeee9b1d3904eda123d21c851ce4747d",
     "text": "Education                                                                                                        NWS Education Home                                                                            Be A Force Of Nature                                                                            WRN Kids Flyer                                                                            Wireless Emergency Alerts                                                                            NOAA Weather Radio                                                                            Mobile Weather                                                                            Brochures                                                                            Hourly Weather Forecast                                                                            Citizen Science                                                                            Intellectual Disabilities",
-    "type": "ListItem",
     "metadata": {
-      "page_number": 1
+      "link_texts": [
+        "Education",
+        "NWS Education Home",
+        "Be A Force Of Nature",
+        "WRN Kids Flyer",
+        "Wireless Emergency Alerts",
+        "NOAA Weather Radio",
+        "Mobile Weather",
+        "Brochures",
+        "Hourly Weather Forecast",
+        "Citizen Science",
+        "Intellectual Disabilities"
+      ],
+      "link_urls": [
+        "http://www.weather.gov/owlie/",
+        "http://www.weather.gov/owlie/",
+        "https://www.weather.gov/wrn/force",
+        " http://www.weather.gov/media/owlie/nws_kids_fact_sheet2.pdf",
+        "https://www.weather.gov/wrn/wea",
+        "http://www.nws.noaa.gov/nwr/",
+        "https://www.weather.gov/wrn/mobile-phone",
+        "http://www.weather.gov/owlie/publication_brochures",
+        "https://www.weather.gov/wrn/hourly-weather-graph",
+        "http://www.weather.gov/media/wrn/citizen_science_page.pdf",
+        "https://www.weather.gov/wrn/intellectualdisabilities"
+      ],
+      "link_start_indexes": [
+        0,
+        9,
+        27,
+        47,
+        61,
+        86,
+        104,
+        118,
+        127,
+        150,
+        165
+      ],
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   },
   {
-    "element_id": "1245f9cf9e019713391e4ee3bac54a63",
-    "text": "Collaboration                                                                                                        Get Involved                                                                             Social Media                                                                            WRN Ambassadors ​                                                                            Enterprise Resources                                                                            StormReady                                                                            TsunamiReady                                                                            NWSChat (core partners only)                                                                            InteractiveNWS (iNWS) (core partners only)​                                                                            SKYWARN",
     "type": "ListItem",
+    "element_id": "752f5b846e4a24df6d62d9dc014e5aec",
+    "text": "Collaboration                                                                                                        Get Involved                                                                             Social Media                                                                            WRN Ambassadors \u200b                                                                            Enterprise Resources                                                                            StormReady                                                                            TsunamiReady                                                                            NWSChat (core partners only)                                                                            InteractiveNWS (iNWS) (core partners only)\u200b                                                                            SKYWARN",
     "metadata": {
-      "page_number": 1
+      "link_texts": [
+        "Collaboration",
+        "Get Involved ",
+        "Social Media",
+        "WRN Ambassadors \u200b",
+        "Enterprise Resources",
+        "StormReady",
+        "TsunamiReady",
+        "NWSChat (core partners only)",
+        "InteractiveNWS (iNWS) (core partners only)\u200b",
+        "SKYWARN"
+      ],
+      "link_urls": [
+        "https://www.weather.gov/wrn/collaborate",
+        "https://www.weather.gov/wrn/get-involved",
+        "http://www.weather.gov/socialmedia",
+        "https://www.weather.gov/wrn/ambassadors",
+        "https://www.weather.gov/enterprise/",
+        "http://www.weather.gov/stormready/",
+        "https://www.weather.gov/tsunamiready/",
+        "https://nwschat.weather.gov/",
+        "https://inws.ncep.noaa.gov/",
+        "https://www.weather.gov/SKYWARN"
+      ],
+      "link_start_indexes": [
+        0,
+        13,
+        26,
+        38,
+        55,
+        75,
+        85,
+        97,
+        125,
+        168
+      ],
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   },
   {
-    "element_id": "23dfa7f98424dbf86e00b3d500096dfa",
+    "type": "ListItem",
+    "element_id": "8729b5380b0f442c0512948bd18de66b",
     "text": "News & Events                                                                                                        Latest News                                                                            Calendar                                                                            Meetings & Workshops                                                                            NWS Aware Newsletter",
-    "type": "ListItem",
     "metadata": {
-      "page_number": 1
+      "link_texts": [
+        " News & Events",
+        "Latest News",
+        "Calendar",
+        "Meetings & Workshops",
+        "NWS Aware Newsletter"
+      ],
+      "link_urls": [
+        "http://www.weather.gov/news/",
+        " http://www.weather.gov/news/",
+        "https://www.weather.gov/wrn/calendar",
+        " https://www.weather.gov/wrn/workshops",
+        "https://www.weather.gov/publications/aware"
+      ],
+      "link_start_indexes": [
+        0,
+        14,
+        25,
+        33,
+        53
+      ],
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   },
   {
-    "element_id": "93202df2ec7081b28b47901b5c287a5a",
+    "type": "ListItem",
+    "element_id": "ec0f9efa0e7de0d7bbf11f3b8fb2a1ca",
     "text": "International",
-    "type": "ListItem",
     "metadata": {
-      "page_number": 1
+      "link_texts": [
+        "International"
+      ],
+      "link_urls": [
+        "https://www.weather.gov/wrn/wrns"
+      ],
+      "link_start_indexes": [
+        0
+      ],
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   },
   {
-    "element_id": "e53d6a9c615bdf1a8d7b98a67cade488",
+    "type": "ListItem",
+    "element_id": "f17da617a620de011003a204ecf48752",
     "text": "About                                                                                                        Contact Us                                                                             What is WRN?                                                                             WRN FAQ                                                                            WRN Brochure                                                                            Hazard Simplification                                                                            IDSS Brochure                                                                            Roadmap                                                                            Strategic Plan                                                                            WRN International                                                                            Social Science",
-    "type": "ListItem",
     "metadata": {
-      "page_number": 1
+      "link_texts": [
+        "About",
+        "Contact Us",
+        " What is WRN?",
+        " WRN FAQ",
+        "WRN Brochure",
+        "Hazard Simplification",
+        "IDSS Brochure",
+        "Roadmap",
+        "Strategic Plan",
+        "WRN International",
+        "Social Science"
+      ],
+      "link_urls": [
+        "https://www.weather.gov/wrn/about",
+        " https://www.weather.gov/wrn/contact",
+        "https://www.weather.gov/wrn/about",
+        "https://www.weather.gov/wrn/faqs",
+        "http://www.weather.gov/media/wrn/WRN_Ambassador_Flyer.pdf",
+        "https://www.weather.gov/hazardsimplification/",
+        "https://www.weather.gov/media/wrn/2018-IDSS2-Pager.pdf",
+        "http://www.weather.gov/media/wrn/nws_wrn_roadmap_final_april17.pdf",
+        "https://www.weather.gov/media/wrn/NWS_Weather-Ready-Nation_Strategic_Plan_2019-2022.pdf",
+        " https://www.weather.gov/wrn/international",
+        "https://vlab.noaa.gov/web/nws-social-science"
+      ],
+      "link_start_indexes": [
+        0,
+        5,
+        15,
+        28,
+        36,
+        48,
+        69,
+        82,
+        89,
+        103,
+        120
+      ],
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   },
   {
-    "element_id": "6cbcf8c11f8c0781bd9ecc7f67169ff0",
-    "text": "The spring season is all about change – a rebirth both literally and figuratively. Even though the spring season doesn’t officially (astronomically, that is) begin until March 20 this year, climatologically, it starts March 1.",
     "type": "NarrativeText",
+    "element_id": "623c25f2247b125d6df5138a7c5ee153",
+    "text": "The spring season is all about change \u2013 a rebirth both literally and figuratively. Even though the spring season doesn\u2019t officially (astronomically, that is) begin until March 20 this year, climatologically, it starts March 1.",
     "metadata": {
-      "page_number": 1
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   },
   {
-    "element_id": "7184168da442c6ef28553b274bf2be8f",
+    "type": "NarrativeText",
+    "element_id": "c8c953bd87e4571df8e6486e9c467861",
     "text": "As cold winter nights are replaced by the warmth of longer daylight hours, the National Weather Service invites you to do two important things that may save your life or the life of a loved one.",
-    "type": "NarrativeText",
     "metadata": {
-      "page_number": 1
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   },
   {
-    "element_id": "f3be9748ecd68b20d706548129baa22d",
-    "text": "First, take steps to better prepare for the seasonal hazards weather can throw at you.\nThis could include a spring cleaning of your storm shelter or ensuring your emergency kit is fully stocked. Take a look at our infographics and social media posts to help you become “weather-ready.”",
     "type": "NarrativeText",
+    "element_id": "b6553aef4dc61e5d31e2e28426e56f0b",
+    "text": "First, take steps to better prepare for the seasonal hazards weather can throw at you.",
     "metadata": {
-      "page_number": 1
+      "emphasized_text_contents": [
+        "First, take steps to better prepare for the seasonal hazards weather can throw at you."
+      ],
+      "emphasized_text_tags": [
+        "strong"
+      ],
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   },
   {
-    "element_id": "126c3cd201fb259cfeabc6bffc0b5473",
-    "text": "Second, encourage others to become Weather-Ready as well. Share the message by taking advantage of our vast array of weather safety content – everything posted on our Spring Safety website is freely available, and we encourage sharing on social media networks. Also remember those who are most vulnerable, like an elderly family member or neighbor who might have limited mobility or is isolated. Reach out to those who are at higher risk of being impacted by extreme weather, and help them get prepared. This simple act of caring could become heroic.",
     "type": "NarrativeText",
+    "element_id": "ac246c4693669d08d274f628c3293a78",
+    "text": "This could include a spring cleaning of your storm shelter or ensuring your emergency kit is fully stocked. Take a look at our infographics and social media posts to help you become \u201cweather-ready.\u201d",
     "metadata": {
-      "page_number": 1
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   },
   {
-    "element_id": "c1944fb037f3e1cb14969bc59a7dd9c2",
-    "text": "This spring, the campaign is focused on heat dangers. Heat illness and death can occur even in spring’s moderately warm weather. The majority of all heat-related deaths occur outside of heat waves and roughly a third of child hot car deaths occur outside of the summer months. Learn more by viewing the infographics that are now available.",
     "type": "NarrativeText",
+    "element_id": "d1fa2a66a4df9759bdf01f6f1ec51d8e",
+    "text": "Second, encourage others to become Weather-Ready as well. Share the message by taking advantage of our vast array of weather safety content \u2013 everything posted on our Spring Safety website is freely available, and we encourage sharing on social media networks. Also remember those who are most vulnerable, like an elderly family member or neighbor who might have limited mobility or is isolated. Reach out to those who are at higher risk of being impacted by extreme weather, and help them get prepared. This simple act of caring could become heroic.",
     "metadata": {
-      "page_number": 1
+      "emphasized_text_contents": [
+        "Second, encourage others to become Weather-Ready as well."
+      ],
+      "emphasized_text_tags": [
+        "strong"
+      ],
+      "link_texts": [
+        "Spring Safety website"
+      ],
+      "link_urls": [
+        "https://www.weather.gov/wrn/spring-safety"
+      ],
+      "link_start_indexes": [
+        167
+      ],
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   },
   {
-    "element_id": "fa1b939ef6159d95260bc095f58ebbc2",
+    "type": "NarrativeText",
+    "element_id": "996f1b86d1cb5a02028bd3816f5790f1",
+    "text": "This spring, the campaign is focused on heat dangers. Heat illness and death can occur even in spring\u2019s moderately warm weather. The majority of all heat-related deaths occur outside of heat waves and roughly a third of child hot car deaths occur outside of the summer months. Learn more by viewing the infographics that are now available.",
+    "metadata": {
+      "link_texts": [
+        "infographics"
+      ],
+      "link_urls": [
+        "https://www.weather.gov/wrn/spring-infographics"
+      ],
+      "link_start_indexes": [
+        303
+      ],
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
+    }
+  },
+  {
+    "type": "NarrativeText",
+    "element_id": "90b31790a9b5fd903e6dbaea50e05f45",
     "text": "Stay safe this spring, and every season, by being informed, prepared, and Weather-Ready.",
-    "type": "NarrativeText",
     "metadata": {
-      "page_number": 1
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   },
   {
-    "element_id": "47d5d0d27a35a36d7467dfc8b6e089b3",
-    "text": "US Dept of Commerce\n                        National Oceanic and Atmospheric Administration\n                        National Weather Service\n                        News Around NOAA1325 East West HighwaySilver Spring, MD 20910Comments? Questions? Please Contact Us.",
-    "type": "NarrativeText",
+    "type": "Title",
+    "element_id": "9dcf311a7e6225af9333100c709b7f23",
+    "text": "US Dept of Commerce",
     "metadata": {
-      "page_number": 1
+      "link_texts": [
+        "US Dept of Commerce"
+      ],
+      "link_urls": [
+        "http://www.commerce.gov"
+      ],
+      "link_start_indexes": [
+        -1
+      ],
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   },
   {
-    "element_id": "129c678fce59acee7ac6a6fdb67b6310",
+    "type": "Title",
+    "element_id": "60711b68cb732ecb10f4c05f0f784647",
+    "text": "National Oceanic and Atmospheric Administration",
+    "metadata": {
+      "link_texts": [
+        "National Oceanic and Atmospheric Administration"
+      ],
+      "link_urls": [
+        "http://www.noaa.gov"
+      ],
+      "link_start_indexes": [
+        -1
+      ],
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
+    }
+  },
+  {
+    "type": "Title",
+    "element_id": "55ca4bf03b04ffacb8ea8cb528c22a6f",
+    "text": "National Weather Service",
+    "metadata": {
+      "link_texts": [
+        "National Weather Service"
+      ],
+      "link_urls": [
+        "https://www.weather.gov"
+      ],
+      "link_start_indexes": [
+        -1
+      ],
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
+    }
+  },
+  {
+    "type": "Title",
+    "element_id": "3ebaebb5791662dfa6d2e2b8af436f9d",
+    "text": "News Around NOAA",
+    "metadata": {
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
+    }
+  },
+  {
+    "type": "Title",
+    "element_id": "ccf5cdb2984d2ac2d934010960d32aca",
+    "text": "1325 East West Highway",
+    "metadata": {
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
+    }
+  },
+  {
+    "type": "Address",
+    "element_id": "64a081cb854ff90dbc668c2b334d0ae8",
+    "text": "Silver Spring, MD 20910",
+    "metadata": {
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
+    }
+  },
+  {
+    "type": "Title",
+    "element_id": "6af532045e3aa6fe3764590594dc0dd7",
+    "text": "Comments? Questions? Please Contact Us.",
+    "metadata": {
+      "link_texts": [
+        "Comments? Questions? Please Contact Us."
+      ],
+      "link_urls": [
+        "https://www.weather.gov/news/contact"
+      ],
+      "link_start_indexes": [
+        -1
+      ],
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
+    }
+  },
+  {
+    "type": "Title",
+    "element_id": "a63c69dcc655b1b32bc6157427e9ca8e",
     "text": "Disclaimer",
-    "type": "Title",
     "metadata": {
-      "page_number": 1
+      "link_texts": [
+        "Disclaimer"
+      ],
+      "link_urls": [
+        "https://www.weather.gov/disclaimer"
+      ],
+      "link_start_indexes": [
+        -1
+      ],
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   },
   {
-    "element_id": "3c96caaebd949e39d25b3ccf4133c5d8",
+    "type": "Title",
+    "element_id": "95054785187bcc0cf98cdb17c135ca1d",
     "text": "Information Quality",
-    "type": "Title",
     "metadata": {
-      "page_number": 1
+      "link_texts": [
+        "Information Quality"
+      ],
+      "link_urls": [
+        "http://www.cio.noaa.gov/services_programs/info_quality.html"
+      ],
+      "link_start_indexes": [
+        -1
+      ],
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   },
   {
-    "element_id": "b79cac926e0b2e347e72cc91d5174037",
+    "type": "Title",
+    "element_id": "800d660faa52732cd4d361b187bbd6e2",
     "text": "Help",
-    "type": "Title",
     "metadata": {
-      "page_number": 1
+      "link_texts": [
+        "Help"
+      ],
+      "link_urls": [
+        "https://www.weather.gov/help"
+      ],
+      "link_start_indexes": [
+        -1
+      ],
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   },
   {
-    "element_id": "4c4e436f9a453c776dbf011f98d932d6",
+    "type": "Title",
+    "element_id": "718284e0cdf275514b6aa8fb8976a7cc",
     "text": "Glossary",
-    "type": "Title",
     "metadata": {
-      "page_number": 1
+      "link_texts": [
+        "Glossary"
+      ],
+      "link_urls": [
+        "http://www.weather.gov/glossary"
+      ],
+      "link_start_indexes": [
+        -1
+      ],
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   },
   {
-    "element_id": "506ff394621596dd88138642eddfc1e4",
+    "type": "Title",
+    "element_id": "678ef3e5cd635ba851d2dfd7f6f20d0f",
     "text": "Privacy Policy",
-    "type": "Title",
     "metadata": {
-      "page_number": 1
+      "link_texts": [
+        "Privacy Policy"
+      ],
+      "link_urls": [
+        "https://www.weather.gov/privacy"
+      ],
+      "link_start_indexes": [
+        -1
+      ],
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   },
   {
-    "element_id": "c70ae8c30a61c450d2c5148d1b6a0447",
+    "type": "Title",
+    "element_id": "f66ad83bfffccef0afe60d0aaba55b54",
     "text": "Freedom of Information Act (FOIA)",
-    "type": "Title",
     "metadata": {
-      "page_number": 1
+      "link_texts": [
+        "Freedom of Information Act (FOIA)"
+      ],
+      "link_urls": [
+        "https://www.noaa.gov/foia-freedom-of-information-act"
+      ],
+      "link_start_indexes": [
+        -1
+      ],
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   },
   {
-    "element_id": "5d8c71abc527284cd463aa58f3f48098",
+    "type": "Title",
+    "element_id": "f50c4a988c7336b9d1100227fa7f03a3",
     "text": "About Us",
-    "type": "Title",
     "metadata": {
-      "page_number": 1
+      "link_texts": [
+        "About Us"
+      ],
+      "link_urls": [
+        "https://www.weather.gov/about"
+      ],
+      "link_start_indexes": [
+        -1
+      ],
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   },
   {
-    "element_id": "a8a00c355d2fa1461d532a1088274f32",
-    "text": "Career Opportunities",
     "type": "Title",
+    "element_id": "a9a5f8ac29adb68999173b4e65a189bd",
+    "text": "Career Opportunities",
     "metadata": {
-      "page_number": 1
+      "link_texts": [
+        "Career Opportunities"
+      ],
+      "link_urls": [
+        "https://www.weather.gov/careers"
+      ],
+      "link_start_indexes": [
+        -1
+      ],
+      "languages": [
+        "eng"
+      ],
+      "filetype": "text/html",
+      "data_source": {
+        "url": "abfs://container1/spring-weather.html",
+        "version": "162215905222974206637545574128436022861",
+        "record_locator": {
+          "protocol": "abfs",
+          "remote_file_path": "abfs://container1/"
+        },
+        "date_created": "1678441216.0",
+        "date_modified": "1678441216.0"
+      }
     }
   }
-]
+]
\ No newline at end of file
diff --git a/test_unstructured/partition/test_auto.py b/test_unstructured/partition/test_auto.py
index 64de49f36..6752e222f 100644
--- a/test_unstructured/partition/test_auto.py
+++ b/test_unstructured/partition/test_auto.py
@@ -5,6 +5,7 @@ from __future__ import annotations
 import json
 import os
 import pathlib
+import sys
 import tempfile
 import warnings
 from importlib import import_module
@@ -51,19 +52,7 @@ from unstructured.partition import auto
 from unstructured.partition.auto import _get_partition_with_extras, partition
 from unstructured.partition.common import convert_office_doc
 from unstructured.partition.utils.constants import PartitionStrategy
-from unstructured.staging.base import elements_to_json
-
-DIRECTORY = pathlib.Path(__file__).parent.resolve()
-EXAMPLE_DOCS_DIRECTORY = os.path.join(DIRECTORY, "..", "..", "example-docs")
-
-EXPECTED_EMAIL_OUTPUT = [
-    NarrativeText(text="This is a test email to use for unit tests."),
-    Title(text="Important points:"),
-    ListItem(text="Roses are red"),
-    ListItem(text="Violets are blue"),
-]
-
-EML_TEST_FILE = "eml/fake-email.eml"
+from unstructured.staging.base import elements_from_json, elements_to_dicts, elements_to_json
 
 is_in_docker = os.path.exists("/.dockerenv")
 
@@ -98,7 +87,6 @@ def test_auto_partition_csv_from_file():
 # ================================================================================================
 
 
-@pytest.mark.skipif(is_in_docker, reason="Skipping this test in Docker container")
 @pytest.mark.parametrize(
     ("pass_metadata_filename", "content_type"),
     [(False, None), (False, "application/msword"), (True, "application/msword"), (True, None)],
@@ -126,20 +114,30 @@ def test_auto_partition_doc_with_filename(
     assert elements[0].metadata.file_directory == str(tmp_path)
 
 
-# NOTE(robinson) - the application/x-ole-storage mime type is not specific enough to
-# determine that the file is an .doc document
-@pytest.mark.xfail()
-def test_auto_partition_doc_with_file(
-    mock_docx_document: Document, expected_docx_elements: list[Element], tmp_path: pathlib.Path
-):
-    docx_filename = str(tmp_path / "mock_document.docx")
-    doc_filename = str(tmp_path / "mock_document.doc")
-    mock_docx_document.save(docx_filename)
-    convert_office_doc(docx_filename, str(tmp_path), "doc")
+@pytest.mark.skipif(is_in_docker, reason="Passes in CI but not Docker. Remove skip on #3364 fix.")
+@pytest.mark.xfail(sys.platform == "darwin", reason="#3364", raises=KeyError, strict=True)
+def test_auto_partition_doc_with_file():
+    # -- NOTE(scanny): https://github.com/Unstructured-IO/unstructured/issues/3364
+    # -- detect_filetype() identifies .doc as `application/x-ole-storage` which is true but not
+    # -- specific enough. The `FileType.MSG` file-type is assigned (which is also an OLE file)
+    # -- and `partition()` routes the document to `partition_msg` which is where the `KeyError`
+    # -- comes from.
+    # -- For some reason, this xfail problem only occurs locally, not in CI, possibly because we
+    # -- use two different `libmagic` sources (`libmagic` on CI and `libmagic1` on Mac). Doesn't
+    # -- matter much though because when we add disambiguation they'll both get it right.
+    with open(example_doc_path("simple.doc"), "rb") as f:
+        elements = partition(file=f)
 
-    with open(doc_filename, "rb") as f:
-        elements = partition(file=f, strategy=PartitionStrategy.HI_RES)
-    assert elements == expected_docx_elements
+    assert elements == [
+        Title("These are a few of my favorite things:"),
+        ListItem("Parrots"),
+        ListItem("Hockey"),
+        Title("Analysis"),
+        NarrativeText("This is my first thought. This is my second thought."),
+        NarrativeText("This is my third thought."),
+        Text("2023"),
+        Address("DOYLESTOWN, PA 18901"),
+    ]
 
 
 # ================================================================================================
@@ -184,21 +182,21 @@ def expected_docx_elements():
 def test_auto_partition_docx_with_filename(
     mock_docx_document: Document, expected_docx_elements: list[Element], tmp_path: pathlib.Path
 ):
-    filename = str(tmp_path / "mock_document.docx")
-    mock_docx_document.save(filename)
+    file_path = str(tmp_path / "mock_document.docx")
+    mock_docx_document.save(file_path)
 
-    elements = partition(filename=filename, strategy=PartitionStrategy.HI_RES)
+    elements = partition(filename=file_path, strategy=PartitionStrategy.HI_RES)
     assert elements == expected_docx_elements
-    assert elements[0].metadata.filename == os.path.basename(filename)
+    assert elements[0].metadata.filename == os.path.basename(file_path)
 
 
 def test_auto_partition_docx_with_file(
     mock_docx_document: Document, expected_docx_elements: list[Element], tmp_path: pathlib.Path
 ):
-    filename = str(tmp_path / "mock_document.docx")
-    mock_docx_document.save(filename)
+    file_path = str(tmp_path / "mock_document.docx")
+    mock_docx_document.save(file_path)
 
-    with open(filename, "rb") as f:
+    with open(file_path, "rb") as f:
         elements = partition(file=f, strategy=PartitionStrategy.HI_RES)
     assert elements == expected_docx_elements
 
@@ -246,34 +244,32 @@ def test_partition_forwards_strategy_arg_to_partition_docx_and_its_brokers(
 # EML
 # ================================================================================================
 
+EXPECTED_EMAIL_OUTPUT = [
+    NarrativeText(text="This is a test email to use for unit tests."),
+    Title(text="Important points:"),
+    ListItem(text="Roses are red"),
+    ListItem(text="Violets are blue"),
+]
+
 
 def test_auto_partition_email_from_filename():
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, EML_TEST_FILE)
-    elements = partition(filename=filename, strategy=PartitionStrategy.HI_RES)
+    file_path = example_doc_path("eml/fake-email.eml")
+    elements = partition(file_path, strategy=PartitionStrategy.HI_RES)
     assert len(elements) > 0
     assert elements == EXPECTED_EMAIL_OUTPUT
-    assert elements[0].metadata.filename == os.path.basename(filename)
-    assert elements[0].metadata.file_directory == os.path.split(filename)[0]
+    assert elements[0].metadata.filename == os.path.basename(file_path)
+    assert elements[0].metadata.file_directory == os.path.split(file_path)[0]
 
 
 def test_auto_partition_email_from_file():
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, EML_TEST_FILE)
-    with open(filename, "rb") as f:
-        elements = partition(file=f, strategy=PartitionStrategy.HI_RES)
-    assert len(elements) > 0
-    assert elements == EXPECTED_EMAIL_OUTPUT
-
-
-def test_auto_partition_email_from_file_rb():
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, EML_TEST_FILE)
-    with open(filename, "rb") as f:
+    with open(example_doc_path("eml/fake-email.eml"), "rb") as f:
         elements = partition(file=f, strategy=PartitionStrategy.HI_RES)
     assert len(elements) > 0
     assert elements == EXPECTED_EMAIL_OUTPUT
 
 
 def test_auto_partition_eml_add_signature_to_metadata():
-    elements = partition(filename="example-docs/eml/signed-doc.p7s")
+    elements = partition(example_doc_path("eml/signed-doc.p7s"))
     assert len(elements) == 1
     assert elements[0].text == "This is a test"
     assert elements[0].metadata.signature == "<SIGNATURE>\n"
@@ -285,15 +281,13 @@ def test_auto_partition_eml_add_signature_to_metadata():
 
 
 def test_auto_partition_epub_from_filename():
-    filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "winter-sports.epub")
-    elements = partition(filename=filename, strategy=PartitionStrategy.HI_RES)
+    elements = partition(example_doc_path("winter-sports.epub"), strategy=PartitionStrategy.HI_RES)
     assert len(elements) > 0
     assert elements[0].text.startswith("The Project Gutenberg eBook of Winter Sports")
 
 
 def test_auto_partition_epub_from_file():
-    filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "winter-sports.epub")
-    with open(filename, "rb") as f:
+    with open(example_doc_path("winter-sports.epub"), "rb") as f:
         elements = partition(file=f, strategy=PartitionStrategy.HI_RES)
     assert len(elements) > 0
     assert elements[0].text.startswith("The Project Gutenberg eBook of Winter Sports")
@@ -309,17 +303,17 @@ def test_auto_partition_epub_from_file():
     [(False, None), (False, "text/html"), (True, "text/html"), (True, None)],
 )
 def test_auto_partition_html_from_filename(pass_metadata_filename: bool, content_type: str | None):
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "example-10k.html")
-    metadata_filename = filename if pass_metadata_filename else None
+    file_path = example_doc_path("example-10k.html")
+    metadata_filename = file_path if pass_metadata_filename else None
     elements = partition(
-        filename=filename,
+        filename=file_path,
         metadata_filename=metadata_filename,
         content_type=content_type,
         strategy=PartitionStrategy.HI_RES,
     )
     assert len(elements) > 0
-    assert elements[0].metadata.filename == os.path.basename(filename)
-    assert elements[0].metadata.file_directory == os.path.split(filename)[0]
+    assert elements[0].metadata.filename == os.path.basename(file_path)
+    assert elements[0].metadata.file_directory == os.path.split(file_path)[0]
 
 
 @pytest.mark.parametrize(
@@ -327,9 +321,9 @@ def test_auto_partition_html_from_filename(pass_metadata_filename: bool, content
     [(False, None), (False, "text/html"), (True, "text/html"), (True, None)],
 )
 def test_auto_partition_html_from_file(pass_metadata_filename: bool, content_type: str | None):
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake-html.html")
-    metadata_filename = filename if pass_metadata_filename else None
-    with open(filename, "rb") as f:
+    file_path = example_doc_path("fake-html.html")
+    metadata_filename = file_path if pass_metadata_filename else None
+    with open(file_path, "rb") as f:
         elements = partition(
             file=f,
             metadata_filename=metadata_filename,
@@ -340,8 +334,7 @@ def test_auto_partition_html_from_file(pass_metadata_filename: bool, content_typ
 
 
 def test_auto_partition_html_from_file_rb():
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake-html.html")
-    with open(filename, "rb") as f:
+    with open(example_doc_path("fake-html.html"), "rb") as f:
         elements = partition(file=f, strategy=PartitionStrategy.HI_RES)
     assert len(elements) > 0
 
@@ -367,10 +360,10 @@ def test_auto_partition_html_pre_from_file():
     [(False, None), (False, "image/jpeg"), (True, "image/jpeg"), (True, None)],
 )
 def test_auto_partition_image(pass_metadata_filename: bool, content_type: str | None):
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "layout-parser-paper-fast.jpg")
-    metadata_filename = filename if pass_metadata_filename else None
+    file_path = example_doc_path("layout-parser-paper-fast.jpg")
+    metadata_filename = file_path if pass_metadata_filename else None
     elements = partition(
-        filename=filename,
+        filename=file_path,
         metadata_filename=metadata_filename,
         content_type=content_type,
         strategy=PartitionStrategy.AUTO,
@@ -405,10 +398,10 @@ def test_auto_partition_image_element_extraction(extract_image_block_to_payload:
     [(False, None), (False, "image/jpeg"), (True, "image/jpeg"), (True, None)],
 )
 def test_auto_partition_jpg(pass_metadata_filename: bool, content_type: str | None):
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "layout-parser-paper-fast.jpg")
-    metadata_filename = filename if pass_metadata_filename else None
+    file_path = example_doc_path("layout-parser-paper-fast.jpg")
+    metadata_filename = file_path if pass_metadata_filename else None
     elements = partition(
-        filename=filename,
+        filename=file_path,
         metadata_filename=metadata_filename,
         content_type=content_type,
         strategy=PartitionStrategy.AUTO,
@@ -421,9 +414,9 @@ def test_auto_partition_jpg(pass_metadata_filename: bool, content_type: str | No
     [(False, None), (False, "image/jpeg"), (True, "image/jpeg"), (True, None)],
 )
 def test_auto_partition_jpg_from_file(pass_metadata_filename: bool, content_type: str | None):
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "layout-parser-paper-fast.jpg")
-    metadata_filename = filename if pass_metadata_filename else None
-    with open(filename, "rb") as f:
+    file_path = example_doc_path("layout-parser-paper-fast.jpg")
+    metadata_filename = file_path if pass_metadata_filename else None
+    with open(file_path, "rb") as f:
         elements = partition(
             file=f,
             metadata_filename=metadata_filename,
@@ -454,19 +447,10 @@ def test_partition_image_with_bmp_with_auto(tmp_path: pathlib.Path):
 # ================================================================================================
 
 
-# NOTE(robinson) - skipping this test with docker image to avoid putting the
-# test fixtures into the image
-@pytest.mark.skipif(is_in_docker, reason="Skipping this test in Docker container")
 def test_auto_partitioned_json_output_maintains_consistency_with_fixture_elements():
     """Test auto-processing an unstructured json output file by filename."""
+    json_file_path = example_doc_path("spring-weather.html.json")
     original_file_name = "spring-weather.html"
-    json_file_path = (
-        pathlib.Path(DIRECTORY).parents[1]
-        / "test_unstructured_ingest"
-        / "expected-structured-output"
-        / "azure"
-        / f"{original_file_name}.json"
-    )
     with open(json_file_path) as json_f:
         expected_result = json.load(json_f)
 
@@ -495,52 +479,41 @@ def test_auto_partition_json_raises_with_unprocessable_json(tmp_path: pathlib.Pa
     # per the Unstructured ISD format
     text = '{"hi": "there"}'
 
-    filename = str(tmp_path / "unprocessable.json")
-    with open(filename, "w") as f:
+    file_path = str(tmp_path / "unprocessable.json")
+    with open(file_path, "w") as f:
         f.write(text)
 
     with pytest.raises(ValueError):
-        partition(filename=filename)
+        partition(filename=file_path)
 
 
 @pytest.mark.xfail(
-    reason="parsed as text not json, https://github.com/Unstructured-IO/unstructured/issues/492",
+    reason=(
+        "https://github.com/Unstructured-IO/unstructured/issues/3365"
+        " partition_json() does not preserve original element-id or metadata"
+    ),
+    raises=AssertionError,
+    strict=True,
 )
-def test_auto_partition_json_from_file():
-    """Test auto-processing an unstructured json output file by file handle."""
-    filename = os.path.join(
-        EXAMPLE_DOCS_DIRECTORY,
-        "..",
-        "test_unstructured_ingest",
-        "expected-structured-output",
-        "azure-blob-storage",
-        "spring-weather.html.json",
-    )
-    with open(filename) as json_f:
-        json_data = json.load(json_f)
-    with open(filename, "rb") as partition_f:
-        json_elems = json.loads(
-            cast(
-                str,
-                elements_to_json(partition(file=partition_f, strategy=PartitionStrategy.HI_RES)),
-            )
-        )
-    for elem in json_elems:
-        # coordinates are always in the element data structures, even if None
-        elem.pop("coordinates")
-        elem.pop("coordinate_system")
-    assert json_data == json_elems
+def test_auto_partition_json_from_file_preserves_original_elements():
+    file_path = example_doc_path("simple.json")
+    original_elements = elements_from_json(file_path)
+
+    with open(file_path, "rb") as f:
+        partitioned_elements = partition(file=f)
+
+    assert elements_to_dicts(partitioned_elements) == elements_to_dicts(original_elements)
 
 
 def test_auto_partition_works_with_unstructured_jsons():
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "spring-weather.html.json")
-    elements = partition(filename=filename, strategy=PartitionStrategy.HI_RES)
+    elements = partition(
+        example_doc_path("spring-weather.html.json"), strategy=PartitionStrategy.HI_RES
+    )
     assert elements[0].text == "News Around NOAA"
 
 
 def test_auto_partition_works_with_unstructured_jsons_from_file():
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "spring-weather.html.json")
-    with open(filename, "rb") as f:
+    with open(example_doc_path("spring-weather.html.json"), "rb") as f:
         elements = partition(file=f, strategy=PartitionStrategy.HI_RES)
     assert elements[0].text == "News Around NOAA"
 
@@ -570,8 +543,7 @@ EXPECTED_MSG_OUTPUT = [
 
 
 def test_auto_partition_msg_from_filename():
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake-email.msg")
-    elements = partition(filename=filename, strategy=PartitionStrategy.HI_RES)
+    elements = partition(example_doc_path("fake-email.msg"), strategy=PartitionStrategy.HI_RES)
     assert elements == EXPECTED_MSG_OUTPUT
 
 
@@ -581,14 +553,12 @@ def test_auto_partition_msg_from_filename():
 
 
 def test_auto_partition_odt_from_filename():
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake.odt")
-    elements = partition(filename=filename, strategy=PartitionStrategy.HI_RES)
+    elements = partition(example_doc_path("fake.odt"), strategy=PartitionStrategy.HI_RES)
     assert elements[0] == Title("Lorem ipsum dolor sit amet.")
 
 
 def test_auto_partition_odt_from_file():
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake.odt")
-    with open(filename, "rb") as f:
+    with open(example_doc_path("fake.odt"), "rb") as f:
         elements = partition(file=f, strategy=PartitionStrategy.HI_RES)
 
     assert elements[0] == Title("Lorem ipsum dolor sit amet.")
@@ -623,54 +593,56 @@ def test_auto_partition_org_from_file():
     ("pass_metadata_filename", "content_type"),
     [(False, None), (False, "application/pdf"), (True, "application/pdf"), (True, None)],
 )
-def test_auto_partition_pdf_from_filename(
-    request: FixtureRequest, pass_metadata_filename: bool, content_type: str | None
-):
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "layout-parser-paper-fast.pdf")
-    metadata_filename = filename if pass_metadata_filename else None
+def test_auto_partition_pdf_from_filename(pass_metadata_filename: bool, content_type: str | None):
+    file_path = example_doc_path("layout-parser-paper-fast.pdf")
+    metadata_filename = file_path if pass_metadata_filename else None
 
     elements = partition(
-        filename=filename,
+        filename=file_path,
         metadata_filename=metadata_filename,
         content_type=content_type,
         strategy=PartitionStrategy.HI_RES,
     )
 
-    # NOTE(alan): Xfail since new model skips the word Zejiang
-    request.applymarker(pytest.mark.xfail)
+    # NOTE(scanny): gave up trying to figure out why, but this file partitions differently locally
+    # (on Mac) than it does in CI. Basically the first element when partitioning locally is split
+    # in two when partitioning on CI. Other than that split, the text is exactly the same.
+    idx = 2 if sys.platform == "darwin" else 3
 
-    idx = 3
-    assert isinstance(elements[idx], Title)
-    assert elements[idx].text.startswith("LayoutParser")
+    e = elements[idx]
+    assert isinstance(e, Title)
+    assert e.text.startswith("LayoutParser")
+    assert e.metadata.filename == os.path.basename(file_path)
+    assert e.metadata.file_directory == os.path.split(file_path)[0]
 
-    assert elements[idx].metadata.filename == os.path.basename(filename)
-    assert elements[idx].metadata.file_directory == os.path.split(filename)[0]
-
-    idx += 1
-    assert isinstance(elements[idx], NarrativeText)
-    assert elements[idx].text.startswith("Zejiang Shen")
+    e = elements[idx + 1]
+    assert isinstance(e, NarrativeText)
+    assert e.text.startswith("Zejiang Shen")
 
 
 def test_auto_partition_pdf_uses_table_extraction():
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "layout-parser-paper-fast.pdf")
     with patch(
         "unstructured.partition.pdf_image.ocr.process_file_with_ocr",
     ) as mock_process_file_with_model:
-        partition(filename, pdf_infer_table_structure=True, strategy=PartitionStrategy.HI_RES)
+        partition(
+            example_doc_path("layout-parser-paper-fast.pdf"),
+            pdf_infer_table_structure=True,
+            strategy=PartitionStrategy.HI_RES,
+        )
         assert mock_process_file_with_model.call_args[1]["infer_table_structure"]
 
 
 def test_auto_partition_pdf_with_fast_strategy(monkeypatch: MonkeyPatch):
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "layout-parser-paper-fast.pdf")
+    file_path = example_doc_path("layout-parser-paper-fast.pdf")
 
     mock_return = [NarrativeText("Hello there!")]
     with patch.object(auto, "partition_pdf", return_value=mock_return) as mock_partition:
         mock_partition_with_extras_map = {"pdf": mock_partition}
         monkeypatch.setattr(auto, "PARTITION_WITH_EXTRAS_MAP", mock_partition_with_extras_map)
-        partition(filename=filename, strategy=PartitionStrategy.FAST)
+        partition(filename=file_path, strategy=PartitionStrategy.FAST)
 
     mock_partition.assert_called_once_with(
-        filename=filename,
+        filename=file_path,
         file=None,
         url=None,
         strategy=PartitionStrategy.FAST,
@@ -692,13 +664,11 @@ def test_auto_partition_pdf_with_fast_strategy(monkeypatch: MonkeyPatch):
     ("pass_metadata_filename", "content_type"),
     [(False, None), (False, "application/pdf"), (True, "application/pdf"), (True, None)],
 )
-def test_auto_partition_pdf_from_file(
-    request: FixtureRequest, pass_metadata_filename: bool, content_type: str | None
-):
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "layout-parser-paper-fast.pdf")
-    metadata_filename = filename if pass_metadata_filename else None
+def test_auto_partition_pdf_from_file(pass_metadata_filename: bool, content_type: str | None):
+    file_path = example_doc_path("layout-parser-paper-fast.pdf")
+    metadata_filename = file_path if pass_metadata_filename else None
 
-    with open(filename, "rb") as f:
+    with open(file_path, "rb") as f:
         elements = partition(
             file=f,
             metadata_filename=metadata_filename,
@@ -706,27 +676,28 @@ def test_auto_partition_pdf_from_file(
             strategy=PartitionStrategy.HI_RES,
         )
 
-    # NOTE(alan): Xfail since new model skips the word Zejiang
-    request.applymarker(pytest.mark.xfail)
+    # NOTE(scanny): see "from_filename" version of this test above for more on this oddness
+    idx = 2 if sys.platform == "darwin" else 3
 
-    idx = 3
-    assert isinstance(elements[idx], Title)
-    assert elements[idx].text.startswith("LayoutParser")
+    e = elements[idx]
+    assert isinstance(e, Title)
+    assert e.text.startswith("LayoutParser")
 
-    idx += 1
-    assert isinstance(elements[idx], NarrativeText)
-    assert elements[idx].text.startswith("Zejiang Shen")
+    e = elements[idx + 1]
+    assert isinstance(e, NarrativeText)
+    assert e.text.startswith("Zejiang Shen")
 
 
 def test_partition_pdf_does_not_raise_warning():
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "layout-parser-paper-fast.pdf")
     # NOTE(robinson): This is the recommended way to check that no warning is emitted,
     # per the pytest docs.
     # ref: https://docs.pytest.org/en/7.0.x/how-to/capture-warnings.html
     #      #additional-use-cases-of-warnings-in-tests
     with warnings.catch_warnings():
         warnings.simplefilter("error")
-        partition(filename=filename, strategy=PartitionStrategy.HI_RES)
+        partition(
+            example_doc_path("layout-parser-paper-fast.pdf"), strategy=PartitionStrategy.HI_RES
+        )
 
 
 @pytest.mark.parametrize("extract_image_block_to_payload", [False, True])
@@ -753,11 +724,11 @@ def test_auto_partition_pdf_element_extraction(extract_image_block_to_payload: b
 
 @pytest.mark.skipif(is_in_docker, reason="Skipping this test in Docker container")
 def test_auto_partition_ppt_from_filename():
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake-power-point.ppt")
-    elements = partition(filename=filename, strategy=PartitionStrategy.HI_RES)
+    file_path = example_doc_path("fake-power-point.ppt")
+    elements = partition(file_path, strategy=PartitionStrategy.HI_RES)
     assert elements == EXPECTED_PPTX_OUTPUT
-    assert elements[0].metadata.filename == os.path.basename(filename)
-    assert elements[0].metadata.file_directory == os.path.split(filename)[0]
+    assert elements[0].metadata.filename == os.path.basename(file_path)
+    assert elements[0].metadata.file_directory == os.path.split(file_path)[0]
 
 
 # ================================================================================================
@@ -776,11 +747,11 @@ EXPECTED_PPTX_OUTPUT = [
 
 
 def test_auto_partition_pptx_from_filename():
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake-power-point.pptx")
-    elements = partition(filename=filename, strategy=PartitionStrategy.HI_RES)
+    file_path = example_doc_path("fake-power-point.pptx")
+    elements = partition(file_path, strategy=PartitionStrategy.HI_RES)
     assert elements == EXPECTED_PPTX_OUTPUT
-    assert elements[0].metadata.filename == os.path.basename(filename)
-    assert elements[0].metadata.file_directory == os.path.split(filename)[0]
+    assert elements[0].metadata.filename == os.path.basename(file_path)
+    assert elements[0].metadata.file_directory == os.path.split(file_path)[0]
 
 
 @pytest.mark.parametrize("file_name", ["simple.pptx", "fake-power-point.ppt"])
@@ -848,8 +819,7 @@ def test_auto_partition_rst_from_file():
 
 
 def test_auto_partition_rtf_from_filename():
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake-doc.rtf")
-    elements = partition(filename=filename, strategy=PartitionStrategy.HI_RES)
+    elements = partition(example_doc_path("fake-doc.rtf"), strategy=PartitionStrategy.HI_RES)
     assert elements[0] == Title("My First Heading")
 
 
@@ -883,17 +853,16 @@ EXPECTED_TEXT_OUTPUT = [
 
 
 def test_auto_partition_text_from_filename():
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake-text.txt")
-    elements = partition(filename=filename, strategy=PartitionStrategy.HI_RES)
+    file_path = example_doc_path("fake-text.txt")
+    elements = partition(filename=file_path, strategy=PartitionStrategy.HI_RES)
     assert len(elements) > 0
     assert elements == EXPECTED_TEXT_OUTPUT
-    assert elements[0].metadata.filename == os.path.basename(filename)
-    assert elements[0].metadata.file_directory == os.path.split(filename)[0]
+    assert elements[0].metadata.filename == os.path.basename(file_path)
+    assert elements[0].metadata.file_directory == os.path.split(file_path)[0]
 
 
 def test_auto_partition_text_from_file():
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake-text.txt")
-    with open(filename, "rb") as f:
+    with open(example_doc_path("fake-text.txt"), "rb") as f:
         elements = partition(file=f, strategy=PartitionStrategy.HI_RES)
     assert len(elements) > 0
     assert elements == EXPECTED_TEXT_OUTPUT
@@ -903,10 +872,8 @@ def test_auto_partition_text_from_file():
 # XLS
 # ================================================================================================
 
-
 EXPECTED_XLS_TEXT_LEN = 550
 
-
 EXPECTED_XLS_INITIAL_45_CLEAN_TEXT = "MC What is 2+2? 4 correct 3 incorrect MA What"
 
 EXPECTED_XLS_TABLE = (
@@ -1054,7 +1021,7 @@ def test_auto_partition_xlsx_from_file():
 
 
 def test_auto_partition_respects_starting_page_number_argument_for_xlsx():
-    elements = partition("example-docs/stanley-cups.xlsx", starting_page_number=3)
+    elements = partition(example_doc_path("stanley-cups.xlsx"), starting_page_number=3)
     assert elements[1].metadata.page_number == 3
 
 
@@ -1140,9 +1107,10 @@ def test_auto_partition_from_url_without_providing_content_type():
 
 
 def test_auto_partition_warns_if_header_set_and_not_url(caplog: LogCaptureFixture):
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, EML_TEST_FILE)
     partition(
-        filename=filename, headers={"Accept": "application/pdf"}, strategy=PartitionStrategy.HI_RES
+        example_doc_path("eml/fake-email.eml"),
+        headers={"Accept": "application/pdf"},
+        strategy=PartitionStrategy.HI_RES,
     )
     assert caplog.records[0].levelname == "WARNING"
 
@@ -1169,22 +1137,22 @@ def test_partition_timeout_gets_routed():
 
 
 def test_add_chunking_strategy_on_partition_auto():
-    filename = "example-docs/example-10k-1p.html"
-    elements = partition(filename)
-    chunk_elements = partition(filename, chunking_strategy="by_title")
+    file_path = example_doc_path("example-10k-1p.html")
+    elements = partition(file_path)
+    chunk_elements = partition(file_path, chunking_strategy="by_title")
     chunks = chunk_by_title(elements)
     assert chunk_elements != elements
     assert chunk_elements == chunks
 
 
 def test_add_chunking_strategy_on_partition_auto_respects_max_chars():
-    filename = "example-docs/example-10k-1p.html"
+    file_path = example_doc_path("example-10k-1p.html")
 
     # default chunk size in chars is 200
     partitioned_table_elements_200_chars = [
         e
         for e in partition(
-            filename,
+            file_path,
             chunking_strategy="by_title",
             max_characters=200,
             combine_text_under_n_chars=5,
@@ -1195,7 +1163,7 @@ def test_add_chunking_strategy_on_partition_auto_respects_max_chars():
     partitioned_table_elements_5_chars = [
         e
         for e in partition(
-            filename,
+            file_path,
             chunking_strategy="by_title",
             max_characters=5,
             combine_text_under_n_chars=5,
@@ -1203,7 +1171,7 @@ def test_add_chunking_strategy_on_partition_auto_respects_max_chars():
         if isinstance(e, (Table, TableChunk))
     ]
 
-    elements = partition(filename)
+    elements = partition(file_path)
 
     table_elements = [e for e in elements if isinstance(e, Table)]
 
@@ -1224,12 +1192,12 @@ def test_add_chunking_strategy_on_partition_auto_respects_max_chars():
 
 
 def test_add_chunking_strategy_chars_on_partition_auto_adds_is_continuation():
-    filename = "example-docs/example-10k-1p.html"
+    file_path = example_doc_path("example-10k-1p.html")
 
-    table_elements = [e for e in partition(filename) if isinstance(e, Table)]
+    table_elements = [e for e in partition(file_path) if isinstance(e, Table)]
     table_chunks = [
         e
-        for e in partition(filename, chunking_strategy="by_title")
+        for e in partition(file_path, chunking_strategy="by_title")
         if isinstance(e, (Table, TableChunk))
     ]
 
@@ -1249,8 +1217,9 @@ def test_add_chunking_strategy_chars_on_partition_auto_adds_is_continuation():
 
 
 def test_partition_respects_detect_language_per_element_arg():
-    filename = "example-docs/language-docs/eng_spa_mult.txt"
-    elements = partition(filename=filename, detect_language_per_element=True)
+    elements = partition(
+        example_doc_path("language-docs/eng_spa_mult.txt"), detect_language_per_element=True
+    )
     langs = [element.metadata.languages for element in elements]
     assert langs == [["eng"], ["spa", "eng"], ["eng"], ["eng"], ["spa"]]
 
@@ -1288,9 +1257,10 @@ def test_partition_respects_language_arg(file_extension: str):
 
 
 def test_auto_with_page_breaks():
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "layout-parser-paper-fast.pdf")
     elements = partition(
-        filename=filename, include_page_breaks=True, strategy=PartitionStrategy.HI_RES
+        example_doc_path("layout-parser-paper-fast.pdf"),
+        include_page_breaks=True,
+        strategy=PartitionStrategy.HI_RES,
     )
     assert "PageBreak" in [elem.category for elem in elements]
 
@@ -1299,36 +1269,39 @@ def test_auto_with_page_breaks():
 
 
 def test_auto_partition_metadata_filename():
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake-text.txt")
-    with open(filename, "rb") as f:
-        elements = partition(file=f, metadata_filename=filename)
-    assert elements[0].metadata.filename == os.path.split(filename)[-1]
+    file_path = example_doc_path("fake-text.txt")
+    with open(file_path, "rb") as f:
+        elements = partition(file=f, metadata_filename=file_path)
+    assert elements[0].metadata.filename == os.path.split(file_path)[-1]
 
 
 def test_auto_partition_warns_about_file_filename_deprecation(caplog: LogCaptureFixture):
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake-text.txt")
-    with open(filename, "rb") as f:
-        elements = partition(file=f, file_filename=filename)
-    assert elements[0].metadata.filename == os.path.split(filename)[-1]
+    file_path = example_doc_path("fake-text.txt")
+    with open(file_path, "rb") as f:
+        elements = partition(file=f, file_filename=file_path)
+    assert elements[0].metadata.filename == os.path.split(file_path)[-1]
     assert "WARNING" in caplog.text
     assert "The file_filename kwarg will be deprecated" in caplog.text
 
 
 def test_auto_partition_raises_with_file_and_metadata_filename():
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake-text.txt")
-    with open(filename, "rb") as f, pytest.raises(ValueError):
-        partition(file=f, file_filename=filename, metadata_filename=filename)
+    file_path = example_doc_path("fake-text.txt")
+    with open(file_path, "rb") as f, pytest.raises(ValueError):
+        partition(file=f, file_filename=file_path, metadata_filename=file_path)
 
 
 # -- ocr_languages --------------------------------------------------------
 
 
 def test_auto_partition_formats_languages_for_tesseract():
-    filename = "example-docs/chi_sim_image.jpeg"
     with patch(
         "unstructured.partition.pdf_image.ocr.process_file_with_ocr",
     ) as mock_process_file_with_ocr:
-        partition(filename, strategy=PartitionStrategy.HI_RES, languages=["zh"])
+        partition(
+            example_doc_path("chi_sim_image.jpeg"),
+            strategy=PartitionStrategy.HI_RES,
+            languages=["zh"],
+        )
         _, kwargs = mock_process_file_with_ocr.call_args_list[0]
         assert "ocr_languages" in kwargs
         assert kwargs["ocr_languages"] == "chi_sim+chi_sim_vert+chi_tra+chi_tra_vert"
@@ -1338,9 +1311,8 @@ def test_auto_partition_formats_languages_for_tesseract():
 def test_auto_partition_ignores_empty_string_for_ocr_languages(
     languages: list[str], ocr_languages: str
 ):
-    filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "book-war-and-peace-1p.txt")
     elements = partition(
-        filename=filename,
+        example_doc_path("book-war-and-peace-1p.txt"),
         strategy=PartitionStrategy.OCR_ONLY,
         ocr_languages=ocr_languages,
         languages=languages,
@@ -1349,8 +1321,9 @@ def test_auto_partition_ignores_empty_string_for_ocr_languages(
 
 
 def test_auto_partition_warns_with_ocr_languages(caplog: LogCaptureFixture):
-    filename = "example-docs/chevron-page.pdf"
-    partition(filename=filename, strategy=PartitionStrategy.HI_RES, ocr_languages="eng")
+    partition(
+        example_doc_path("chevron-page.pdf"), strategy=PartitionStrategy.HI_RES, ocr_languages="eng"
+    )
     assert "The ocr_languages kwarg will be deprecated" in caplog.text
 
 
@@ -1463,7 +1436,7 @@ def test_file_specific_produces_correct_filetype(filetype: FileType):
     fun_name = "partition_" + filetype_module
     module = import_module(f"unstructured.partition.{filetype_module}")
     fun = getattr(module, fun_name)
-    for file in pathlib.Path("example-docs").iterdir():
+    for file in pathlib.Path(example_doc_path("")).iterdir():
         if file.is_file() and file.suffix == f".{extension}":
             elements = fun(str(file))
             assert all(
@@ -1478,8 +1451,11 @@ def test_file_specific_produces_correct_filetype(filetype: FileType):
 
 
 def test_auto_partition_element_metadata_user_provided_languages():
-    filename = "example-docs/chevron-page.pdf"
-    elements = partition(filename=filename, strategy=PartitionStrategy.OCR_ONLY, languages=["eng"])
+    elements = partition(
+        example_doc_path("chevron-page.pdf"),
+        strategy=PartitionStrategy.OCR_ONLY,
+        languages=["eng"],
+    )
     assert elements[0].metadata.languages == ["eng"]
 
 
@@ -1495,8 +1471,7 @@ def test_partition_languages_incorrectly_defaults_to_English(tmp_path: pathlib.P
 
 
 def test_partition_languages_default_to_None():
-    filename = "example-docs/handbook-1p.docx"
-    elements = partition(filename=filename, detect_language_per_element=True)
+    elements = partition(example_doc_path("handbook-1p.docx"), detect_language_per_element=True)
     # PageBreak and other elements with no text will have `None` for `languages`
     none_langs = [element for element in elements if element.metadata.languages is None]
     assert none_langs[0].text == ""
@@ -1508,11 +1483,11 @@ def test_partition_default_does_not_overwrite_other_defaults():
     from unstructured.partition.text import partition_text
 
     # Use a document that is primarily in a language other than English
-    filename = "example-docs/language-docs/UDHR_first_article_all.txt"
-    text_elements = partition_text(filename)
+    file_path = example_doc_path("language-docs/UDHR_first_article_all.txt")
+    text_elements = partition_text(file_path)
     assert text_elements[0].metadata.languages != ["eng"]
 
-    auto_elements = partition(filename)
+    auto_elements = partition(file_path)
     assert auto_elements[0].metadata.languages != ["eng"]
     assert auto_elements[0].metadata.languages == text_elements[0].metadata.languages
 
diff --git a/unstructured/__version__.py b/unstructured/__version__.py
index 4e6562d61..cca525922 100644
--- a/unstructured/__version__.py
+++ b/unstructured/__version__.py
@@ -1 +1 @@
-__version__ = "0.14.11-dev4"  # pragma: no cover
+__version__ = "0.14.11-dev5"  # pragma: no cover