build: drop remaining Python 3.9 refs (#4049)

Dropped variables that said we support Python 3.9 in `setup.py`, as well
as any remaining references to Python 3.9.

I also checked the pins and removed several that don't seem necessary
any more.
This commit is contained in:
qued 2025-07-10 11:43:15 -05:00 committed by GitHub
parent 92965fb286
commit 7764fb6fd4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
21 changed files with 162 additions and 215 deletions

View File

@ -1,7 +1,9 @@
## 0.18.5-dev0
## 0.18.5-dev1
### Enhancements
- **Bump dependencies and remove lingering Python 3.9 artifacts** Cleaned up some references to 3.9 that were left when we dropped Python 3.9 support.
### Features
### Fixes

View File

@ -22,7 +22,7 @@ unstructured/nlp/patterns\.py
[tool.pyright]
pythonPlatform = "Linux"
pythonVersion = "3.9"
pythonVersion = "3.10"
reportUnnecessaryCast = true
reportUnnecessaryTypeIgnoreComment = true
stubPath = "./typings"
@ -31,7 +31,7 @@ verboseOutput = true
[tool.ruff]
line-length = 100
target-version = "py39"
target-version = "py310"
[tool.ruff.lint]
ignore = [

View File

@ -10,7 +10,7 @@ backoff==2.2.1
# via -r ./base.in
beautifulsoup4==4.13.4
# via -r ./base.in
certifi==2025.6.15
certifi==2025.7.9
# via
# httpcore
# httpx
@ -28,7 +28,7 @@ click==8.2.1
# via
# nltk
# python-oxmsg
cryptography==45.0.4
cryptography==45.0.5
# via unstructured-client
dataclasses-json==0.6.7
# via
@ -62,7 +62,7 @@ jsonpath-python==1.0.6
# via unstructured-client
langdetect==1.0.9
# via -r ./base.in
lxml==5.4.0
lxml==6.0.0
# via -r ./base.in
marshmallow==3.26.1
# via
@ -90,7 +90,7 @@ psutil==7.0.0
# via -r ./base.in
pycparser==2.22
# via cffi
pypdf==5.6.0
pypdf==5.7.0
# via unstructured-client
python-dateutil==2.9.0.post0
# via unstructured-client
@ -125,7 +125,7 @@ tqdm==4.67.1
# via
# -r ./base.in
# nltk
typing-extensions==4.14.0
typing-extensions==4.14.1
# via
# -r ./base.in
# anyio
@ -143,7 +143,7 @@ unstructured-client==0.25.9
# via
# -c requirements/deps/constraints.txt
# -r ./base.in
urllib3==2.4.0
urllib3==2.5.0
# via
# -c requirements/deps/constraints.txt
# requests

View File

@ -5,19 +5,10 @@
####################################################################################################
# we are using v3 client https://weaviate.io/developers/weaviate/client-libraries/python/python_v3
weaviate-client>=3.26.7,<4.0.0
# TODO: Constraint due to multiple versions being installed during pip-compile
protobuf>=6.30.0
# TODO: Constraint due to multiple versions being installed during pip-compile
grpcio>=1.65.5
# TODO: Pinned in transformers package, remove when that gets updated (https://github.com/huggingface/transformers/blob/main/setup.py)
tokenizers>=0.21,<0.22
# TODO: Constraint due to boto, with python before 3.10 not requiring openssl 1.1.1, remove when that gets
# updated or we drop support for 3.9
# NOTE(alan): Okay to drop pin once this version exists and we verify compatibility.
urllib3<3.0.0
# TODO: Constraint due to aiobotocore, remove when that gets updated:
botocore<1.34.132
# TODO: Constraint due to both 8.5.0 and 8.4.0 being installed during pip-compile
importlib-metadata>=8.5.0
# (austin): Versions below this have a different interface for passing parameters
unstructured-client>=0.23.0,<0.26.0
# paddle constrains protobuf; maybe we should put paddle here since its version is pinned in .in file

View File

@ -8,7 +8,7 @@ numpy==2.2.6
# via
# -c requirements/base.txt
# pandas
pandas==2.3.0
pandas==2.3.1
# via -r ./extra-csv.in
python-dateutil==2.9.0.post0
# via

View File

@ -4,13 +4,13 @@
#
# pip-compile ./extra-docx.in
#
lxml==5.4.0
lxml==6.0.0
# via
# -c requirements/base.txt
# python-docx
python-docx==1.2.0
# via -r ./extra-docx.in
typing-extensions==4.14.0
typing-extensions==4.14.1
# via
# -c requirements/base.txt
# python-docx

View File

@ -4,5 +4,5 @@
#
# pip-compile ./extra-markdown.in
#
markdown==3.8
markdown==3.8.2
# via -r ./extra-markdown.in

View File

@ -4,7 +4,7 @@
#
# pip-compile ./extra-odt.in
#
lxml==5.4.0
lxml==6.0.0
# via
# -c requirements/base.txt
# python-docx
@ -12,7 +12,7 @@ pypandoc==1.15
# via -r ./extra-odt.in
python-docx==1.2.0
# via -r ./extra-odt.in
typing-extensions==4.14.0
typing-extensions==4.14.1
# via
# -c requirements/base.txt
# python-docx

View File

@ -16,13 +16,11 @@ anyio==4.9.0
# via
# -c requirements/base.txt
# httpx
astor==0.8.1
# via paddlepaddle
beautifulsoup4==4.13.4
# via
# -c requirements/base.txt
# unstructured-paddleocr
certifi==2025.6.15
certifi==2025.7.9
# via
# -c requirements/base.txt
# httpcore
@ -42,7 +40,7 @@ exceptiongroup==1.3.0
# anyio
fire==0.7.0
# via unstructured-paddleocr
fonttools==4.58.4
fonttools==4.58.5
# via unstructured-paddleocr
h11==0.16.0
# via
@ -66,7 +64,7 @@ imageio==2.37.0
# via scikit-image
lazy-loader==0.4
# via scikit-image
lxml==5.4.0
lxml==6.0.0
# via
# -c requirements/base.txt
# python-docx
@ -90,11 +88,11 @@ numpy==2.2.6
# shapely
# tifffile
# unstructured-paddleocr
opencv-contrib-python==4.11.0.86
opencv-contrib-python==4.12.0.88
# via unstructured-paddleocr
opencv-python==4.11.0.86
opencv-python==4.12.0.88
# via unstructured-paddleocr
opencv-python-headless==4.11.0.86
opencv-python-headless==4.12.0.88
# via
# albucore
# albumentations
@ -105,7 +103,7 @@ packaging==25.0
# -c requirements/base.txt
# lazy-loader
# scikit-image
paddlepaddle==3.0.0
paddlepaddle==3.1.0
# via -r ./extra-paddleocr.in
pillow==11.3.0
# via
@ -145,7 +143,7 @@ scipy==1.15.3
# scikit-image
shapely==2.1.1
# via unstructured-paddleocr
simsimd==6.4.9
simsimd==6.5.0
# via albucore
sniffio==1.3.1
# via
@ -165,7 +163,7 @@ tqdm==4.67.1
# via
# -c requirements/base.txt
# unstructured-paddleocr
typing-extensions==4.14.0
typing-extensions==4.14.1
# via
# -c requirements/base.txt
# anyio
@ -180,7 +178,7 @@ typing-inspection==0.4.1
# via pydantic
unstructured-paddleocr==2.10.0
# via -r ./extra-paddleocr.in
urllib3==2.4.0
urllib3==2.5.0
# via
# -c requirements/base.txt
# -c requirements/deps/constraints.txt

View File

@ -4,13 +4,13 @@
#
# pip-compile ./extra-pdf-image.in
#
accelerate==1.7.0
accelerate==1.8.1
# via unstructured-inference
antlr4-python3-runtime==4.9.3
# via omegaconf
cachetools==5.5.2
# via google-auth
certifi==2025.6.15
certifi==2025.7.9
# via
# -c requirements/base.txt
# requests
@ -27,7 +27,7 @@ coloredlogs==15.0.1
# via onnxruntime
contourpy==1.3.2
# via matplotlib
cryptography==45.0.4
cryptography==45.0.5
# via
# -c requirements/base.txt
# pdfminer-six
@ -44,7 +44,7 @@ filelock==3.18.0
# transformers
flatbuffers==25.2.10
# via onnxruntime
fonttools==4.58.4
fonttools==4.58.5
# via matplotlib
fsspec==2025.5.1
# via
@ -62,16 +62,16 @@ googleapis-common-protos==1.70.0
# via
# google-api-core
# grpcio-status
grpcio==1.73.0
grpcio==1.73.1
# via
# -c requirements/deps/constraints.txt
# google-api-core
# grpcio-status
grpcio-status==1.73.0
grpcio-status==1.73.1
# via google-api-core
hf-xet==1.1.4
hf-xet==1.1.5
# via huggingface-hub
huggingface-hub==0.33.0
huggingface-hub==0.33.2
# via
# accelerate
# timm
@ -88,7 +88,7 @@ jinja2==3.1.6
# via torch
kiwisolver==1.4.8
# via matplotlib
lxml==5.4.0
lxml==6.0.0
# via
# -c requirements/base.txt
# pikepdf
@ -125,7 +125,7 @@ onnxruntime==1.22.0
# via
# -r ./extra-pdf-image.in
# unstructured-inference
opencv-python==4.11.0.86
opencv-python==4.12.0.88
# via unstructured-inference
packaging==25.0
# via
@ -137,7 +137,7 @@ packaging==25.0
# pikepdf
# transformers
# unstructured-pytesseract
pandas==2.3.0
pandas==2.3.1
# via unstructured-inference
pdf2image==1.17.0
# via -r ./extra-pdf-image.in
@ -146,9 +146,9 @@ pdfminer-six==20250327
# -c requirements/deps/constraints.txt
# -r ./extra-pdf-image.in
# unstructured-inference
pi-heif==0.22.0
pi-heif==1.0.0
# via -r ./extra-pdf-image.in
pikepdf==9.8.1
pikepdf==9.9.0
# via -r ./extra-pdf-image.in
pillow==11.3.0
# via
@ -190,7 +190,7 @@ pycparser==2.22
# cffi
pyparsing==3.2.3
# via matplotlib
pypdf==5.6.0
pypdf==5.7.0
# via
# -c requirements/base.txt
# -r ./extra-pdf-image.in
@ -243,11 +243,11 @@ sympy==1.14.0
# via
# onnxruntime
# torch
timm==1.0.15
timm==1.0.16
# via
# effdet
# unstructured-inference
tokenizers==0.21.1
tokenizers==0.21.2
# via
# -c requirements/deps/constraints.txt
# transformers
@ -267,9 +267,9 @@ tqdm==4.67.1
# -c requirements/base.txt
# huggingface-hub
# transformers
transformers==4.52.4
transformers==4.53.1
# via unstructured-inference
typing-extensions==4.14.0
typing-extensions==4.14.1
# via
# -c requirements/base.txt
# huggingface-hub
@ -282,7 +282,7 @@ unstructured-inference==1.0.5
# via -r ./extra-pdf-image.in
unstructured-pytesseract==0.3.15
# via -r ./extra-pdf-image.in
urllib3==2.4.0
urllib3==2.5.0
# via
# -c requirements/base.txt
# -c requirements/deps/constraints.txt

View File

@ -4,13 +4,13 @@
#
# pip-compile ./extra-pptx.in
#
lxml==5.4.0
lxml==6.0.0
# via python-pptx
pillow==11.3.0
# via python-pptx
python-pptx==1.0.2
# via -r ./extra-pptx.in
typing-extensions==4.14.0
typing-extensions==4.14.1
# via python-pptx
xlsxwriter==3.2.3
xlsxwriter==3.2.5
# via python-pptx

View File

@ -14,7 +14,7 @@ numpy==2.2.6
# pandas
openpyxl==3.1.5
# via -r ./extra-xlsx.in
pandas==2.3.0
pandas==2.3.1
# via -r ./extra-xlsx.in
python-dateutil==2.9.0.post0
# via

View File

@ -4,7 +4,7 @@
#
# pip-compile ./huggingface.in
#
certifi==2025.6.15
certifi==2025.7.9
# via
# -c requirements/base.txt
# requests
@ -25,9 +25,9 @@ fsspec==2025.5.1
# via
# huggingface-hub
# torch
hf-xet==1.1.4
hf-xet==1.1.5
# via huggingface-hub
huggingface-hub==0.33.0
huggingface-hub==0.33.2
# via
# tokenizers
# transformers
@ -86,7 +86,7 @@ six==1.17.0
# langdetect
sympy==1.14.0
# via torch
tokenizers==0.21.1
tokenizers==0.21.2
# via
# -c requirements/deps/constraints.txt
# transformers
@ -98,14 +98,14 @@ tqdm==4.67.1
# huggingface-hub
# sacremoses
# transformers
transformers==4.52.4
transformers==4.53.1
# via -r ./huggingface.in
typing-extensions==4.14.0
typing-extensions==4.14.1
# via
# -c requirements/base.txt
# huggingface-hub
# torch
urllib3==2.4.0
urllib3==2.5.0
# via
# -c requirements/base.txt
# -c requirements/deps/constraints.txt

View File

@ -14,7 +14,7 @@ click==8.2.1
# via
# -c requirements/base.txt
# black
coverage[toml]==7.9.1
coverage[toml]==7.9.2
# via
# -r ./test.in
# pytest-cov
@ -24,7 +24,7 @@ exceptiongroup==1.3.0
# pytest
execnet==2.1.1
# via pytest-xdist
flake8==7.2.0
flake8==7.3.0
# via
# -r ./test.in
# flake8-print
@ -32,7 +32,7 @@ flake8-print==5.0.0
# via -r ./test.in
freezegun==1.5.2
# via -r ./test.in
grpcio==1.73.0
grpcio==1.73.1
# via
# -c requirements/deps/constraints.txt
# -r ./test.in
@ -64,7 +64,7 @@ pluggy==1.6.0
# via
# pytest
# pytest-cov
pycodestyle==2.13.0
pycodestyle==2.14.0
# via
# flake8
# flake8-print
@ -72,13 +72,13 @@ pydantic==2.11.7
# via -r ./test.in
pydantic-core==2.33.2
# via pydantic
pyflakes==3.3.2
pyflakes==3.4.0
# via
# autoflake
# flake8
pygments==2.19.1
pygments==2.19.2
# via pytest
pytest==8.4.0
pytest==8.4.1
# via
# pytest-cov
# pytest-mock
@ -87,13 +87,13 @@ pytest-cov==6.2.1
# via -r ./test.in
pytest-mock==3.14.1
# via -r ./test.in
pytest-xdist==3.7.0
pytest-xdist==3.8.0
# via -r ./test.in
python-dateutil==2.9.0.post0
# via
# -c requirements/base.txt
# freezegun
ruff==0.11.13
ruff==0.12.2
# via -r ./test.in
semantic-version==2.10.0
# via liccheck
@ -112,13 +112,13 @@ tomli==2.2.1
# pytest
types-click==7.1.8
# via -r ./test.in
types-markdown==3.8.0.20250415
types-markdown==3.8.0.20250708
# via -r ./test.in
types-requests==2.32.4.20250611
# via -r ./test.in
types-tabulate==0.9.0.20241207
# via -r ./test.in
typing-extensions==4.14.0
typing-extensions==4.14.1
# via
# -c requirements/base.txt
# black
@ -129,7 +129,7 @@ typing-extensions==4.14.0
# typing-inspection
typing-inspection==0.4.1
# via pydantic
urllib3==2.4.0
urllib3==2.5.0
# via
# -c requirements/base.txt
# -c requirements/deps/constraints.txt

View File

@ -1,6 +1,6 @@
#!/usr/bin/env bash
# python version must match lowest supported (3.9)
# python version must match lowest supported (3.10)
major=3
minor=10
if ! python -c "import sys; assert sys.version_info.major == $major and sys.version_info.minor == $minor"; then

View File

@ -82,7 +82,7 @@ setup(
long_description_content_type="text/markdown",
keywords="NLP PDF HTML CV XML parsing preprocessing",
url="https://github.com/Unstructured-IO/unstructured",
python_requires=">=3.9.0",
python_requires=">=3.10.0",
classifiers=[
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
@ -91,7 +91,6 @@ setup(
"License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",

View File

@ -401,9 +401,8 @@ def test_get_page_image_metadata_and_coordinate_system():
assert isinstance(metadata, dict)
def test_ocr_data_to_elements(
filename=example_doc_path("img/layout-parser-paper-fast.jpg"),
):
def test_ocr_data_to_elements():
filename = example_doc_path("img/layout-parser-paper-fast.jpg")
text_regions = [
TextRegion.from_coords(
163.0,

View File

@ -133,9 +133,8 @@ def test_partition_image_local_raises_with_no_filename():
pdf._partition_pdf_or_image_local(filename="", file=None, is_image=True)
def test_partition_image_with_auto_strategy(
filename=example_doc_path("img/layout-parser-paper-fast.jpg"),
):
def test_partition_image_with_auto_strategy():
filename = example_doc_path("img/layout-parser-paper-fast.jpg")
elements = image.partition_image(filename=filename, strategy=PartitionStrategy.AUTO)
titles = [
el for el in elements if el.category == ElementType.TITLE and len(el.text.split(" ")) > 10
@ -147,9 +146,8 @@ def test_partition_image_with_auto_strategy(
assert isinstance(elements[idx].metadata.detection_class_prob, float)
def test_partition_image_with_table_extraction(
filename=example_doc_path("img/layout-parser-paper-with-table.jpg"),
):
def test_partition_image_with_table_extraction():
filename = example_doc_path("img/layout-parser-paper-with-table.jpg")
elements = image.partition_image(
filename=filename,
strategy=PartitionStrategy.HI_RES,
@ -161,17 +159,14 @@ def test_partition_image_with_table_extraction(
assert "</thead><tbody><tr>" in table[0]
def test_partition_image_with_multipage_tiff(
filename=example_doc_path("img/layout-parser-paper-combined.tiff"),
):
def test_partition_image_with_multipage_tiff():
filename = example_doc_path("img/layout-parser-paper-combined.tiff")
elements = image.partition_image(filename=filename, strategy=PartitionStrategy.AUTO)
assert elements[-1].metadata.page_number == 2
def test_partition_image_with_bmp(
tmpdir,
filename=example_doc_path("img/layout-parser-paper-with-table.jpg"),
):
def test_partition_image_with_bmp(tmpdir):
filename = example_doc_path("img/layout-parser-paper-with-table.jpg")
bmp_filename = os.path.join(tmpdir.dirname, "example.bmp")
img = Image.open(filename)
img.save(bmp_filename)
@ -187,7 +182,8 @@ def test_partition_image_with_bmp(
assert "</thead><tbody><tr>" in table[0]
def test_partition_image_with_language_passed(filename=example_doc_path("img/example.jpg")):
def test_partition_image_with_language_passed():
filename = example_doc_path("img/example.jpg")
with mock.patch.object(
ocr,
"process_file_with_ocr",
@ -202,9 +198,8 @@ def test_partition_image_with_language_passed(filename=example_doc_path("img/exa
assert mock_partition.call_args.kwargs.get("ocr_languages") == "eng+swe"
def test_partition_image_from_file_with_language_passed(
filename=example_doc_path("img/example.jpg"),
):
def test_partition_image_from_file_with_language_passed():
filename = example_doc_path("img/example.jpg")
with mock.patch.object(
ocr,
"process_data_with_ocr",
@ -217,9 +212,8 @@ def test_partition_image_from_file_with_language_passed(
# NOTE(crag): see https://github.com/Unstructured-IO/unstructured/issues/1086
@pytest.mark.skip(reason="Current catching too many tesseract errors")
def test_partition_image_raises_with_invalid_language(
filename=example_doc_path("img/example.jpg"),
):
def test_partition_image_raises_with_invalid_language():
filename = example_doc_path("img/example.jpg")
with pytest.raises(TesseractError):
image.partition_image(
filename=filename,
@ -414,9 +408,8 @@ def test_partition_msg_with_json():
assert_round_trips_through_JSON(elements)
def test_partition_image_with_ocr_has_coordinates_from_filename(
filename=example_doc_path("img/english-and-korean.png"),
):
def test_partition_image_with_ocr_has_coordinates_from_filename():
filename = example_doc_path("img/english-and-korean.png")
elements = image.partition_image(filename=filename, strategy=PartitionStrategy.OCR_ONLY)
int_coordinates = [(int(x), int(y)) for x, y in elements[0].metadata.coordinates.points]
assert int_coordinates == [(14, 16), (14, 37), (381, 37), (381, 16)]
@ -467,9 +460,8 @@ def test_partition_image_warns_with_ocr_languages(caplog):
assert "The ocr_languages kwarg will be deprecated" in caplog.text
def test_add_chunking_strategy_on_partition_image(
filename=example_doc_path("img/layout-parser-paper-fast.jpg"),
):
def test_add_chunking_strategy_on_partition_image():
filename = example_doc_path("img/layout-parser-paper-fast.jpg")
elements = image.partition_image(filename=filename)
chunk_elements = image.partition_image(filename, chunking_strategy="by_title")
chunks = chunk_by_title(elements)
@ -477,9 +469,8 @@ def test_add_chunking_strategy_on_partition_image(
assert chunk_elements == chunks
def test_add_chunking_strategy_on_partition_image_hi_res(
filename=example_doc_path("img/layout-parser-paper-with-table.jpg"),
):
def test_add_chunking_strategy_on_partition_image_hi_res():
filename = example_doc_path("img/layout-parser-paper-with-table.jpg")
elements = image.partition_image(
filename=filename,
strategy=PartitionStrategy.HI_RES,
@ -615,8 +606,8 @@ def test_partition_image_has_filename(inference_results):
def test_partition_image_element_extraction(
file_mode,
extract_image_block_to_payload,
filename=example_doc_path("img/embedded-images-tables.jpg"),
):
filename = example_doc_path("img/embedded-images-tables.jpg")
extract_image_block_types = ["Image", "Table"]
with tempfile.TemporaryDirectory() as tmpdir:
@ -641,9 +632,8 @@ def test_partition_image_element_extraction(
)
def test_partition_image_works_on_heic_file(
filename=example_doc_path("img/DA-1p.heic"),
):
def test_partition_image_works_on_heic_file():
filename = example_doc_path("img/DA-1p.heic")
elements = image.partition_image(filename=filename, strategy=PartitionStrategy.AUTO)
titles = [el.text for el in elements if el.category == ElementType.TITLE]
assert "CREATURES" in titles

View File

@ -226,8 +226,9 @@ def test_partition_pdf_outputs_valid_amount_of_elements_and_metadata_values(
starting_page_number,
expected_page_numbers,
origin,
filename=example_doc_path("pdf/layout-parser-paper-with-empty-pages.pdf"),
):
filename = example_doc_path("pdf/layout-parser-paper-with-empty-pages.pdf")
# Test that the partition_pdf function can handle filename
def _test(result):
# validate that the result is a non-empty list of dicts
@ -270,8 +271,8 @@ def test_partition_pdf_outputs_valid_amount_of_elements_and_metadata_values(
@mock.patch.dict(os.environ, {"UNSTRUCTURED_HI_RES_MODEL_NAME": "checkbox"})
def test_partition_pdf_with_model_name_env_var(
monkeypatch,
filename=example_doc_path("pdf/layout-parser-paper-fast.pdf"),
):
filename = example_doc_path("pdf/layout-parser-paper-fast.pdf")
monkeypatch.setattr(pdf, "extractable_elements", lambda *args, **kwargs: [])
with mock.patch.object(
layout,
@ -286,8 +287,8 @@ def test_partition_pdf_with_model_name_env_var(
def test_partition_pdf_with_model_name(
monkeypatch,
model_name,
filename=example_doc_path("pdf/layout-parser-paper-fast.pdf"),
):
filename = example_doc_path("pdf/layout-parser-paper-fast.pdf")
monkeypatch.setattr(pdf, "extractable_elements", lambda *args, **kwargs: [])
with mock.patch.object(
layout,
@ -315,10 +316,8 @@ def test_partition_pdf_with_model_name(
assert mock_process.call_args[1]["model_name"] == model_name
def test_partition_pdf_with_hi_res_model_name(
monkeypatch,
filename=example_doc_path("pdf/layout-parser-paper-fast.pdf"),
):
def test_partition_pdf_with_hi_res_model_name(monkeypatch):
filename = example_doc_path("pdf/layout-parser-paper-fast.pdf")
monkeypatch.setattr(pdf, "extractable_elements", lambda *args, **kwargs: [])
with mock.patch.object(
layout,
@ -332,10 +331,8 @@ def test_partition_pdf_with_hi_res_model_name(
assert mock_process.call_args[1]["model_name"] == "checkbox"
def test_partition_pdf_or_image_with_hi_res_model_name(
monkeypatch,
filename=example_doc_path("pdf/layout-parser-paper-fast.pdf"),
):
def test_partition_pdf_or_image_with_hi_res_model_name(monkeypatch):
filename = example_doc_path("pdf/layout-parser-paper-fast.pdf")
monkeypatch.setattr(pdf, "extractable_elements", lambda *args, **kwargs: [])
with mock.patch.object(
layout,
@ -349,9 +346,8 @@ def test_partition_pdf_or_image_with_hi_res_model_name(
assert mock_process.call_args[1]["model_name"] == "checkbox"
def test_partition_pdf_with_auto_strategy(
filename=example_doc_path("pdf/layout-parser-paper-fast.pdf"),
):
def test_partition_pdf_with_auto_strategy():
filename = example_doc_path("pdf/layout-parser-paper-fast.pdf")
elements = pdf.partition_pdf(filename=filename, strategy=PartitionStrategy.AUTO)
title = "LayoutParser: A Unified Toolkit for Deep Learning Based Document Image Analysis"
assert elements[6].text == title
@ -359,23 +355,20 @@ def test_partition_pdf_with_auto_strategy(
assert elements[6].metadata.file_directory == os.path.dirname(filename)
def test_partition_pdf_with_page_breaks(
filename=example_doc_path("pdf/layout-parser-paper-fast.pdf"),
):
def test_partition_pdf_with_page_breaks():
filename = example_doc_path("pdf/layout-parser-paper-fast.pdf")
elements = pdf.partition_pdf(filename=filename, url=None, include_page_breaks=True)
assert "PageBreak" in [elem.category for elem in elements]
def test_partition_pdf_with_no_page_breaks(
filename=example_doc_path("pdf/layout-parser-paper-fast.pdf"),
):
def test_partition_pdf_with_no_page_breaks():
filename = example_doc_path("pdf/layout-parser-paper-fast.pdf")
elements = pdf.partition_pdf(filename=filename, url=None)
assert "PageBreak" not in [elem.category for elem in elements]
def test_partition_pdf_with_fast_strategy(
filename=example_doc_path("pdf/layout-parser-paper-fast.pdf"),
):
def test_partition_pdf_with_fast_strategy():
filename = example_doc_path("pdf/layout-parser-paper-fast.pdf")
elements = pdf.partition_pdf(
filename=filename, url=None, strategy=PartitionStrategy.FAST, starting_page_number=3
)
@ -394,9 +387,8 @@ def test_partition_pdf_with_fast_neg_coordinates():
assert elements[0].metadata.coordinates.points[1][0] < 0
def test_partition_pdf_with_fast_groups_text(
filename=example_doc_path("pdf/layout-parser-paper-fast.pdf"),
):
def test_partition_pdf_with_fast_groups_text():
filename = example_doc_path("pdf/layout-parser-paper-fast.pdf")
elements = pdf.partition_pdf(filename=filename, url=None, strategy=PartitionStrategy.FAST)
first_narrative_element = None
@ -410,18 +402,15 @@ def test_partition_pdf_with_fast_groups_text(
assert first_narrative_element.metadata.filename == "layout-parser-paper-fast.pdf"
def test_partition_pdf_with_fast_strategy_from_file(
filename=example_doc_path("pdf/layout-parser-paper-fast.pdf"),
):
def test_partition_pdf_with_fast_strategy_from_file():
filename = example_doc_path("pdf/layout-parser-paper-fast.pdf")
with open(filename, "rb") as f:
elements = pdf.partition_pdf(file=f, url=None, strategy=PartitionStrategy.FAST)
assert len(elements) > 10
def test_partition_pdf_with_fast_strategy_and_page_breaks(
caplog,
filename=example_doc_path("pdf/layout-parser-paper-fast.pdf"),
):
def test_partition_pdf_with_fast_strategy_and_page_breaks(caplog):
filename = example_doc_path("pdf/layout-parser-paper-fast.pdf")
elements = pdf.partition_pdf(
filename=filename,
url=None,
@ -436,18 +425,15 @@ def test_partition_pdf_with_fast_strategy_and_page_breaks(
assert element.metadata.filename == "layout-parser-paper-fast.pdf"
def test_partition_pdf_raises_with_bad_strategy(
filename=example_doc_path("pdf/layout-parser-paper-fast.pdf"),
):
def test_partition_pdf_raises_with_bad_strategy():
filename = example_doc_path("pdf/layout-parser-paper-fast.pdf")
with pytest.raises(ValueError):
pdf.partition_pdf(filename=filename, url=None, strategy="made_up")
def test_partition_pdf_falls_back_to_fast(
monkeypatch,
caplog,
filename=example_doc_path("pdf/layout-parser-paper-fast.pdf"),
):
def test_partition_pdf_falls_back_to_fast(monkeypatch, caplog):
filename = example_doc_path("pdf/layout-parser-paper-fast.pdf")
def mock_exists(dep):
return dep not in ["unstructured_inference", "unstructured_pytesseract"]
@ -465,11 +451,9 @@ def test_partition_pdf_falls_back_to_fast(
assert "unstructured_inference is not installed" in caplog.text
def test_partition_pdf_falls_back_to_fast_from_ocr_only(
monkeypatch,
caplog,
filename=example_doc_path("pdf/layout-parser-paper-fast.pdf"),
):
def test_partition_pdf_falls_back_to_fast_from_ocr_only(monkeypatch, caplog):
filename = example_doc_path("pdf/layout-parser-paper-fast.pdf")
def mock_exists(dep):
return dep not in ["unstructured_pytesseract"]
@ -491,11 +475,9 @@ def test_partition_pdf_falls_back_to_fast_from_ocr_only(
assert "pytesseract is not installed" in caplog.text
def test_partition_pdf_falls_back_to_hi_res_from_ocr_only(
monkeypatch,
caplog,
filename=example_doc_path("pdf/layout-parser-paper-fast.pdf"),
):
def test_partition_pdf_falls_back_to_hi_res_from_ocr_only(monkeypatch, caplog):
filename = example_doc_path("pdf/layout-parser-paper-fast.pdf")
def mock_exists(dep):
return dep not in ["unstructured_pytesseract"]
@ -514,11 +496,9 @@ def test_partition_pdf_falls_back_to_hi_res_from_ocr_only(
assert "pytesseract is not installed" in caplog.text
def test_partition_pdf_falls_back_to_ocr_only(
monkeypatch,
caplog,
filename=example_doc_path("pdf/layout-parser-paper-fast.pdf"),
):
def test_partition_pdf_falls_back_to_ocr_only(monkeypatch, caplog):
filename = example_doc_path("pdf/layout-parser-paper-fast.pdf")
def mock_exists(dep):
return dep not in ["unstructured_inference"]
@ -633,7 +613,8 @@ def test_partition_pdf_with_dpi():
assert mock_process.call_args[1]["pdf_image_dpi"] == 100
def test_partition_pdf_requiring_recursive_text_grab(filename=example_doc_path("pdf/reliance.pdf")):
def test_partition_pdf_requiring_recursive_text_grab():
filename = example_doc_path("pdf/reliance.pdf")
elements = pdf.partition_pdf(filename=filename, strategy=PartitionStrategy.FAST)
assert len(elements) > 50
assert elements[0].metadata.page_number == 1
@ -646,10 +627,9 @@ def test_partition_pdf_text_not_extractable():
assert len(elements) == 0
def test_partition_pdf_fails_if_pdf_not_processable(
monkeypatch,
filename=example_doc_path("pdf/layout-parser-paper-fast.pdf"),
):
def test_partition_pdf_fails_if_pdf_not_processable(monkeypatch):
filename = example_doc_path("pdf/layout-parser-paper-fast.pdf")
def mock_exists(dep):
return dep not in ["unstructured_inference", "unstructured_pytesseract"]
@ -700,9 +680,8 @@ def test_partition_pdf_fast_groups_text_in_text_box():
assert elements[2] == Text("2.5", metadata=expected_elem_metadata_3)
def test_partition_pdf_with_metadata_filename(
filename=example_doc_path("pdf/layout-parser-paper-fast.pdf"),
):
def test_partition_pdf_with_metadata_filename():
filename = example_doc_path("pdf/layout-parser-paper-fast.pdf")
elements = pdf.partition_pdf(
filename=filename,
url=None,
@ -713,9 +692,8 @@ def test_partition_pdf_with_metadata_filename(
assert element.metadata.filename == "test"
def test_partition_pdf_with_fast_strategy_from_file_with_metadata_filename(
filename=example_doc_path("pdf/layout-parser-paper-fast.pdf"),
):
def test_partition_pdf_with_fast_strategy_from_file_with_metadata_filename():
filename = example_doc_path("pdf/layout-parser-paper-fast.pdf")
with open(filename, "rb") as f:
elements = pdf.partition_pdf(
file=f,
@ -793,9 +771,8 @@ def test_partition_pdf_with_json(strategy: str):
assert_round_trips_through_JSON(elements)
def test_add_chunking_strategy_by_title_on_partition_pdf(
filename=example_doc_path("pdf/layout-parser-paper-fast.pdf"),
):
def test_add_chunking_strategy_by_title_on_partition_pdf():
filename = example_doc_path("pdf/layout-parser-paper-fast.pdf")
elements = pdf.partition_pdf(filename=filename)
chunk_elements = pdf.partition_pdf(filename, chunking_strategy="by_title")
chunks = chunk_by_title(elements)
@ -920,9 +897,8 @@ def test_partition_pdf_uses_hi_res_model_name():
assert mockpartition.call_args.kwargs["hi_res_model_name"]
def test_partition_pdf_word_bbox_not_char(
filename=example_doc_path("pdf/interface-config-guide-p93.pdf"),
):
def test_partition_pdf_word_bbox_not_char():
filename = example_doc_path("pdf/interface-config-guide-p93.pdf")
try:
elements = pdf.partition_pdf(filename=filename, strategy="fast")
except Exception as e:
@ -930,9 +906,8 @@ def test_partition_pdf_word_bbox_not_char(
assert len(elements) == 17
def test_partition_pdf_fast_no_mapping_errors(
filename=example_doc_path("pdf/a1977-backus-p21.pdf"),
):
def test_partition_pdf_fast_no_mapping_errors():
filename = example_doc_path("pdf/a1977-backus-p21.pdf")
"""Verify there is no regression for https://github.com/Unstructured-IO/unstructured/pull/2940,
failing to map old parent_id's to new"""
pdf.partition_pdf(filename=filename, strategy="fast")
@ -1190,9 +1165,8 @@ def test_partition_pdf_with_bad_color_profile():
assert pdf.partition_pdf(filename, strategy="fast")
def test_partition_pdf_with_fast_finds_headers_footers(
filename=example_doc_path("pdf/header-test-doc.pdf"),
):
def test_partition_pdf_with_fast_finds_headers_footers():
filename = example_doc_path("pdf/header-test-doc.pdf")
elements = pdf.partition_pdf(filename, strategy="fast")
assert isinstance(elements[0], Header)
assert isinstance(elements[-1], Footer)
@ -1266,11 +1240,8 @@ def assert_element_extraction(
@pytest.mark.parametrize("file_mode", ["filename", "rb"])
@pytest.mark.parametrize("extract_image_block_to_payload", [False, True])
def test_partition_pdf_element_extraction(
file_mode,
extract_image_block_to_payload,
filename=example_doc_path("pdf/embedded-images-tables.pdf"),
):
def test_partition_pdf_element_extraction(file_mode, extract_image_block_to_payload):
filename = example_doc_path("pdf/embedded-images-tables.pdf")
extract_image_block_types = ["Image", "Table"]
with tempfile.TemporaryDirectory() as tmpdir:
@ -1299,9 +1270,8 @@ def test_partition_pdf_element_extraction(
)
def test_partition_pdf_always_keep_all_image_elements(
filename=example_doc_path("pdf/embedded-images.pdf"),
):
def test_partition_pdf_always_keep_all_image_elements():
filename = example_doc_path("pdf/embedded-images.pdf")
elements = pdf.partition_pdf(
filename=filename,
strategy="hi_res",
@ -1559,11 +1529,9 @@ def test_document_to_element_list_sets_category_depth_titles():
PartitionStrategy.OCR_ONLY,
],
)
def test_partition_pdf_with_password(
file_mode,
strategy,
filename=example_doc_path("pdf/password.pdf"),
):
def test_partition_pdf_with_password(file_mode, strategy):
filename = example_doc_path("pdf/password.pdf")
# Test that the partition_pdf function can handle filename
def _test(result):
# validate that the result is a non-empty list of dicts

View File

@ -35,9 +35,8 @@ def test_write_image(image_type):
@pytest.mark.parametrize("file_mode", ["filename", "rb"])
@pytest.mark.parametrize("path_only", [True, False])
def test_convert_pdf_to_image(
file_mode, path_only, filename=example_doc_path("pdf/embedded-images.pdf")
):
def test_convert_pdf_to_image(file_mode, path_only):
filename = example_doc_path("pdf/embedded-images.pdf")
with tempfile.TemporaryDirectory() as tmpdir:
if file_mode == "filename":
images = pdf_image_utils.convert_pdf_to_image(
@ -61,7 +60,8 @@ def test_convert_pdf_to_image(
assert isinstance(images[0], PILImg.Image)
def test_convert_pdf_to_image_raises_error(filename=example_doc_path("embedded-images.pdf")):
def test_convert_pdf_to_image_raises_error():
filename = example_doc_path("embedded-images.pdf")
with pytest.raises(ValueError) as exc_info:
pdf_image_utils.convert_pdf_to_image(filename=filename, path_only=True, output_folder=None)

View File

@ -1 +1 @@
__version__ = "0.18.5-dev0" # pragma: no cover
__version__ = "0.18.5-dev1" # pragma: no cover