From 7de630e45e1830fa1b22979258a64ebe5ff511bd Mon Sep 17 00:00:00 2001
From: Yao You <theyaoyou@gmail.com>
Date: Tue, 18 Mar 2025 16:33:48 -0500
Subject: [PATCH] Feat/bump numpy to 2 (#3961)

This PR updates a few dependencies so that they are compatible with
`numpy>=2`.
---
 CHANGELOG.md                             |   5 +-
 requirements/base.in                     |   4 +-
 requirements/base.txt                    |   8 +-
 requirements/dev.txt                     |   4 +-
 requirements/extra-csv.txt               |   2 +-
 requirements/extra-paddleocr.in          |   4 +-
 requirements/extra-paddleocr.txt         | 115 ++++++++++++-----------
 requirements/extra-pdf-image.in          |   5 +-
 requirements/extra-pdf-image.txt         |  37 ++++----
 requirements/extra-xlsx.txt              |   2 +-
 requirements/huggingface.txt             |  10 +-
 requirements/test.txt                    |  97 +++++--------------
 test_unstructured/partition/test_auto.py |  18 +++-
 unstructured/__version__.py              |   2 +-
 14 files changed, 146 insertions(+), 167 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index cbadb01de..bff09cf81 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,10 +1,13 @@
-## 0.17.1-dev1
+## 0.17.1
 
 ### Enhancements
 
 - **Add image_url of images in html partitioner** `<img>` tags with non-data content include a new image_url metadata field with the content of the src attribute.
+
 - **Use `lxml` instead of `bs4` to parse hOCR data.** `lxml` is much faster than `bs4` given the hOCR data format is regular (garanteed because it is programatically generated)
 
+- **Bump `numpy` to `>=2`.** Also upgrade `paddlepaddle`, `unstructured-paddleocr`, and `onnx` so they are compatible with `numpy>=2`.
+
 ### Features
 
 ### Fixes
diff --git a/requirements/base.in b/requirements/base.in
index cc2b27d8a..320a77226 100644
--- a/requirements/base.in
+++ b/requirements/base.in
@@ -10,9 +10,7 @@ emoji
 dataclasses-json
 python-iso639
 langdetect
-# NOTE(robinson) - numpy pin is because ONNX model weights are only compatible
-# with numpy 1.x.x
-numpy<2
+numpy
 rapidfuzz
 backoff
 typing-extensions
diff --git a/requirements/base.txt b/requirements/base.txt
index a29c4a1a3..17a25c4d4 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -4,7 +4,7 @@
 #
 #    pip-compile ./base.in
 #
-anyio==4.8.0
+anyio==4.9.0
     # via httpx
 backoff==2.2.1
     # via -r ./base.in
@@ -34,7 +34,7 @@ dataclasses-json==0.6.7
     # via
     #   -r ./base.in
     #   unstructured-client
-deepdiff==8.3.0
+deepdiff==8.4.2
     # via unstructured-client
 emoji==2.14.1
     # via -r ./base.in
@@ -76,7 +76,7 @@ nest-asyncio==1.6.0
     # via unstructured-client
 nltk==3.9.1
     # via -r ./base.in
-numpy==1.26.4
+numpy==2.0.2
     # via -r ./base.in
 olefile==0.47
     # via python-oxmsg
@@ -90,7 +90,7 @@ psutil==7.0.0
     # via -r ./base.in
 pycparser==2.22
     # via cffi
-pypdf==5.3.1
+pypdf==5.4.0
     # via unstructured-client
 python-dateutil==2.9.0.post0
     # via unstructured-client
diff --git a/requirements/dev.txt b/requirements/dev.txt
index ecc41e978..0de6c4eb0 100644
--- a/requirements/dev.txt
+++ b/requirements/dev.txt
@@ -15,9 +15,9 @@ click==8.1.8
     #   pip-tools
 distlib==0.3.9
     # via virtualenv
-filelock==3.17.0
+filelock==3.18.0
     # via virtualenv
-identify==2.6.8
+identify==2.6.9
     # via pre-commit
 importlib-metadata==8.6.1
     # via
diff --git a/requirements/extra-csv.txt b/requirements/extra-csv.txt
index cb7c7efc5..a5779f0a8 100644
--- a/requirements/extra-csv.txt
+++ b/requirements/extra-csv.txt
@@ -4,7 +4,7 @@
 #
 #    pip-compile ./extra-csv.in
 #
-numpy==1.26.4
+numpy==2.0.2
     # via
     #   -c ./base.txt
     #   pandas
diff --git a/requirements/extra-paddleocr.in b/requirements/extra-paddleocr.in
index b1cf3ee2e..ec0c3bf32 100644
--- a/requirements/extra-paddleocr.in
+++ b/requirements/extra-paddleocr.in
@@ -1,5 +1,5 @@
 -c ./deps/constraints.txt
 -c base.txt
 
-paddlepaddle==3.0.0b1
-unstructured.paddleocr==2.8.1.0
+paddlepaddle>=3.0.0b1
+unstructured.paddleocr==2.10.0
diff --git a/requirements/extra-paddleocr.txt b/requirements/extra-paddleocr.txt
index 70bdb7b72..a5264d784 100644
--- a/requirements/extra-paddleocr.txt
+++ b/requirements/extra-paddleocr.txt
@@ -4,12 +4,24 @@
 #
 #    pip-compile ./extra-paddleocr.in
 #
-anyio==4.8.0
+albucore==0.0.23
+    # via
+    #   albumentations
+    #   unstructured-paddleocr
+albumentations==2.0.5
+    # via unstructured-paddleocr
+annotated-types==0.7.0
+    # via pydantic
+anyio==4.9.0
     # via
     #   -c ./base.txt
     #   httpx
 astor==0.8.1
     # via paddlepaddle
+beautifulsoup4==4.13.3
+    # via
+    #   -c ./base.txt
+    #   unstructured-paddleocr
 certifi==2025.1.31
     # via
     #   -c ./base.txt
@@ -20,20 +32,20 @@ charset-normalizer==3.4.1
     # via
     #   -c ./base.txt
     #   requests
-contourpy==1.3.0
-    # via matplotlib
-cycler==0.12.1
-    # via matplotlib
 cython==3.0.12
     # via unstructured-paddleocr
 decorator==5.2.1
     # via paddlepaddle
+eval-type-backport==0.2.2
+    # via albumentations
 exceptiongroup==1.2.2
     # via
     #   -c ./base.txt
     #   anyio
+fire==0.7.0
+    # via unstructured-paddleocr
 fonttools==4.56.0
-    # via matplotlib
+    # via unstructured-paddleocr
 h11==0.14.0
     # via
     #   -c ./base.txt
@@ -53,32 +65,26 @@ idna==3.10
     #   httpx
     #   requests
 imageio==2.37.0
-    # via
-    #   imgaug
-    #   scikit-image
-imgaug==0.4.0
-    # via unstructured-paddleocr
-importlib-resources==6.5.2
-    # via matplotlib
-kiwisolver==1.4.7
-    # via matplotlib
+    # via scikit-image
 lazy-loader==0.4
     # via scikit-image
-matplotlib==3.9.4
-    # via imgaug
+lxml==5.3.1
+    # via
+    #   -c ./base.txt
+    #   python-docx
 networkx==3.2.1
     # via
     #   paddlepaddle
     #   scikit-image
-numpy==1.26.4
+numpy==2.0.2
     # via
     #   -c ./base.txt
-    #   contourpy
+    #   albucore
+    #   albumentations
     #   imageio
-    #   imgaug
-    #   matplotlib
     #   opencv-contrib-python
     #   opencv-python
+    #   opencv-python-headless
     #   opt-einsum
     #   paddlepaddle
     #   scikit-image
@@ -89,44 +95,42 @@ numpy==1.26.4
 opencv-contrib-python==4.11.0.86
     # via unstructured-paddleocr
 opencv-python==4.11.0.86
+    # via unstructured-paddleocr
+opencv-python-headless==4.11.0.86
     # via
-    #   imgaug
-    #   unstructured-paddleocr
+    #   albucore
+    #   albumentations
 opt-einsum==3.3.0
     # via paddlepaddle
 packaging==24.2
     # via
     #   -c ./base.txt
     #   lazy-loader
-    #   matplotlib
     #   scikit-image
-paddlepaddle==3.0.0b1
+paddlepaddle==3.0.0rc1
     # via -r ./extra-paddleocr.in
-pdf2image==1.17.0
-    # via unstructured-paddleocr
 pillow==11.1.0
     # via
     #   imageio
-    #   imgaug
-    #   matplotlib
     #   paddlepaddle
-    #   pdf2image
     #   scikit-image
     #   unstructured-paddleocr
-protobuf==6.30.0
+protobuf==6.30.1
     # via
     #   -c ././deps/constraints.txt
     #   paddlepaddle
 pyclipper==1.3.0.post6
     # via unstructured-paddleocr
-pyparsing==3.2.1
-    # via matplotlib
-python-dateutil==2.9.0.post0
-    # via
-    #   -c ./base.txt
-    #   matplotlib
-pyyaml==6.0.2
+pydantic==2.10.6
+    # via albumentations
+pydantic-core==2.27.2
+    # via pydantic
+python-docx==1.1.2
     # via unstructured-paddleocr
+pyyaml==6.0.2
+    # via
+    #   albumentations
+    #   unstructured-paddleocr
 rapidfuzz==3.12.2
     # via
     #   -c ./base.txt
@@ -136,26 +140,27 @@ requests==2.32.3
     #   -c ./base.txt
     #   unstructured-paddleocr
 scikit-image==0.24.0
-    # via
-    #   imgaug
-    #   unstructured-paddleocr
+    # via unstructured-paddleocr
 scipy==1.13.1
     # via
-    #   imgaug
+    #   albumentations
     #   scikit-image
 shapely==2.0.7
-    # via
-    #   imgaug
-    #   unstructured-paddleocr
-six==1.17.0
-    # via
-    #   -c ./base.txt
-    #   imgaug
-    #   python-dateutil
+    # via unstructured-paddleocr
+simsimd==6.2.1
+    # via albucore
 sniffio==1.3.1
     # via
     #   -c ./base.txt
     #   anyio
+soupsieve==2.6
+    # via
+    #   -c ./base.txt
+    #   beautifulsoup4
+stringzilla==3.12.3
+    # via albucore
+termcolor==2.5.0
+    # via fire
 tifffile==2024.8.30
     # via scikit-image
 tqdm==4.67.1
@@ -165,14 +170,18 @@ tqdm==4.67.1
 typing-extensions==4.12.2
     # via
     #   -c ./base.txt
+    #   albucore
+    #   albumentations
     #   anyio
+    #   beautifulsoup4
     #   paddlepaddle
-unstructured-paddleocr==2.8.1.0
+    #   pydantic
+    #   pydantic-core
+    #   python-docx
+unstructured-paddleocr==2.10.0
     # via -r ./extra-paddleocr.in
 urllib3==1.26.20
     # via
     #   -c ././deps/constraints.txt
     #   -c ./base.txt
     #   requests
-zipp==3.21.0
-    # via importlib-resources
diff --git a/requirements/extra-pdf-image.in b/requirements/extra-pdf-image.in
index 332ca01b6..4f3aef930 100644
--- a/requirements/extra-pdf-image.in
+++ b/requirements/extra-pdf-image.in
@@ -1,7 +1,8 @@
 -c ./deps/constraints.txt
 -c base.txt
 
-onnx
+onnx>=1.17.0
+onnxruntime>=1.19.0
 pdf2image
 pdfminer.six
 pikepdf
@@ -11,5 +12,5 @@ google-cloud-vision
 effdet
 # Do not move to constraints.in, otherwise unstructured-inference will not be upgraded
 # when unstructured library is.
-unstructured-inference>=0.8.9
+unstructured-inference>=0.8.10
 unstructured.pytesseract>=0.3.12
diff --git a/requirements/extra-pdf-image.txt b/requirements/extra-pdf-image.txt
index 3e0561c16..0226cee3e 100644
--- a/requirements/extra-pdf-image.txt
+++ b/requirements/extra-pdf-image.txt
@@ -35,7 +35,7 @@ deprecated==1.2.18
     # via pikepdf
 effdet==0.4.1
     # via -r ./extra-pdf-image.in
-filelock==3.17.0
+filelock==3.18.0
     # via
     #   huggingface-hub
     #   torch
@@ -44,30 +44,30 @@ flatbuffers==25.2.10
     # via onnxruntime
 fonttools==4.56.0
     # via matplotlib
-fsspec==2025.2.0
+fsspec==2025.3.0
     # via
     #   huggingface-hub
     #   torch
-google-api-core[grpc]==2.8.0
+google-api-core[grpc]==2.24.2
     # via google-cloud-vision
 google-auth==2.38.0
     # via
     #   google-api-core
     #   google-cloud-vision
-google-cloud-vision==2.7.2
+google-cloud-vision==3.10.1
     # via -r ./extra-pdf-image.in
-googleapis-common-protos==1.56.1
+googleapis-common-protos==1.69.2
     # via
     #   google-api-core
     #   grpcio-status
-grpcio==1.70.0
+grpcio==1.71.0
     # via
     #   -c ././deps/constraints.txt
     #   google-api-core
     #   grpcio-status
 grpcio-status==1.62.3
     # via google-api-core
-huggingface-hub==0.29.2
+huggingface-hub==0.29.3
     # via
     #   timm
     #   tokenizers
@@ -99,7 +99,7 @@ mpmath==1.3.0
     # via sympy
 networkx==3.2.1
     # via torch
-numpy==1.26.4
+numpy==2.0.2
     # via
     #   -c ./base.txt
     #   contourpy
@@ -120,7 +120,9 @@ onnx==1.17.0
     #   -r ./extra-pdf-image.in
     #   unstructured-inference
 onnxruntime==1.19.2
-    # via unstructured-inference
+    # via
+    #   -r ./extra-pdf-image.in
+    #   unstructured-inference
 opencv-python==4.11.0.86
     # via unstructured-inference
 packaging==24.2
@@ -140,7 +142,7 @@ pdfminer-six==20240706
     # via
     #   -r ./extra-pdf-image.in
     #   unstructured-inference
-pi-heif==0.21.0
+pi-heif==0.22.0
     # via -r ./extra-pdf-image.in
 pikepdf==9.5.2
     # via -r ./extra-pdf-image.in
@@ -152,12 +154,15 @@ pillow==11.1.0
     #   pikepdf
     #   torchvision
     #   unstructured-pytesseract
-proto-plus==1.20.4
-    # via google-cloud-vision
-protobuf==6.30.0
+proto-plus==1.26.1
+    # via
+    #   google-api-core
+    #   google-cloud-vision
+protobuf==6.30.1
     # via
     #   -c ././deps/constraints.txt
     #   google-api-core
+    #   google-cloud-vision
     #   googleapis-common-protos
     #   grpcio-status
     #   onnx
@@ -177,7 +182,7 @@ pycparser==2.22
     #   cffi
 pyparsing==3.2.1
     # via matplotlib
-pypdf==5.3.1
+pypdf==5.4.0
     # via
     #   -c ./base.txt
     #   -r ./extra-pdf-image.in
@@ -232,7 +237,7 @@ timm==1.0.15
     # via
     #   effdet
     #   unstructured-inference
-tokenizers==0.21.0
+tokenizers==0.21.1
     # via
     #   -c ././deps/constraints.txt
     #   transformers
@@ -261,7 +266,7 @@ typing-extensions==4.12.2
     #   torch
 tzdata==2025.1
     # via pandas
-unstructured-inference==0.8.9
+unstructured-inference==0.8.10
     # via -r ./extra-pdf-image.in
 unstructured-pytesseract==0.3.15
     # via -r ./extra-pdf-image.in
diff --git a/requirements/extra-xlsx.txt b/requirements/extra-xlsx.txt
index cc1bda37c..895935708 100644
--- a/requirements/extra-xlsx.txt
+++ b/requirements/extra-xlsx.txt
@@ -8,7 +8,7 @@ et-xmlfile==2.0.0
     # via openpyxl
 networkx==3.2.1
     # via -r ./extra-xlsx.in
-numpy==1.26.4
+numpy==2.0.2
     # via
     #   -c ./base.txt
     #   pandas
diff --git a/requirements/huggingface.txt b/requirements/huggingface.txt
index 68b3956c0..829a0448d 100644
--- a/requirements/huggingface.txt
+++ b/requirements/huggingface.txt
@@ -16,16 +16,16 @@ click==8.1.8
     # via
     #   -c ./base.txt
     #   sacremoses
-filelock==3.17.0
+filelock==3.18.0
     # via
     #   huggingface-hub
     #   torch
     #   transformers
-fsspec==2025.2.0
+fsspec==2025.3.0
     # via
     #   huggingface-hub
     #   torch
-huggingface-hub==0.29.2
+huggingface-hub==0.29.3
     # via
     #   tokenizers
     #   transformers
@@ -49,7 +49,7 @@ mpmath==1.3.0
     # via sympy
 networkx==3.2.1
     # via torch
-numpy==1.26.4
+numpy==2.0.2
     # via
     #   -c ./base.txt
     #   transformers
@@ -84,7 +84,7 @@ six==1.17.0
     #   langdetect
 sympy==1.13.1
     # via torch
-tokenizers==0.21.0
+tokenizers==0.21.1
     # via
     #   -c ././deps/constraints.txt
     #   transformers
diff --git a/requirements/test.txt b/requirements/test.txt
index 9853be184..b64b5d52f 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -6,24 +6,18 @@
 #
 annotated-types==0.7.0
     # via pydantic
-anyio==4.8.0
+anyio==4.9.0
     # via
     #   -c ./base.txt
     #   httpx
 appdirs==1.4.4
     # via label-studio-sdk
-argcomplete==3.6.0
-    # via datamodel-code-generator
-attrs==25.1.0
-    # via
-    #   jsonschema
-    #   referencing
+attrs==25.3.0
+    # via jsonschema
 autoflake==2.3.1
     # via -r ./test.in
 black==25.1.0
-    # via
-    #   -r ./test.in
-    #   datamodel-code-generator
+    # via -r ./test.in
 certifi==2025.1.31
     # via
     #   -c ./base.txt
@@ -39,23 +33,15 @@ click==8.1.8
     #   -c ./base.txt
     #   black
     #   nltk
-coverage[toml]==7.6.12
+coverage[toml]==7.7.0
     # via
     #   -r ./test.in
     #   pytest-cov
-datamodel-code-generator==0.26.1
-    # via label-studio-sdk
-dnspython==2.7.0
-    # via email-validator
-email-validator==2.2.0
-    # via pydantic
 exceptiongroup==1.2.2
     # via
     #   -c ./base.txt
     #   anyio
     #   pytest
-faker==36.2.2
-    # via jsf
 flake8==7.1.2
     # via
     #   -r ./test.in
@@ -64,9 +50,7 @@ flake8-print==5.0.0
     # via -r ./test.in
 freezegun==1.5.1
     # via -r ./test.in
-genson==1.3.0
-    # via datamodel-code-generator
-grpcio==1.70.0
+grpcio==1.71.0
     # via
     #   -c ././deps/constraints.txt
     #   -r ./test.in
@@ -86,33 +70,20 @@ idna==3.10
     # via
     #   -c ./base.txt
     #   anyio
-    #   email-validator
     #   httpx
     #   requests
     #   yarl
 ijson==3.3.0
     # via label-studio-sdk
-inflect==5.6.2
-    # via datamodel-code-generator
 iniconfig==2.0.0
     # via pytest
-isort==5.13.2
-    # via datamodel-code-generator
-jinja2==3.1.6
-    # via datamodel-code-generator
 joblib==1.4.2
     # via
     #   -c ./base.txt
     #   nltk
-jsf==0.11.2
+jsonschema==3.2.0
     # via label-studio-sdk
-jsonschema==4.23.0
-    # via
-    #   jsf
-    #   label-studio-sdk
-jsonschema-specifications==2024.10.1
-    # via jsonschema
-label-studio-sdk==1.0.10
+label-studio-sdk==1.0.5
     # via -r ./test.in
 liccheck==0.9.2
     # via -r ./test.in
@@ -120,11 +91,9 @@ lxml==5.3.1
     # via
     #   -c ./base.txt
     #   label-studio-sdk
-markupsafe==3.0.2
-    # via jinja2
 mccabe==0.7.0
     # via flake8
-multidict==6.1.0
+multidict==6.2.0
     # via yarl
 mypy==1.15.0
     # via -r ./test.in
@@ -137,16 +106,14 @@ nltk==3.9.1
     # via
     #   -c ./base.txt
     #   label-studio-sdk
-numpy==1.26.4
+numpy==2.0.2
     # via
     #   -c ./base.txt
-    #   label-studio-sdk
     #   pandas
 packaging==24.2
     # via
     #   -c ./base.txt
     #   black
-    #   datamodel-code-generator
     #   pytest
 pandas==2.2.3
     # via label-studio-sdk
@@ -164,20 +131,18 @@ pycodestyle==2.12.1
     # via
     #   flake8
     #   flake8-print
-pydantic[email]==2.10.6
+pydantic==2.10.6
     # via
     #   -r ./test.in
-    #   datamodel-code-generator
-    #   jsf
     #   label-studio-sdk
 pydantic-core==2.27.2
-    # via
-    #   label-studio-sdk
-    #   pydantic
+    # via pydantic
 pyflakes==3.2.0
     # via
     #   autoflake
     #   flake8
+pyrsistent==0.20.0
+    # via jsonschema
 pytest==8.3.5
     # via
     #   pytest-cov
@@ -194,13 +159,7 @@ python-dateutil==2.9.0.post0
 pytz==2025.1
     # via pandas
 pyyaml==6.0.2
-    # via
-    #   datamodel-code-generator
-    #   vcrpy
-referencing==0.36.2
-    # via
-    #   jsonschema
-    #   jsonschema-specifications
+    # via vcrpy
 regex==2024.11.6
     # via
     #   -c ./base.txt
@@ -210,33 +169,23 @@ requests==2.32.3
     #   -c ./base.txt
     #   label-studio-sdk
     #   requests-mock
-    #   smart-open
 requests-mock==1.12.1
     # via label-studio-sdk
-rpds-py==0.23.1
-    # via
-    #   jsonschema
-    #   referencing
-rstr==3.2.2
-    # via jsf
-ruff==0.9.9
+ruff==0.11.0
     # via -r ./test.in
 semantic-version==2.10.0
     # via liccheck
 six==1.17.0
     # via
     #   -c ./base.txt
+    #   jsonschema
     #   python-dateutil
-smart-open[http]==7.1.0
-    # via jsf
 sniffio==1.3.1
     # via
     #   -c ./base.txt
     #   anyio
 toml==0.10.2
-    # via
-    #   datamodel-code-generator
-    #   liccheck
+    # via liccheck
 tomli==2.2.1
     # via
     #   autoflake
@@ -263,17 +212,13 @@ typing-extensions==4.12.2
     #   -c ./base.txt
     #   anyio
     #   black
-    #   jsf
     #   label-studio-sdk
     #   multidict
     #   mypy
     #   pydantic
     #   pydantic-core
-    #   referencing
 tzdata==2025.1
-    # via
-    #   faker
-    #   pandas
+    # via pandas
 ujson==5.10.0
     # via label-studio-sdk
 urllib3==1.26.20
@@ -287,9 +232,11 @@ vcrpy==7.0.0
 wrapt==1.17.2
     # via
     #   -c ./base.txt
-    #   smart-open
     #   vcrpy
 xmljson==0.2.1
     # via label-studio-sdk
 yarl==1.18.3
     # via vcrpy
+
+# The following packages are considered to be unsafe in a requirements file:
+# setuptools
diff --git a/test_unstructured/partition/test_auto.py b/test_unstructured/partition/test_auto.py
index f29f600b4..27701fcf8 100644
--- a/test_unstructured/partition/test_auto.py
+++ b/test_unstructured/partition/test_auto.py
@@ -1052,7 +1052,23 @@ def test_auto_partition_respects_detect_language_per_element_arg():
 
 
 @pytest.mark.parametrize(
-    "file_extension", "doc docx eml epub html md odt org ppt pptx rst rtf txt xml".split()
+    "file_extension",
+    [
+        "doc",
+        "docx",
+        "eml",
+        "epub",
+        "html",
+        "md",
+        "odt",
+        "org",
+        "ppt",
+        "pptx",
+        "rst",
+        "rtf",
+        "txt",
+        "xml",
+    ],
 )
 def test_auto_partition_respects_language_arg(file_extension: str):
     elements = partition(
diff --git a/unstructured/__version__.py b/unstructured/__version__.py
index 0389ef24a..80fa78aa7 100644
--- a/unstructured/__version__.py
+++ b/unstructured/__version__.py
@@ -1 +1 @@
-__version__ = "0.17.1-dev1"  # pragma: no cover
+__version__ = "0.17.1"  # pragma: no cover