mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-06-27 02:30:08 +00:00
Feat/bump numpy to 2 (#3961)
This PR updates a few dependencies so that they are compatible with `numpy>=2`.
This commit is contained in:
parent
4e424efd22
commit
7de630e45e
@ -1,10 +1,13 @@
|
||||
## 0.17.1-dev1
|
||||
## 0.17.1
|
||||
|
||||
### Enhancements
|
||||
|
||||
- **Add image_url of images in html partitioner** `<img>` tags with non-data content include a new image_url metadata field with the content of the src attribute.
|
||||
|
||||
- **Use `lxml` instead of `bs4` to parse hOCR data.** `lxml` is much faster than `bs4` given the hOCR data format is regular (guaranteed because it is programmatically generated).
|
||||
|
||||
- **Bump `numpy` to `>=2`.** Also upgrade `paddlepaddle`, `unstructured-paddleocr`, and `onnx` so they are compatible with `numpy>=2`.
|
||||
|
||||
### Features
|
||||
|
||||
### Fixes
|
||||
|
@ -10,9 +10,7 @@ emoji
|
||||
dataclasses-json
|
||||
python-iso639
|
||||
langdetect
|
||||
# NOTE(robinson) - numpy pin is because ONNX model weights are only compatible
|
||||
# with numpy 1.x.x
|
||||
numpy<2
|
||||
numpy
|
||||
rapidfuzz
|
||||
backoff
|
||||
typing-extensions
|
||||
|
@ -4,7 +4,7 @@
|
||||
#
|
||||
# pip-compile ./base.in
|
||||
#
|
||||
anyio==4.8.0
|
||||
anyio==4.9.0
|
||||
# via httpx
|
||||
backoff==2.2.1
|
||||
# via -r ./base.in
|
||||
@ -34,7 +34,7 @@ dataclasses-json==0.6.7
|
||||
# via
|
||||
# -r ./base.in
|
||||
# unstructured-client
|
||||
deepdiff==8.3.0
|
||||
deepdiff==8.4.2
|
||||
# via unstructured-client
|
||||
emoji==2.14.1
|
||||
# via -r ./base.in
|
||||
@ -76,7 +76,7 @@ nest-asyncio==1.6.0
|
||||
# via unstructured-client
|
||||
nltk==3.9.1
|
||||
# via -r ./base.in
|
||||
numpy==1.26.4
|
||||
numpy==2.0.2
|
||||
# via -r ./base.in
|
||||
olefile==0.47
|
||||
# via python-oxmsg
|
||||
@ -90,7 +90,7 @@ psutil==7.0.0
|
||||
# via -r ./base.in
|
||||
pycparser==2.22
|
||||
# via cffi
|
||||
pypdf==5.3.1
|
||||
pypdf==5.4.0
|
||||
# via unstructured-client
|
||||
python-dateutil==2.9.0.post0
|
||||
# via unstructured-client
|
||||
|
@ -15,9 +15,9 @@ click==8.1.8
|
||||
# pip-tools
|
||||
distlib==0.3.9
|
||||
# via virtualenv
|
||||
filelock==3.17.0
|
||||
filelock==3.18.0
|
||||
# via virtualenv
|
||||
identify==2.6.8
|
||||
identify==2.6.9
|
||||
# via pre-commit
|
||||
importlib-metadata==8.6.1
|
||||
# via
|
||||
|
@ -4,7 +4,7 @@
|
||||
#
|
||||
# pip-compile ./extra-csv.in
|
||||
#
|
||||
numpy==1.26.4
|
||||
numpy==2.0.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# pandas
|
||||
|
@ -1,5 +1,5 @@
|
||||
-c ./deps/constraints.txt
|
||||
-c base.txt
|
||||
|
||||
paddlepaddle==3.0.0b1
|
||||
unstructured.paddleocr==2.8.1.0
|
||||
paddlepaddle>=3.0.0b1
|
||||
unstructured.paddleocr==2.10.0
|
||||
|
@ -4,12 +4,24 @@
|
||||
#
|
||||
# pip-compile ./extra-paddleocr.in
|
||||
#
|
||||
anyio==4.8.0
|
||||
albucore==0.0.23
|
||||
# via
|
||||
# albumentations
|
||||
# unstructured-paddleocr
|
||||
albumentations==2.0.5
|
||||
# via unstructured-paddleocr
|
||||
annotated-types==0.7.0
|
||||
# via pydantic
|
||||
anyio==4.9.0
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# httpx
|
||||
astor==0.8.1
|
||||
# via paddlepaddle
|
||||
beautifulsoup4==4.13.3
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# unstructured-paddleocr
|
||||
certifi==2025.1.31
|
||||
# via
|
||||
# -c ./base.txt
|
||||
@ -20,20 +32,20 @@ charset-normalizer==3.4.1
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# requests
|
||||
contourpy==1.3.0
|
||||
# via matplotlib
|
||||
cycler==0.12.1
|
||||
# via matplotlib
|
||||
cython==3.0.12
|
||||
# via unstructured-paddleocr
|
||||
decorator==5.2.1
|
||||
# via paddlepaddle
|
||||
eval-type-backport==0.2.2
|
||||
# via albumentations
|
||||
exceptiongroup==1.2.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# anyio
|
||||
fire==0.7.0
|
||||
# via unstructured-paddleocr
|
||||
fonttools==4.56.0
|
||||
# via matplotlib
|
||||
# via unstructured-paddleocr
|
||||
h11==0.14.0
|
||||
# via
|
||||
# -c ./base.txt
|
||||
@ -53,32 +65,26 @@ idna==3.10
|
||||
# httpx
|
||||
# requests
|
||||
imageio==2.37.0
|
||||
# via
|
||||
# imgaug
|
||||
# scikit-image
|
||||
imgaug==0.4.0
|
||||
# via unstructured-paddleocr
|
||||
importlib-resources==6.5.2
|
||||
# via matplotlib
|
||||
kiwisolver==1.4.7
|
||||
# via matplotlib
|
||||
# via scikit-image
|
||||
lazy-loader==0.4
|
||||
# via scikit-image
|
||||
matplotlib==3.9.4
|
||||
# via imgaug
|
||||
lxml==5.3.1
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# python-docx
|
||||
networkx==3.2.1
|
||||
# via
|
||||
# paddlepaddle
|
||||
# scikit-image
|
||||
numpy==1.26.4
|
||||
numpy==2.0.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# contourpy
|
||||
# albucore
|
||||
# albumentations
|
||||
# imageio
|
||||
# imgaug
|
||||
# matplotlib
|
||||
# opencv-contrib-python
|
||||
# opencv-python
|
||||
# opencv-python-headless
|
||||
# opt-einsum
|
||||
# paddlepaddle
|
||||
# scikit-image
|
||||
@ -89,44 +95,42 @@ numpy==1.26.4
|
||||
opencv-contrib-python==4.11.0.86
|
||||
# via unstructured-paddleocr
|
||||
opencv-python==4.11.0.86
|
||||
# via unstructured-paddleocr
|
||||
opencv-python-headless==4.11.0.86
|
||||
# via
|
||||
# imgaug
|
||||
# unstructured-paddleocr
|
||||
# albucore
|
||||
# albumentations
|
||||
opt-einsum==3.3.0
|
||||
# via paddlepaddle
|
||||
packaging==24.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# lazy-loader
|
||||
# matplotlib
|
||||
# scikit-image
|
||||
paddlepaddle==3.0.0b1
|
||||
paddlepaddle==3.0.0rc1
|
||||
# via -r ./extra-paddleocr.in
|
||||
pdf2image==1.17.0
|
||||
# via unstructured-paddleocr
|
||||
pillow==11.1.0
|
||||
# via
|
||||
# imageio
|
||||
# imgaug
|
||||
# matplotlib
|
||||
# paddlepaddle
|
||||
# pdf2image
|
||||
# scikit-image
|
||||
# unstructured-paddleocr
|
||||
protobuf==6.30.0
|
||||
protobuf==6.30.1
|
||||
# via
|
||||
# -c ././deps/constraints.txt
|
||||
# paddlepaddle
|
||||
pyclipper==1.3.0.post6
|
||||
# via unstructured-paddleocr
|
||||
pyparsing==3.2.1
|
||||
# via matplotlib
|
||||
python-dateutil==2.9.0.post0
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# matplotlib
|
||||
pyyaml==6.0.2
|
||||
pydantic==2.10.6
|
||||
# via albumentations
|
||||
pydantic-core==2.27.2
|
||||
# via pydantic
|
||||
python-docx==1.1.2
|
||||
# via unstructured-paddleocr
|
||||
pyyaml==6.0.2
|
||||
# via
|
||||
# albumentations
|
||||
# unstructured-paddleocr
|
||||
rapidfuzz==3.12.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
@ -136,26 +140,27 @@ requests==2.32.3
|
||||
# -c ./base.txt
|
||||
# unstructured-paddleocr
|
||||
scikit-image==0.24.0
|
||||
# via
|
||||
# imgaug
|
||||
# unstructured-paddleocr
|
||||
# via unstructured-paddleocr
|
||||
scipy==1.13.1
|
||||
# via
|
||||
# imgaug
|
||||
# albumentations
|
||||
# scikit-image
|
||||
shapely==2.0.7
|
||||
# via
|
||||
# imgaug
|
||||
# unstructured-paddleocr
|
||||
six==1.17.0
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# imgaug
|
||||
# python-dateutil
|
||||
# via unstructured-paddleocr
|
||||
simsimd==6.2.1
|
||||
# via albucore
|
||||
sniffio==1.3.1
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# anyio
|
||||
soupsieve==2.6
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# beautifulsoup4
|
||||
stringzilla==3.12.3
|
||||
# via albucore
|
||||
termcolor==2.5.0
|
||||
# via fire
|
||||
tifffile==2024.8.30
|
||||
# via scikit-image
|
||||
tqdm==4.67.1
|
||||
@ -165,14 +170,18 @@ tqdm==4.67.1
|
||||
typing-extensions==4.12.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# albucore
|
||||
# albumentations
|
||||
# anyio
|
||||
# beautifulsoup4
|
||||
# paddlepaddle
|
||||
unstructured-paddleocr==2.8.1.0
|
||||
# pydantic
|
||||
# pydantic-core
|
||||
# python-docx
|
||||
unstructured-paddleocr==2.10.0
|
||||
# via -r ./extra-paddleocr.in
|
||||
urllib3==1.26.20
|
||||
# via
|
||||
# -c ././deps/constraints.txt
|
||||
# -c ./base.txt
|
||||
# requests
|
||||
zipp==3.21.0
|
||||
# via importlib-resources
|
||||
|
@ -1,7 +1,8 @@
|
||||
-c ./deps/constraints.txt
|
||||
-c base.txt
|
||||
|
||||
onnx
|
||||
onnx>=1.17.0
|
||||
onnxruntime>=1.19.0
|
||||
pdf2image
|
||||
pdfminer.six
|
||||
pikepdf
|
||||
@ -11,5 +12,5 @@ google-cloud-vision
|
||||
effdet
|
||||
# Do not move to constraints.in, otherwise unstructured-inference will not be upgraded
|
||||
# when unstructured library is.
|
||||
unstructured-inference>=0.8.9
|
||||
unstructured-inference>=0.8.10
|
||||
unstructured.pytesseract>=0.3.12
|
||||
|
@ -35,7 +35,7 @@ deprecated==1.2.18
|
||||
# via pikepdf
|
||||
effdet==0.4.1
|
||||
# via -r ./extra-pdf-image.in
|
||||
filelock==3.17.0
|
||||
filelock==3.18.0
|
||||
# via
|
||||
# huggingface-hub
|
||||
# torch
|
||||
@ -44,30 +44,30 @@ flatbuffers==25.2.10
|
||||
# via onnxruntime
|
||||
fonttools==4.56.0
|
||||
# via matplotlib
|
||||
fsspec==2025.2.0
|
||||
fsspec==2025.3.0
|
||||
# via
|
||||
# huggingface-hub
|
||||
# torch
|
||||
google-api-core[grpc]==2.8.0
|
||||
google-api-core[grpc]==2.24.2
|
||||
# via google-cloud-vision
|
||||
google-auth==2.38.0
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-vision
|
||||
google-cloud-vision==2.7.2
|
||||
google-cloud-vision==3.10.1
|
||||
# via -r ./extra-pdf-image.in
|
||||
googleapis-common-protos==1.56.1
|
||||
googleapis-common-protos==1.69.2
|
||||
# via
|
||||
# google-api-core
|
||||
# grpcio-status
|
||||
grpcio==1.70.0
|
||||
grpcio==1.71.0
|
||||
# via
|
||||
# -c ././deps/constraints.txt
|
||||
# google-api-core
|
||||
# grpcio-status
|
||||
grpcio-status==1.62.3
|
||||
# via google-api-core
|
||||
huggingface-hub==0.29.2
|
||||
huggingface-hub==0.29.3
|
||||
# via
|
||||
# timm
|
||||
# tokenizers
|
||||
@ -99,7 +99,7 @@ mpmath==1.3.0
|
||||
# via sympy
|
||||
networkx==3.2.1
|
||||
# via torch
|
||||
numpy==1.26.4
|
||||
numpy==2.0.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# contourpy
|
||||
@ -120,7 +120,9 @@ onnx==1.17.0
|
||||
# -r ./extra-pdf-image.in
|
||||
# unstructured-inference
|
||||
onnxruntime==1.19.2
|
||||
# via unstructured-inference
|
||||
# via
|
||||
# -r ./extra-pdf-image.in
|
||||
# unstructured-inference
|
||||
opencv-python==4.11.0.86
|
||||
# via unstructured-inference
|
||||
packaging==24.2
|
||||
@ -140,7 +142,7 @@ pdfminer-six==20240706
|
||||
# via
|
||||
# -r ./extra-pdf-image.in
|
||||
# unstructured-inference
|
||||
pi-heif==0.21.0
|
||||
pi-heif==0.22.0
|
||||
# via -r ./extra-pdf-image.in
|
||||
pikepdf==9.5.2
|
||||
# via -r ./extra-pdf-image.in
|
||||
@ -152,12 +154,15 @@ pillow==11.1.0
|
||||
# pikepdf
|
||||
# torchvision
|
||||
# unstructured-pytesseract
|
||||
proto-plus==1.20.4
|
||||
# via google-cloud-vision
|
||||
protobuf==6.30.0
|
||||
proto-plus==1.26.1
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-vision
|
||||
protobuf==6.30.1
|
||||
# via
|
||||
# -c ././deps/constraints.txt
|
||||
# google-api-core
|
||||
# google-cloud-vision
|
||||
# googleapis-common-protos
|
||||
# grpcio-status
|
||||
# onnx
|
||||
@ -177,7 +182,7 @@ pycparser==2.22
|
||||
# cffi
|
||||
pyparsing==3.2.1
|
||||
# via matplotlib
|
||||
pypdf==5.3.1
|
||||
pypdf==5.4.0
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -r ./extra-pdf-image.in
|
||||
@ -232,7 +237,7 @@ timm==1.0.15
|
||||
# via
|
||||
# effdet
|
||||
# unstructured-inference
|
||||
tokenizers==0.21.0
|
||||
tokenizers==0.21.1
|
||||
# via
|
||||
# -c ././deps/constraints.txt
|
||||
# transformers
|
||||
@ -261,7 +266,7 @@ typing-extensions==4.12.2
|
||||
# torch
|
||||
tzdata==2025.1
|
||||
# via pandas
|
||||
unstructured-inference==0.8.9
|
||||
unstructured-inference==0.8.10
|
||||
# via -r ./extra-pdf-image.in
|
||||
unstructured-pytesseract==0.3.15
|
||||
# via -r ./extra-pdf-image.in
|
||||
|
@ -8,7 +8,7 @@ et-xmlfile==2.0.0
|
||||
# via openpyxl
|
||||
networkx==3.2.1
|
||||
# via -r ./extra-xlsx.in
|
||||
numpy==1.26.4
|
||||
numpy==2.0.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# pandas
|
||||
|
@ -16,16 +16,16 @@ click==8.1.8
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# sacremoses
|
||||
filelock==3.17.0
|
||||
filelock==3.18.0
|
||||
# via
|
||||
# huggingface-hub
|
||||
# torch
|
||||
# transformers
|
||||
fsspec==2025.2.0
|
||||
fsspec==2025.3.0
|
||||
# via
|
||||
# huggingface-hub
|
||||
# torch
|
||||
huggingface-hub==0.29.2
|
||||
huggingface-hub==0.29.3
|
||||
# via
|
||||
# tokenizers
|
||||
# transformers
|
||||
@ -49,7 +49,7 @@ mpmath==1.3.0
|
||||
# via sympy
|
||||
networkx==3.2.1
|
||||
# via torch
|
||||
numpy==1.26.4
|
||||
numpy==2.0.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# transformers
|
||||
@ -84,7 +84,7 @@ six==1.17.0
|
||||
# langdetect
|
||||
sympy==1.13.1
|
||||
# via torch
|
||||
tokenizers==0.21.0
|
||||
tokenizers==0.21.1
|
||||
# via
|
||||
# -c ././deps/constraints.txt
|
||||
# transformers
|
||||
|
@ -6,24 +6,18 @@
|
||||
#
|
||||
annotated-types==0.7.0
|
||||
# via pydantic
|
||||
anyio==4.8.0
|
||||
anyio==4.9.0
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# httpx
|
||||
appdirs==1.4.4
|
||||
# via label-studio-sdk
|
||||
argcomplete==3.6.0
|
||||
# via datamodel-code-generator
|
||||
attrs==25.1.0
|
||||
# via
|
||||
# jsonschema
|
||||
# referencing
|
||||
attrs==25.3.0
|
||||
# via jsonschema
|
||||
autoflake==2.3.1
|
||||
# via -r ./test.in
|
||||
black==25.1.0
|
||||
# via
|
||||
# -r ./test.in
|
||||
# datamodel-code-generator
|
||||
# via -r ./test.in
|
||||
certifi==2025.1.31
|
||||
# via
|
||||
# -c ./base.txt
|
||||
@ -39,23 +33,15 @@ click==8.1.8
|
||||
# -c ./base.txt
|
||||
# black
|
||||
# nltk
|
||||
coverage[toml]==7.6.12
|
||||
coverage[toml]==7.7.0
|
||||
# via
|
||||
# -r ./test.in
|
||||
# pytest-cov
|
||||
datamodel-code-generator==0.26.1
|
||||
# via label-studio-sdk
|
||||
dnspython==2.7.0
|
||||
# via email-validator
|
||||
email-validator==2.2.0
|
||||
# via pydantic
|
||||
exceptiongroup==1.2.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# anyio
|
||||
# pytest
|
||||
faker==36.2.2
|
||||
# via jsf
|
||||
flake8==7.1.2
|
||||
# via
|
||||
# -r ./test.in
|
||||
@ -64,9 +50,7 @@ flake8-print==5.0.0
|
||||
# via -r ./test.in
|
||||
freezegun==1.5.1
|
||||
# via -r ./test.in
|
||||
genson==1.3.0
|
||||
# via datamodel-code-generator
|
||||
grpcio==1.70.0
|
||||
grpcio==1.71.0
|
||||
# via
|
||||
# -c ././deps/constraints.txt
|
||||
# -r ./test.in
|
||||
@ -86,33 +70,20 @@ idna==3.10
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# anyio
|
||||
# email-validator
|
||||
# httpx
|
||||
# requests
|
||||
# yarl
|
||||
ijson==3.3.0
|
||||
# via label-studio-sdk
|
||||
inflect==5.6.2
|
||||
# via datamodel-code-generator
|
||||
iniconfig==2.0.0
|
||||
# via pytest
|
||||
isort==5.13.2
|
||||
# via datamodel-code-generator
|
||||
jinja2==3.1.6
|
||||
# via datamodel-code-generator
|
||||
joblib==1.4.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# nltk
|
||||
jsf==0.11.2
|
||||
jsonschema==3.2.0
|
||||
# via label-studio-sdk
|
||||
jsonschema==4.23.0
|
||||
# via
|
||||
# jsf
|
||||
# label-studio-sdk
|
||||
jsonschema-specifications==2024.10.1
|
||||
# via jsonschema
|
||||
label-studio-sdk==1.0.10
|
||||
label-studio-sdk==1.0.5
|
||||
# via -r ./test.in
|
||||
liccheck==0.9.2
|
||||
# via -r ./test.in
|
||||
@ -120,11 +91,9 @@ lxml==5.3.1
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# label-studio-sdk
|
||||
markupsafe==3.0.2
|
||||
# via jinja2
|
||||
mccabe==0.7.0
|
||||
# via flake8
|
||||
multidict==6.1.0
|
||||
multidict==6.2.0
|
||||
# via yarl
|
||||
mypy==1.15.0
|
||||
# via -r ./test.in
|
||||
@ -137,16 +106,14 @@ nltk==3.9.1
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# label-studio-sdk
|
||||
numpy==1.26.4
|
||||
numpy==2.0.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# label-studio-sdk
|
||||
# pandas
|
||||
packaging==24.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# black
|
||||
# datamodel-code-generator
|
||||
# pytest
|
||||
pandas==2.2.3
|
||||
# via label-studio-sdk
|
||||
@ -164,20 +131,18 @@ pycodestyle==2.12.1
|
||||
# via
|
||||
# flake8
|
||||
# flake8-print
|
||||
pydantic[email]==2.10.6
|
||||
pydantic==2.10.6
|
||||
# via
|
||||
# -r ./test.in
|
||||
# datamodel-code-generator
|
||||
# jsf
|
||||
# label-studio-sdk
|
||||
pydantic-core==2.27.2
|
||||
# via
|
||||
# label-studio-sdk
|
||||
# pydantic
|
||||
# via pydantic
|
||||
pyflakes==3.2.0
|
||||
# via
|
||||
# autoflake
|
||||
# flake8
|
||||
pyrsistent==0.20.0
|
||||
# via jsonschema
|
||||
pytest==8.3.5
|
||||
# via
|
||||
# pytest-cov
|
||||
@ -194,13 +159,7 @@ python-dateutil==2.9.0.post0
|
||||
pytz==2025.1
|
||||
# via pandas
|
||||
pyyaml==6.0.2
|
||||
# via
|
||||
# datamodel-code-generator
|
||||
# vcrpy
|
||||
referencing==0.36.2
|
||||
# via
|
||||
# jsonschema
|
||||
# jsonschema-specifications
|
||||
# via vcrpy
|
||||
regex==2024.11.6
|
||||
# via
|
||||
# -c ./base.txt
|
||||
@ -210,33 +169,23 @@ requests==2.32.3
|
||||
# -c ./base.txt
|
||||
# label-studio-sdk
|
||||
# requests-mock
|
||||
# smart-open
|
||||
requests-mock==1.12.1
|
||||
# via label-studio-sdk
|
||||
rpds-py==0.23.1
|
||||
# via
|
||||
# jsonschema
|
||||
# referencing
|
||||
rstr==3.2.2
|
||||
# via jsf
|
||||
ruff==0.9.9
|
||||
ruff==0.11.0
|
||||
# via -r ./test.in
|
||||
semantic-version==2.10.0
|
||||
# via liccheck
|
||||
six==1.17.0
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# jsonschema
|
||||
# python-dateutil
|
||||
smart-open[http]==7.1.0
|
||||
# via jsf
|
||||
sniffio==1.3.1
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# anyio
|
||||
toml==0.10.2
|
||||
# via
|
||||
# datamodel-code-generator
|
||||
# liccheck
|
||||
# via liccheck
|
||||
tomli==2.2.1
|
||||
# via
|
||||
# autoflake
|
||||
@ -263,17 +212,13 @@ typing-extensions==4.12.2
|
||||
# -c ./base.txt
|
||||
# anyio
|
||||
# black
|
||||
# jsf
|
||||
# label-studio-sdk
|
||||
# multidict
|
||||
# mypy
|
||||
# pydantic
|
||||
# pydantic-core
|
||||
# referencing
|
||||
tzdata==2025.1
|
||||
# via
|
||||
# faker
|
||||
# pandas
|
||||
# via pandas
|
||||
ujson==5.10.0
|
||||
# via label-studio-sdk
|
||||
urllib3==1.26.20
|
||||
@ -287,9 +232,11 @@ vcrpy==7.0.0
|
||||
wrapt==1.17.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# smart-open
|
||||
# vcrpy
|
||||
xmljson==0.2.1
|
||||
# via label-studio-sdk
|
||||
yarl==1.18.3
|
||||
# via vcrpy
|
||||
|
||||
# The following packages are considered to be unsafe in a requirements file:
|
||||
# setuptools
|
||||
|
@ -1052,7 +1052,23 @@ def test_auto_partition_respects_detect_language_per_element_arg():
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"file_extension", "doc docx eml epub html md odt org ppt pptx rst rtf txt xml".split()
|
||||
"file_extension",
|
||||
[
|
||||
"doc",
|
||||
"docx",
|
||||
"eml",
|
||||
"epub",
|
||||
"html",
|
||||
"md",
|
||||
"odt",
|
||||
"org",
|
||||
"ppt",
|
||||
"pptx",
|
||||
"rst",
|
||||
"rtf",
|
||||
"txt",
|
||||
"xml",
|
||||
],
|
||||
)
|
||||
def test_auto_partition_respects_language_arg(file_extension: str):
|
||||
elements = partition(
|
||||
|
@ -1 +1 @@
|
||||
__version__ = "0.17.1-dev1" # pragma: no cover
|
||||
__version__ = "0.17.1" # pragma: no cover
|
||||
|
Loading…
x
Reference in New Issue
Block a user