mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-11-30 09:09:53 +00:00
Luke/CVE bump (#3928)
Bump dependencies and update the tokenizers constraint
This commit is contained in:
parent
3403db1ad4
commit
147add9a04
10
CHANGELOG.md
10
CHANGELOG.md
@ -1,3 +1,13 @@
|
||||
## 0.16.22
|
||||
|
||||
### Enhancements
|
||||
|
||||
### Features
|
||||
|
||||
### Fixes
|
||||
|
||||
- **Fix open CVEs and bump dependencies**
|
||||
|
||||
## 0.16.21
|
||||
|
||||
### Enhancements
|
||||
|
||||
@ -27,4 +27,4 @@ clean: clean-base
|
||||
|
||||
.PHONY: clean-base
|
||||
clean-base:
|
||||
rm $(BASE_REQUIREMENTSTXT)
|
||||
rm $(BASE_REQUIREMENTSTXT)
|
||||
|
||||
@ -2,15 +2,15 @@
|
||||
# This file is autogenerated by pip-compile with Python 3.9
|
||||
# by the following command:
|
||||
#
|
||||
# pip-compile ./base.in
|
||||
# pip-compile base.in
|
||||
#
|
||||
anyio==4.8.0
|
||||
# via httpx
|
||||
backoff==2.2.1
|
||||
# via -r ./base.in
|
||||
beautifulsoup4==4.12.3
|
||||
# via -r ./base.in
|
||||
certifi==2024.12.14
|
||||
# via -r base.in
|
||||
beautifulsoup4==4.13.3
|
||||
# via -r base.in
|
||||
certifi==2025.1.31
|
||||
# via
|
||||
# httpcore
|
||||
# httpx
|
||||
@ -19,7 +19,7 @@ certifi==2024.12.14
|
||||
cffi==1.17.1
|
||||
# via cryptography
|
||||
chardet==5.2.0
|
||||
# via -r ./base.in
|
||||
# via -r base.in
|
||||
charset-normalizer==3.4.1
|
||||
# via
|
||||
# requests
|
||||
@ -28,24 +28,24 @@ click==8.1.8
|
||||
# via
|
||||
# nltk
|
||||
# python-oxmsg
|
||||
cryptography==44.0.0
|
||||
cryptography==44.0.1
|
||||
# via unstructured-client
|
||||
dataclasses-json==0.6.7
|
||||
# via
|
||||
# -r ./base.in
|
||||
# -r base.in
|
||||
# unstructured-client
|
||||
deepdiff==8.1.1
|
||||
deepdiff==8.2.0
|
||||
# via unstructured-client
|
||||
emoji==2.14.1
|
||||
# via -r ./base.in
|
||||
# via -r base.in
|
||||
exceptiongroup==1.2.2
|
||||
# via anyio
|
||||
filetype==1.2.0
|
||||
# via -r ./base.in
|
||||
# via -r base.in
|
||||
h11==0.14.0
|
||||
# via httpcore
|
||||
html5lib==1.1
|
||||
# via -r ./base.in
|
||||
# via -r base.in
|
||||
httpcore==1.0.7
|
||||
# via httpx
|
||||
httpx==0.28.1
|
||||
@ -61,10 +61,10 @@ joblib==1.4.2
|
||||
jsonpath-python==1.0.6
|
||||
# via unstructured-client
|
||||
langdetect==1.0.9
|
||||
# via -r ./base.in
|
||||
lxml==5.3.0
|
||||
# via -r ./base.in
|
||||
marshmallow==3.26.0
|
||||
# via -r base.in
|
||||
lxml==5.3.1
|
||||
# via -r base.in
|
||||
marshmallow==3.26.1
|
||||
# via
|
||||
# dataclasses-json
|
||||
# unstructured-client
|
||||
@ -75,38 +75,38 @@ mypy-extensions==1.0.0
|
||||
nest-asyncio==1.6.0
|
||||
# via unstructured-client
|
||||
nltk==3.9.1
|
||||
# via -r ./base.in
|
||||
# via -r base.in
|
||||
numpy==1.26.4
|
||||
# via -r ./base.in
|
||||
# via -r base.in
|
||||
olefile==0.47
|
||||
# via python-oxmsg
|
||||
orderly-set==5.2.3
|
||||
orderly-set==5.3.0
|
||||
# via deepdiff
|
||||
packaging==24.2
|
||||
# via
|
||||
# marshmallow
|
||||
# unstructured-client
|
||||
psutil==6.1.1
|
||||
# via -r ./base.in
|
||||
psutil==7.0.0
|
||||
# via -r base.in
|
||||
pycparser==2.22
|
||||
# via cffi
|
||||
pypdf==5.2.0
|
||||
pypdf==5.3.0
|
||||
# via unstructured-client
|
||||
python-dateutil==2.9.0.post0
|
||||
# via unstructured-client
|
||||
python-iso639==2025.1.28
|
||||
# via -r ./base.in
|
||||
python-iso639==2025.2.18
|
||||
# via -r base.in
|
||||
python-magic==0.4.27
|
||||
# via -r ./base.in
|
||||
python-oxmsg==0.0.1
|
||||
# via -r ./base.in
|
||||
rapidfuzz==3.11.0
|
||||
# via -r ./base.in
|
||||
# via -r base.in
|
||||
python-oxmsg==0.0.2
|
||||
# via -r base.in
|
||||
rapidfuzz==3.12.1
|
||||
# via -r base.in
|
||||
regex==2024.11.6
|
||||
# via nltk
|
||||
requests==2.32.3
|
||||
# via
|
||||
# -r ./base.in
|
||||
# -r base.in
|
||||
# requests-toolbelt
|
||||
# unstructured-client
|
||||
requests-toolbelt==1.0.0
|
||||
@ -123,12 +123,13 @@ soupsieve==2.6
|
||||
# via beautifulsoup4
|
||||
tqdm==4.67.1
|
||||
# via
|
||||
# -r ./base.in
|
||||
# -r base.in
|
||||
# nltk
|
||||
typing-extensions==4.12.2
|
||||
# via
|
||||
# -r ./base.in
|
||||
# -r base.in
|
||||
# anyio
|
||||
# beautifulsoup4
|
||||
# pypdf
|
||||
# python-oxmsg
|
||||
# typing-inspect
|
||||
@ -139,14 +140,14 @@ typing-inspect==0.9.0
|
||||
# unstructured-client
|
||||
unstructured-client==0.25.9
|
||||
# via
|
||||
# -c ././deps/constraints.txt
|
||||
# -r ./base.in
|
||||
# -c ./deps/constraints.txt
|
||||
# -r base.in
|
||||
urllib3==1.26.20
|
||||
# via
|
||||
# -c ././deps/constraints.txt
|
||||
# -c ./deps/constraints.txt
|
||||
# requests
|
||||
# unstructured-client
|
||||
webencodings==0.5.1
|
||||
# via html5lib
|
||||
wrapt==1.17.2
|
||||
# via -r ./base.in
|
||||
# via -r base.in
|
||||
|
||||
@ -8,7 +8,7 @@ weaviate-client>=3.26.7,<4.0.0
|
||||
# TODO: Constraint due to multiple versions being installed during pip-compile
|
||||
grpcio>=1.65.5
|
||||
# TODO: Pinned in transformers package, remove when that gets updated (https://github.com/huggingface/transformers/blob/main/setup.py)
|
||||
tokenizers>=0.19,<0.20
|
||||
tokenizers>=0.21,<0.22
|
||||
# TODO: Constraint due to boto, with Python before 3.10 not requiring OpenSSL 1.1.1, remove when that gets
|
||||
# updated or we drop support for 3.9
|
||||
urllib3<1.27
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
# This file is autogenerated by pip-compile with Python 3.9
|
||||
# by the following command:
|
||||
#
|
||||
# pip-compile ./dev.in
|
||||
# pip-compile dev.in
|
||||
#
|
||||
build==1.2.2.post1
|
||||
# via pip-tools
|
||||
@ -10,48 +10,48 @@ cfgv==3.4.0
|
||||
# via pre-commit
|
||||
click==8.1.8
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c ./test.txt
|
||||
# -c base.txt
|
||||
# -c test.txt
|
||||
# pip-tools
|
||||
distlib==0.3.9
|
||||
# via virtualenv
|
||||
filelock==3.17.0
|
||||
# via virtualenv
|
||||
identify==2.6.6
|
||||
identify==2.6.7
|
||||
# via pre-commit
|
||||
importlib-metadata==8.6.1
|
||||
# via
|
||||
# -c ././deps/constraints.txt
|
||||
# -c ./deps/constraints.txt
|
||||
# build
|
||||
nodeenv==1.9.1
|
||||
# via pre-commit
|
||||
packaging==24.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c ./test.txt
|
||||
# -c base.txt
|
||||
# -c test.txt
|
||||
# build
|
||||
pip-tools==7.4.1
|
||||
# via -r ./dev.in
|
||||
# via -r dev.in
|
||||
platformdirs==4.3.6
|
||||
# via
|
||||
# -c ./test.txt
|
||||
# -c test.txt
|
||||
# virtualenv
|
||||
pre-commit==4.1.0
|
||||
# via -r ./dev.in
|
||||
# via -r dev.in
|
||||
pyproject-hooks==1.2.0
|
||||
# via
|
||||
# build
|
||||
# pip-tools
|
||||
pyyaml==6.0.2
|
||||
# via
|
||||
# -c ./test.txt
|
||||
# -c test.txt
|
||||
# pre-commit
|
||||
tomli==2.2.1
|
||||
# via
|
||||
# -c ./test.txt
|
||||
# -c test.txt
|
||||
# build
|
||||
# pip-tools
|
||||
virtualenv==20.29.1
|
||||
virtualenv==20.29.2
|
||||
# via pre-commit
|
||||
wheel==0.45.1
|
||||
# via pip-tools
|
||||
|
||||
@ -2,23 +2,23 @@
|
||||
# This file is autogenerated by pip-compile with Python 3.9
|
||||
# by the following command:
|
||||
#
|
||||
# pip-compile ./extra-csv.in
|
||||
# pip-compile extra-csv.in
|
||||
#
|
||||
numpy==1.26.4
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# pandas
|
||||
pandas==2.2.3
|
||||
# via -r ./extra-csv.in
|
||||
# via -r extra-csv.in
|
||||
python-dateutil==2.9.0.post0
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# pandas
|
||||
pytz==2024.2
|
||||
pytz==2025.1
|
||||
# via pandas
|
||||
six==1.17.0
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# python-dateutil
|
||||
tzdata==2025.1
|
||||
# via pandas
|
||||
|
||||
@ -2,15 +2,15 @@
|
||||
# This file is autogenerated by pip-compile with Python 3.9
|
||||
# by the following command:
|
||||
#
|
||||
# pip-compile ./extra-docx.in
|
||||
# pip-compile extra-docx.in
|
||||
#
|
||||
lxml==5.3.0
|
||||
lxml==5.3.1
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# python-docx
|
||||
python-docx==1.1.2
|
||||
# via -r ./extra-docx.in
|
||||
# via -r extra-docx.in
|
||||
typing-extensions==4.12.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# python-docx
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
# This file is autogenerated by pip-compile with Python 3.9
|
||||
# by the following command:
|
||||
#
|
||||
# pip-compile ./extra-epub.in
|
||||
# pip-compile extra-epub.in
|
||||
#
|
||||
pypandoc==1.15
|
||||
# via -r ./extra-epub.in
|
||||
# via -r extra-epub.in
|
||||
|
||||
@ -2,13 +2,13 @@
|
||||
# This file is autogenerated by pip-compile with Python 3.9
|
||||
# by the following command:
|
||||
#
|
||||
# pip-compile ./extra-markdown.in
|
||||
# pip-compile extra-markdown.in
|
||||
#
|
||||
importlib-metadata==8.6.1
|
||||
# via
|
||||
# -c ././deps/constraints.txt
|
||||
# -c ./deps/constraints.txt
|
||||
# markdown
|
||||
markdown==3.7
|
||||
# via -r ./extra-markdown.in
|
||||
# via -r extra-markdown.in
|
||||
zipp==3.21.0
|
||||
# via importlib-metadata
|
||||
|
||||
@ -2,17 +2,17 @@
|
||||
# This file is autogenerated by pip-compile with Python 3.9
|
||||
# by the following command:
|
||||
#
|
||||
# pip-compile ./extra-odt.in
|
||||
# pip-compile extra-odt.in
|
||||
#
|
||||
lxml==5.3.0
|
||||
lxml==5.3.1
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# python-docx
|
||||
pypandoc==1.15
|
||||
# via -r ./extra-odt.in
|
||||
# via -r extra-odt.in
|
||||
python-docx==1.1.2
|
||||
# via -r ./extra-odt.in
|
||||
# via -r extra-odt.in
|
||||
typing-extensions==4.12.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# python-docx
|
||||
|
||||
@ -2,53 +2,53 @@
|
||||
# This file is autogenerated by pip-compile with Python 3.9
|
||||
# by the following command:
|
||||
#
|
||||
# pip-compile ./extra-paddleocr.in
|
||||
# pip-compile extra-paddleocr.in
|
||||
#
|
||||
anyio==4.8.0
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# httpx
|
||||
astor==0.8.1
|
||||
# via paddlepaddle
|
||||
certifi==2024.12.14
|
||||
certifi==2025.1.31
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# httpcore
|
||||
# httpx
|
||||
# requests
|
||||
charset-normalizer==3.4.1
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# requests
|
||||
contourpy==1.3.0
|
||||
# via matplotlib
|
||||
cycler==0.12.1
|
||||
# via matplotlib
|
||||
cython==3.0.11
|
||||
cython==3.0.12
|
||||
# via unstructured-paddleocr
|
||||
decorator==5.1.1
|
||||
# via paddlepaddle
|
||||
exceptiongroup==1.2.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# anyio
|
||||
fonttools==4.55.8
|
||||
fonttools==4.56.0
|
||||
# via matplotlib
|
||||
h11==0.14.0
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# httpcore
|
||||
httpcore==1.0.7
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# httpx
|
||||
httpx==0.28.1
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# paddlepaddle
|
||||
idna==3.10
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# anyio
|
||||
# httpx
|
||||
# requests
|
||||
@ -72,7 +72,7 @@ networkx==3.2.1
|
||||
# scikit-image
|
||||
numpy==1.26.4
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# contourpy
|
||||
# imageio
|
||||
# imgaug
|
||||
@ -96,12 +96,12 @@ opt-einsum==3.3.0
|
||||
# via paddlepaddle
|
||||
packaging==24.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# lazy-loader
|
||||
# matplotlib
|
||||
# scikit-image
|
||||
paddlepaddle==3.0.0b1
|
||||
# via -r ./extra-paddleocr.in
|
||||
# via -r extra-paddleocr.in
|
||||
pdf2image==1.17.0
|
||||
# via unstructured-paddleocr
|
||||
pillow==11.1.0
|
||||
@ -121,17 +121,17 @@ pyparsing==3.2.1
|
||||
# via matplotlib
|
||||
python-dateutil==2.9.0.post0
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# matplotlib
|
||||
pyyaml==6.0.2
|
||||
# via unstructured-paddleocr
|
||||
rapidfuzz==3.11.0
|
||||
rapidfuzz==3.12.1
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# unstructured-paddleocr
|
||||
requests==2.32.3
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# unstructured-paddleocr
|
||||
scikit-image==0.24.0
|
||||
# via
|
||||
@ -141,36 +141,36 @@ scipy==1.13.1
|
||||
# via
|
||||
# imgaug
|
||||
# scikit-image
|
||||
shapely==2.0.6
|
||||
shapely==2.0.7
|
||||
# via
|
||||
# imgaug
|
||||
# unstructured-paddleocr
|
||||
six==1.17.0
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# imgaug
|
||||
# python-dateutil
|
||||
sniffio==1.3.1
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# anyio
|
||||
tifffile==2024.8.30
|
||||
# via scikit-image
|
||||
tqdm==4.67.1
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# unstructured-paddleocr
|
||||
typing-extensions==4.12.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# anyio
|
||||
# paddlepaddle
|
||||
unstructured-paddleocr==2.8.1.0
|
||||
# via -r ./extra-paddleocr.in
|
||||
# via -r extra-paddleocr.in
|
||||
urllib3==1.26.20
|
||||
# via
|
||||
# -c ././deps/constraints.txt
|
||||
# -c ./base.txt
|
||||
# -c ./deps/constraints.txt
|
||||
# -c base.txt
|
||||
# requests
|
||||
zipp==3.21.0
|
||||
# via importlib-resources
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
# This file is autogenerated by pip-compile with Python 3.9
|
||||
# by the following command:
|
||||
#
|
||||
# pip-compile ./extra-pandoc.in
|
||||
# pip-compile extra-pandoc.in
|
||||
#
|
||||
pypandoc==1.15
|
||||
# via -r ./extra-pandoc.in
|
||||
# via -r extra-pandoc.in
|
||||
|
||||
@ -2,49 +2,49 @@
|
||||
# This file is autogenerated by pip-compile with Python 3.9
|
||||
# by the following command:
|
||||
#
|
||||
# pip-compile ./extra-pdf-image.in
|
||||
# pip-compile extra-pdf-image.in
|
||||
#
|
||||
antlr4-python3-runtime==4.9.3
|
||||
# via omegaconf
|
||||
cachetools==5.5.1
|
||||
# via google-auth
|
||||
certifi==2024.12.14
|
||||
certifi==2025.1.31
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# requests
|
||||
cffi==1.17.1
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# cryptography
|
||||
charset-normalizer==3.4.1
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# pdfminer-six
|
||||
# requests
|
||||
coloredlogs==15.0.1
|
||||
# via onnxruntime
|
||||
contourpy==1.3.0
|
||||
# via matplotlib
|
||||
cryptography==44.0.0
|
||||
cryptography==44.0.1
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# pdfminer-six
|
||||
cycler==0.12.1
|
||||
# via matplotlib
|
||||
deprecated==1.2.18
|
||||
# via pikepdf
|
||||
effdet==0.4.1
|
||||
# via -r ./extra-pdf-image.in
|
||||
# via -r extra-pdf-image.in
|
||||
filelock==3.17.0
|
||||
# via
|
||||
# huggingface-hub
|
||||
# torch
|
||||
# transformers
|
||||
flatbuffers==25.1.24
|
||||
flatbuffers==25.2.10
|
||||
# via onnxruntime
|
||||
fonttools==4.55.8
|
||||
fonttools==4.56.0
|
||||
# via matplotlib
|
||||
fsspec==2024.12.0
|
||||
fsspec==2025.2.0
|
||||
# via
|
||||
# huggingface-hub
|
||||
# torch
|
||||
@ -54,20 +54,20 @@ google-auth==2.38.0
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-vision
|
||||
google-cloud-vision==3.9.0
|
||||
# via -r ./extra-pdf-image.in
|
||||
googleapis-common-protos==1.66.0
|
||||
google-cloud-vision==3.10.0
|
||||
# via -r extra-pdf-image.in
|
||||
googleapis-common-protos==1.67.0
|
||||
# via
|
||||
# google-api-core
|
||||
# grpcio-status
|
||||
grpcio==1.70.0
|
||||
# via
|
||||
# -c ././deps/constraints.txt
|
||||
# -c ./deps/constraints.txt
|
||||
# google-api-core
|
||||
# grpcio-status
|
||||
grpcio-status==1.70.0
|
||||
# via google-api-core
|
||||
huggingface-hub==0.28.0
|
||||
huggingface-hub==0.28.1
|
||||
# via
|
||||
# timm
|
||||
# tokenizers
|
||||
@ -77,7 +77,7 @@ humanfriendly==10.0
|
||||
# via coloredlogs
|
||||
idna==3.10
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# requests
|
||||
importlib-resources==6.5.2
|
||||
# via matplotlib
|
||||
@ -85,9 +85,9 @@ jinja2==3.1.5
|
||||
# via torch
|
||||
kiwisolver==1.4.7
|
||||
# via matplotlib
|
||||
lxml==5.3.0
|
||||
lxml==5.3.1
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# pikepdf
|
||||
markupsafe==3.0.2
|
||||
# via jinja2
|
||||
@ -101,7 +101,7 @@ networkx==3.2.1
|
||||
# via torch
|
||||
numpy==1.26.4
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# contourpy
|
||||
# matplotlib
|
||||
# onnx
|
||||
@ -117,7 +117,7 @@ omegaconf==2.3.0
|
||||
# via effdet
|
||||
onnx==1.17.0
|
||||
# via
|
||||
# -r ./extra-pdf-image.in
|
||||
# -r extra-pdf-image.in
|
||||
# unstructured-inference
|
||||
onnxruntime==1.19.2
|
||||
# via unstructured-inference
|
||||
@ -125,7 +125,7 @@ opencv-python==4.11.0.86
|
||||
# via unstructured-inference
|
||||
packaging==24.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# huggingface-hub
|
||||
# matplotlib
|
||||
# onnxruntime
|
||||
@ -135,15 +135,15 @@ packaging==24.2
|
||||
pandas==2.2.3
|
||||
# via unstructured-inference
|
||||
pdf2image==1.17.0
|
||||
# via -r ./extra-pdf-image.in
|
||||
# via -r extra-pdf-image.in
|
||||
pdfminer-six==20240706
|
||||
# via
|
||||
# -r ./extra-pdf-image.in
|
||||
# -r extra-pdf-image.in
|
||||
# unstructured-inference
|
||||
pi-heif==0.21.0
|
||||
# via -r ./extra-pdf-image.in
|
||||
pikepdf==9.5.1
|
||||
# via -r ./extra-pdf-image.in
|
||||
# via -r extra-pdf-image.in
|
||||
pikepdf==9.5.2
|
||||
# via -r extra-pdf-image.in
|
||||
pillow==11.1.0
|
||||
# via
|
||||
# matplotlib
|
||||
@ -175,24 +175,24 @@ pycocotools==2.0.8
|
||||
# via effdet
|
||||
pycparser==2.22
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# cffi
|
||||
pyparsing==3.2.1
|
||||
# via matplotlib
|
||||
pypdf==5.2.0
|
||||
pypdf==5.3.0
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -r ./extra-pdf-image.in
|
||||
# -c base.txt
|
||||
# -r extra-pdf-image.in
|
||||
pypdfium2==4.30.1
|
||||
# via unstructured-inference
|
||||
python-dateutil==2.9.0.post0
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# matplotlib
|
||||
# pandas
|
||||
python-multipart==0.0.20
|
||||
# via unstructured-inference
|
||||
pytz==2024.2
|
||||
pytz==2025.1
|
||||
# via pandas
|
||||
pyyaml==6.0.2
|
||||
# via
|
||||
@ -200,17 +200,17 @@ pyyaml==6.0.2
|
||||
# omegaconf
|
||||
# timm
|
||||
# transformers
|
||||
rapidfuzz==3.11.0
|
||||
rapidfuzz==3.12.1
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# unstructured-inference
|
||||
regex==2024.11.6
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# transformers
|
||||
requests==2.32.3
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# google-api-core
|
||||
# huggingface-hub
|
||||
# transformers
|
||||
@ -224,7 +224,7 @@ scipy==1.13.1
|
||||
# via unstructured-inference
|
||||
six==1.17.0
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# python-dateutil
|
||||
sympy==1.13.1
|
||||
# via
|
||||
@ -234,9 +234,9 @@ timm==1.0.14
|
||||
# via
|
||||
# effdet
|
||||
# unstructured-inference
|
||||
tokenizers==0.19.1
|
||||
tokenizers==0.21.0
|
||||
# via
|
||||
# -c ././deps/constraints.txt
|
||||
# -c ./deps/constraints.txt
|
||||
# transformers
|
||||
torch==2.6.0
|
||||
# via
|
||||
@ -250,31 +250,31 @@ torchvision==0.21.0
|
||||
# timm
|
||||
tqdm==4.67.1
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# huggingface-hub
|
||||
# transformers
|
||||
transformers==4.44.2
|
||||
transformers==4.49.0
|
||||
# via unstructured-inference
|
||||
typing-extensions==4.12.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# huggingface-hub
|
||||
# pypdf
|
||||
# torch
|
||||
tzdata==2025.1
|
||||
# via pandas
|
||||
unstructured-inference==0.8.7
|
||||
# via -r ./extra-pdf-image.in
|
||||
# via -r extra-pdf-image.in
|
||||
unstructured-pytesseract==0.3.13
|
||||
# via -r ./extra-pdf-image.in
|
||||
# via -r extra-pdf-image.in
|
||||
urllib3==1.26.20
|
||||
# via
|
||||
# -c ././deps/constraints.txt
|
||||
# -c ./base.txt
|
||||
# -c ./deps/constraints.txt
|
||||
# -c base.txt
|
||||
# requests
|
||||
wrapt==1.17.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# deprecated
|
||||
zipp==3.21.0
|
||||
# via importlib-resources
|
||||
|
||||
@ -2,14 +2,14 @@
|
||||
# This file is autogenerated by pip-compile with Python 3.9
|
||||
# by the following command:
|
||||
#
|
||||
# pip-compile ./extra-pptx.in
|
||||
# pip-compile extra-pptx.in
|
||||
#
|
||||
lxml==5.3.0
|
||||
lxml==5.3.1
|
||||
# via python-pptx
|
||||
pillow==11.1.0
|
||||
# via python-pptx
|
||||
python-pptx==1.0.2
|
||||
# via -r ./extra-pptx.in
|
||||
# via -r extra-pptx.in
|
||||
typing-extensions==4.12.2
|
||||
# via python-pptx
|
||||
xlsxwriter==3.2.2
|
||||
|
||||
@ -2,31 +2,31 @@
|
||||
# This file is autogenerated by pip-compile with Python 3.9
|
||||
# by the following command:
|
||||
#
|
||||
# pip-compile ./extra-xlsx.in
|
||||
# pip-compile extra-xlsx.in
|
||||
#
|
||||
et-xmlfile==2.0.0
|
||||
# via openpyxl
|
||||
networkx==3.2.1
|
||||
# via -r ./extra-xlsx.in
|
||||
# via -r extra-xlsx.in
|
||||
numpy==1.26.4
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# pandas
|
||||
openpyxl==3.1.5
|
||||
# via -r ./extra-xlsx.in
|
||||
# via -r extra-xlsx.in
|
||||
pandas==2.2.3
|
||||
# via -r ./extra-xlsx.in
|
||||
# via -r extra-xlsx.in
|
||||
python-dateutil==2.9.0.post0
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# pandas
|
||||
pytz==2024.2
|
||||
pytz==2025.1
|
||||
# via pandas
|
||||
six==1.17.0
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# python-dateutil
|
||||
tzdata==2025.1
|
||||
# via pandas
|
||||
xlrd==2.0.1
|
||||
# via -r ./extra-xlsx.in
|
||||
# via -r extra-xlsx.in
|
||||
|
||||
@ -2,47 +2,47 @@
|
||||
# This file is autogenerated by pip-compile with Python 3.9
|
||||
# by the following command:
|
||||
#
|
||||
# pip-compile ./huggingface.in
|
||||
# pip-compile huggingface.in
|
||||
#
|
||||
certifi==2024.12.14
|
||||
certifi==2025.1.31
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# requests
|
||||
charset-normalizer==3.4.1
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# requests
|
||||
click==8.1.8
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# sacremoses
|
||||
filelock==3.17.0
|
||||
# via
|
||||
# huggingface-hub
|
||||
# torch
|
||||
# transformers
|
||||
fsspec==2024.12.0
|
||||
fsspec==2025.2.0
|
||||
# via
|
||||
# huggingface-hub
|
||||
# torch
|
||||
huggingface-hub==0.28.0
|
||||
huggingface-hub==0.28.1
|
||||
# via
|
||||
# tokenizers
|
||||
# transformers
|
||||
idna==3.10
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# requests
|
||||
jinja2==3.1.5
|
||||
# via torch
|
||||
joblib==1.4.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# sacremoses
|
||||
langdetect==1.0.9
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -r ./huggingface.in
|
||||
# -c base.txt
|
||||
# -r huggingface.in
|
||||
markupsafe==3.0.2
|
||||
# via jinja2
|
||||
mpmath==1.3.0
|
||||
@ -51,11 +51,11 @@ networkx==3.2.1
|
||||
# via torch
|
||||
numpy==1.26.4
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# transformers
|
||||
packaging==24.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# huggingface-hub
|
||||
# transformers
|
||||
pyyaml==6.0.2
|
||||
@ -64,47 +64,47 @@ pyyaml==6.0.2
|
||||
# transformers
|
||||
regex==2024.11.6
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# sacremoses
|
||||
# transformers
|
||||
requests==2.32.3
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# huggingface-hub
|
||||
# transformers
|
||||
sacremoses==0.1.1
|
||||
# via -r ./huggingface.in
|
||||
# via -r huggingface.in
|
||||
safetensors==0.5.2
|
||||
# via transformers
|
||||
sentencepiece==0.2.0
|
||||
# via -r ./huggingface.in
|
||||
# via -r huggingface.in
|
||||
six==1.17.0
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# langdetect
|
||||
sympy==1.13.1
|
||||
# via torch
|
||||
tokenizers==0.19.1
|
||||
tokenizers==0.21.0
|
||||
# via
|
||||
# -c ././deps/constraints.txt
|
||||
# -c ./deps/constraints.txt
|
||||
# transformers
|
||||
torch==2.6.0
|
||||
# via -r ./huggingface.in
|
||||
# via -r huggingface.in
|
||||
tqdm==4.67.1
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# huggingface-hub
|
||||
# sacremoses
|
||||
# transformers
|
||||
transformers==4.44.2
|
||||
# via -r ./huggingface.in
|
||||
transformers==4.49.0
|
||||
# via -r huggingface.in
|
||||
typing-extensions==4.12.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# huggingface-hub
|
||||
# torch
|
||||
urllib3==1.26.20
|
||||
# via
|
||||
# -c ././deps/constraints.txt
|
||||
# -c ./base.txt
|
||||
# -c ./deps/constraints.txt
|
||||
# -c base.txt
|
||||
# requests
|
||||
|
||||
@ -2,13 +2,13 @@
|
||||
# This file is autogenerated by pip-compile with Python 3.9
|
||||
# by the following command:
|
||||
#
|
||||
# pip-compile ./test.in
|
||||
# pip-compile test.in
|
||||
#
|
||||
annotated-types==0.7.0
|
||||
# via pydantic
|
||||
anyio==4.8.0
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# httpx
|
||||
appdirs==1.4.4
|
||||
# via label-studio-sdk
|
||||
@ -19,29 +19,29 @@ attrs==25.1.0
|
||||
# jsonschema
|
||||
# referencing
|
||||
autoflake==2.3.1
|
||||
# via -r ./test.in
|
||||
# via -r test.in
|
||||
black==25.1.0
|
||||
# via
|
||||
# -r ./test.in
|
||||
# -r test.in
|
||||
# datamodel-code-generator
|
||||
certifi==2024.12.14
|
||||
certifi==2025.1.31
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# httpcore
|
||||
# httpx
|
||||
# requests
|
||||
charset-normalizer==3.4.1
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# requests
|
||||
click==8.1.8
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# black
|
||||
# nltk
|
||||
coverage[toml]==7.6.10
|
||||
coverage[toml]==7.6.12
|
||||
# via
|
||||
# -r ./test.in
|
||||
# -r test.in
|
||||
# pytest-cov
|
||||
datamodel-code-generator==0.26.1
|
||||
# via label-studio-sdk
|
||||
@ -51,40 +51,40 @@ email-validator==2.2.0
|
||||
# via pydantic
|
||||
exceptiongroup==1.2.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# anyio
|
||||
# pytest
|
||||
faker==35.0.0
|
||||
faker==36.1.1
|
||||
# via jsf
|
||||
flake8==7.1.1
|
||||
flake8==7.1.2
|
||||
# via
|
||||
# -r ./test.in
|
||||
# -r test.in
|
||||
# flake8-print
|
||||
flake8-print==5.0.0
|
||||
# via -r ./test.in
|
||||
# via -r test.in
|
||||
freezegun==1.5.1
|
||||
# via -r ./test.in
|
||||
# via -r test.in
|
||||
genson==1.3.0
|
||||
# via datamodel-code-generator
|
||||
grpcio==1.70.0
|
||||
# via
|
||||
# -c ././deps/constraints.txt
|
||||
# -r ./test.in
|
||||
# -c ./deps/constraints.txt
|
||||
# -r test.in
|
||||
h11==0.14.0
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# httpcore
|
||||
httpcore==1.0.7
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# httpx
|
||||
httpx==0.28.1
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# label-studio-sdk
|
||||
idna==3.10
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# anyio
|
||||
# email-validator
|
||||
# httpx
|
||||
@ -102,7 +102,7 @@ jinja2==3.1.5
|
||||
# via datamodel-code-generator
|
||||
joblib==1.4.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# nltk
|
||||
jsf==0.11.2
|
||||
# via label-studio-sdk
|
||||
@ -112,13 +112,13 @@ jsonschema==4.23.0
|
||||
# label-studio-sdk
|
||||
jsonschema-specifications==2024.10.1
|
||||
# via jsonschema
|
||||
label-studio-sdk==1.0.8
|
||||
# via -r ./test.in
|
||||
label-studio-sdk==1.0.10
|
||||
# via -r test.in
|
||||
liccheck==0.9.2
|
||||
# via -r ./test.in
|
||||
lxml==5.3.0
|
||||
# via -r test.in
|
||||
lxml==5.3.1
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# label-studio-sdk
|
||||
markupsafe==3.0.2
|
||||
# via jinja2
|
||||
@ -126,25 +126,25 @@ mccabe==0.7.0
|
||||
# via flake8
|
||||
multidict==6.1.0
|
||||
# via yarl
|
||||
mypy==1.14.1
|
||||
# via -r ./test.in
|
||||
mypy==1.15.0
|
||||
# via -r test.in
|
||||
mypy-extensions==1.0.0
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# black
|
||||
# mypy
|
||||
nltk==3.9.1
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# label-studio-sdk
|
||||
numpy==1.26.4
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# label-studio-sdk
|
||||
# pandas
|
||||
packaging==24.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# black
|
||||
# datamodel-code-generator
|
||||
# pytest
|
||||
@ -166,12 +166,14 @@ pycodestyle==2.12.1
|
||||
# flake8-print
|
||||
pydantic[email]==2.10.6
|
||||
# via
|
||||
# -r ./test.in
|
||||
# -r test.in
|
||||
# datamodel-code-generator
|
||||
# jsf
|
||||
# label-studio-sdk
|
||||
pydantic-core==2.27.2
|
||||
# via pydantic
|
||||
# via
|
||||
# label-studio-sdk
|
||||
# pydantic
|
||||
pyflakes==3.2.0
|
||||
# via
|
||||
# autoflake
|
||||
@ -181,16 +183,15 @@ pytest==8.3.4
|
||||
# pytest-cov
|
||||
# pytest-mock
|
||||
pytest-cov==6.0.0
|
||||
# via -r ./test.in
|
||||
# via -r test.in
|
||||
pytest-mock==3.14.0
|
||||
# via -r ./test.in
|
||||
# via -r test.in
|
||||
python-dateutil==2.9.0.post0
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# faker
|
||||
# -c base.txt
|
||||
# freezegun
|
||||
# pandas
|
||||
pytz==2024.2
|
||||
pytz==2025.1
|
||||
# via pandas
|
||||
pyyaml==6.0.2
|
||||
# via
|
||||
@ -202,11 +203,11 @@ referencing==0.36.2
|
||||
# jsonschema-specifications
|
||||
regex==2024.11.6
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# nltk
|
||||
requests==2.32.3
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# label-studio-sdk
|
||||
# requests-mock
|
||||
# smart-open
|
||||
@ -218,19 +219,19 @@ rpds-py==0.22.3
|
||||
# referencing
|
||||
rstr==3.2.2
|
||||
# via jsf
|
||||
ruff==0.9.3
|
||||
# via -r ./test.in
|
||||
ruff==0.9.6
|
||||
# via -r test.in
|
||||
semantic-version==2.10.0
|
||||
# via liccheck
|
||||
six==1.17.0
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# python-dateutil
|
||||
smart-open[http]==7.1.0
|
||||
# via jsf
|
||||
sniffio==1.3.1
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# anyio
|
||||
toml==0.10.2
|
||||
# via
|
||||
@ -245,24 +246,23 @@ tomli==2.2.1
|
||||
# pytest
|
||||
tqdm==4.67.1
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# nltk
|
||||
types-click==7.1.8
|
||||
# via -r ./test.in
|
||||
# via -r test.in
|
||||
types-markdown==3.7.0.20241204
|
||||
# via -r ./test.in
|
||||
# via -r test.in
|
||||
types-requests==2.31.0.6
|
||||
# via -r ./test.in
|
||||
# via -r test.in
|
||||
types-tabulate==0.9.0.20241207
|
||||
# via -r ./test.in
|
||||
# via -r test.in
|
||||
types-urllib3==1.26.25.14
|
||||
# via types-requests
|
||||
typing-extensions==4.12.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# anyio
|
||||
# black
|
||||
# faker
|
||||
# jsf
|
||||
# label-studio-sdk
|
||||
# multidict
|
||||
@ -271,20 +271,22 @@ typing-extensions==4.12.2
|
||||
# pydantic-core
|
||||
# referencing
|
||||
tzdata==2025.1
|
||||
# via pandas
|
||||
# via
|
||||
# faker
|
||||
# pandas
|
||||
ujson==5.10.0
|
||||
# via label-studio-sdk
|
||||
urllib3==1.26.20
|
||||
# via
|
||||
# -c ././deps/constraints.txt
|
||||
# -c ./base.txt
|
||||
# -c ./deps/constraints.txt
|
||||
# -c base.txt
|
||||
# requests
|
||||
# vcrpy
|
||||
vcrpy==7.0.0
|
||||
# via -r ./test.in
|
||||
# via -r test.in
|
||||
wrapt==1.17.2
|
||||
# via
|
||||
# -c ./base.txt
|
||||
# -c base.txt
|
||||
# smart-open
|
||||
# vcrpy
|
||||
xmljson==0.2.1
|
||||
|
||||
@ -1 +1 @@
|
||||
__version__ = "0.16.21" # pragma: no cover
|
||||
__version__ = "0.16.22" # pragma: no cover
|
||||
|
||||
@ -52,6 +52,7 @@ def translate_text(text: str, source_lang: Optional[str] = None, target_lang: st
|
||||
return text
|
||||
|
||||
model_name = _get_opus_mt_model_name(_source_lang, target_lang)
|
||||
print(f"Using model: {model_name}")
|
||||
|
||||
try:
|
||||
tokenizer = MarianTokenizer.from_pretrained(model_name)
|
||||
@ -79,7 +80,7 @@ def _translate_text(text, model, tokenizer):
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore")
|
||||
translated = model.generate(
|
||||
**tokenizer([text], return_tensors="pt", padding="max_length", max_length=512),
|
||||
**tokenizer([text], return_tensors="pt", padding=True, truncation=True),
|
||||
)
|
||||
return [tokenizer.decode(t, max_new_tokens=512, skip_special_tokens=True) for t in translated][
|
||||
0
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user