Luke/CVE bump (#3928)

bumping dependencies and updating the tokenizer constraint
This commit is contained in:
luke-kucing 2025-02-19 12:23:31 -05:00 committed by GitHub
parent 3403db1ad4
commit 147add9a04
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
19 changed files with 264 additions and 250 deletions

View File

@ -1,3 +1,13 @@
## 0.16.22
### Enhancements
### Features
### Fixes
- **Fix open CVEs and bump dependencies**
## 0.16.21
### Enhancements

View File

@ -27,4 +27,4 @@ clean: clean-base
.PHONY: clean-base
clean-base:
rm $(BASE_REQUIREMENTSTXT)
rm $(BASE_REQUIREMENTSTXT)

View File

@ -2,15 +2,15 @@
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile ./base.in
# pip-compile base.in
#
anyio==4.8.0
# via httpx
backoff==2.2.1
# via -r ./base.in
beautifulsoup4==4.12.3
# via -r ./base.in
certifi==2024.12.14
# via -r base.in
beautifulsoup4==4.13.3
# via -r base.in
certifi==2025.1.31
# via
# httpcore
# httpx
@ -19,7 +19,7 @@ certifi==2024.12.14
cffi==1.17.1
# via cryptography
chardet==5.2.0
# via -r ./base.in
# via -r base.in
charset-normalizer==3.4.1
# via
# requests
@ -28,24 +28,24 @@ click==8.1.8
# via
# nltk
# python-oxmsg
cryptography==44.0.0
cryptography==44.0.1
# via unstructured-client
dataclasses-json==0.6.7
# via
# -r ./base.in
# -r base.in
# unstructured-client
deepdiff==8.1.1
deepdiff==8.2.0
# via unstructured-client
emoji==2.14.1
# via -r ./base.in
# via -r base.in
exceptiongroup==1.2.2
# via anyio
filetype==1.2.0
# via -r ./base.in
# via -r base.in
h11==0.14.0
# via httpcore
html5lib==1.1
# via -r ./base.in
# via -r base.in
httpcore==1.0.7
# via httpx
httpx==0.28.1
@ -61,10 +61,10 @@ joblib==1.4.2
jsonpath-python==1.0.6
# via unstructured-client
langdetect==1.0.9
# via -r ./base.in
lxml==5.3.0
# via -r ./base.in
marshmallow==3.26.0
# via -r base.in
lxml==5.3.1
# via -r base.in
marshmallow==3.26.1
# via
# dataclasses-json
# unstructured-client
@ -75,38 +75,38 @@ mypy-extensions==1.0.0
nest-asyncio==1.6.0
# via unstructured-client
nltk==3.9.1
# via -r ./base.in
# via -r base.in
numpy==1.26.4
# via -r ./base.in
# via -r base.in
olefile==0.47
# via python-oxmsg
orderly-set==5.2.3
orderly-set==5.3.0
# via deepdiff
packaging==24.2
# via
# marshmallow
# unstructured-client
psutil==6.1.1
# via -r ./base.in
psutil==7.0.0
# via -r base.in
pycparser==2.22
# via cffi
pypdf==5.2.0
pypdf==5.3.0
# via unstructured-client
python-dateutil==2.9.0.post0
# via unstructured-client
python-iso639==2025.1.28
# via -r ./base.in
python-iso639==2025.2.18
# via -r base.in
python-magic==0.4.27
# via -r ./base.in
python-oxmsg==0.0.1
# via -r ./base.in
rapidfuzz==3.11.0
# via -r ./base.in
# via -r base.in
python-oxmsg==0.0.2
# via -r base.in
rapidfuzz==3.12.1
# via -r base.in
regex==2024.11.6
# via nltk
requests==2.32.3
# via
# -r ./base.in
# -r base.in
# requests-toolbelt
# unstructured-client
requests-toolbelt==1.0.0
@ -123,12 +123,13 @@ soupsieve==2.6
# via beautifulsoup4
tqdm==4.67.1
# via
# -r ./base.in
# -r base.in
# nltk
typing-extensions==4.12.2
# via
# -r ./base.in
# -r base.in
# anyio
# beautifulsoup4
# pypdf
# python-oxmsg
# typing-inspect
@ -139,14 +140,14 @@ typing-inspect==0.9.0
# unstructured-client
unstructured-client==0.25.9
# via
# -c ././deps/constraints.txt
# -r ./base.in
# -c ./deps/constraints.txt
# -r base.in
urllib3==1.26.20
# via
# -c ././deps/constraints.txt
# -c ./deps/constraints.txt
# requests
# unstructured-client
webencodings==0.5.1
# via html5lib
wrapt==1.17.2
# via -r ./base.in
# via -r base.in

View File

@ -8,7 +8,7 @@ weaviate-client>=3.26.7,<4.0.0
# TODO: Constraint due to multiple versions being installed during pip-compile
grpcio>=1.65.5
# TODO: Pinned in transformers package, remove when that gets updated (https://github.com/huggingface/transformers/blob/main/setup.py)
tokenizers>=0.19,<0.20
tokenizers>=0.21,<0.22
# TODO: Constraint due to boto, with Python before 3.10 not requiring OpenSSL 1.1.1; remove when that gets
# updated or we drop support for 3.9
urllib3<1.27

View File

@ -2,7 +2,7 @@
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile ./dev.in
# pip-compile dev.in
#
build==1.2.2.post1
# via pip-tools
@ -10,48 +10,48 @@ cfgv==3.4.0
# via pre-commit
click==8.1.8
# via
# -c ./base.txt
# -c ./test.txt
# -c base.txt
# -c test.txt
# pip-tools
distlib==0.3.9
# via virtualenv
filelock==3.17.0
# via virtualenv
identify==2.6.6
identify==2.6.7
# via pre-commit
importlib-metadata==8.6.1
# via
# -c ././deps/constraints.txt
# -c ./deps/constraints.txt
# build
nodeenv==1.9.1
# via pre-commit
packaging==24.2
# via
# -c ./base.txt
# -c ./test.txt
# -c base.txt
# -c test.txt
# build
pip-tools==7.4.1
# via -r ./dev.in
# via -r dev.in
platformdirs==4.3.6
# via
# -c ./test.txt
# -c test.txt
# virtualenv
pre-commit==4.1.0
# via -r ./dev.in
# via -r dev.in
pyproject-hooks==1.2.0
# via
# build
# pip-tools
pyyaml==6.0.2
# via
# -c ./test.txt
# -c test.txt
# pre-commit
tomli==2.2.1
# via
# -c ./test.txt
# -c test.txt
# build
# pip-tools
virtualenv==20.29.1
virtualenv==20.29.2
# via pre-commit
wheel==0.45.1
# via pip-tools

View File

@ -2,23 +2,23 @@
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile ./extra-csv.in
# pip-compile extra-csv.in
#
numpy==1.26.4
# via
# -c ./base.txt
# -c base.txt
# pandas
pandas==2.2.3
# via -r ./extra-csv.in
# via -r extra-csv.in
python-dateutil==2.9.0.post0
# via
# -c ./base.txt
# -c base.txt
# pandas
pytz==2024.2
pytz==2025.1
# via pandas
six==1.17.0
# via
# -c ./base.txt
# -c base.txt
# python-dateutil
tzdata==2025.1
# via pandas

View File

@ -2,15 +2,15 @@
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile ./extra-docx.in
# pip-compile extra-docx.in
#
lxml==5.3.0
lxml==5.3.1
# via
# -c ./base.txt
# -c base.txt
# python-docx
python-docx==1.1.2
# via -r ./extra-docx.in
# via -r extra-docx.in
typing-extensions==4.12.2
# via
# -c ./base.txt
# -c base.txt
# python-docx

View File

@ -2,7 +2,7 @@
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile ./extra-epub.in
# pip-compile extra-epub.in
#
pypandoc==1.15
# via -r ./extra-epub.in
# via -r extra-epub.in

View File

@ -2,13 +2,13 @@
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile ./extra-markdown.in
# pip-compile extra-markdown.in
#
importlib-metadata==8.6.1
# via
# -c ././deps/constraints.txt
# -c ./deps/constraints.txt
# markdown
markdown==3.7
# via -r ./extra-markdown.in
# via -r extra-markdown.in
zipp==3.21.0
# via importlib-metadata

View File

@ -2,17 +2,17 @@
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile ./extra-odt.in
# pip-compile extra-odt.in
#
lxml==5.3.0
lxml==5.3.1
# via
# -c ./base.txt
# -c base.txt
# python-docx
pypandoc==1.15
# via -r ./extra-odt.in
# via -r extra-odt.in
python-docx==1.1.2
# via -r ./extra-odt.in
# via -r extra-odt.in
typing-extensions==4.12.2
# via
# -c ./base.txt
# -c base.txt
# python-docx

View File

@ -2,53 +2,53 @@
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile ./extra-paddleocr.in
# pip-compile extra-paddleocr.in
#
anyio==4.8.0
# via
# -c ./base.txt
# -c base.txt
# httpx
astor==0.8.1
# via paddlepaddle
certifi==2024.12.14
certifi==2025.1.31
# via
# -c ./base.txt
# -c base.txt
# httpcore
# httpx
# requests
charset-normalizer==3.4.1
# via
# -c ./base.txt
# -c base.txt
# requests
contourpy==1.3.0
# via matplotlib
cycler==0.12.1
# via matplotlib
cython==3.0.11
cython==3.0.12
# via unstructured-paddleocr
decorator==5.1.1
# via paddlepaddle
exceptiongroup==1.2.2
# via
# -c ./base.txt
# -c base.txt
# anyio
fonttools==4.55.8
fonttools==4.56.0
# via matplotlib
h11==0.14.0
# via
# -c ./base.txt
# -c base.txt
# httpcore
httpcore==1.0.7
# via
# -c ./base.txt
# -c base.txt
# httpx
httpx==0.28.1
# via
# -c ./base.txt
# -c base.txt
# paddlepaddle
idna==3.10
# via
# -c ./base.txt
# -c base.txt
# anyio
# httpx
# requests
@ -72,7 +72,7 @@ networkx==3.2.1
# scikit-image
numpy==1.26.4
# via
# -c ./base.txt
# -c base.txt
# contourpy
# imageio
# imgaug
@ -96,12 +96,12 @@ opt-einsum==3.3.0
# via paddlepaddle
packaging==24.2
# via
# -c ./base.txt
# -c base.txt
# lazy-loader
# matplotlib
# scikit-image
paddlepaddle==3.0.0b1
# via -r ./extra-paddleocr.in
# via -r extra-paddleocr.in
pdf2image==1.17.0
# via unstructured-paddleocr
pillow==11.1.0
@ -121,17 +121,17 @@ pyparsing==3.2.1
# via matplotlib
python-dateutil==2.9.0.post0
# via
# -c ./base.txt
# -c base.txt
# matplotlib
pyyaml==6.0.2
# via unstructured-paddleocr
rapidfuzz==3.11.0
rapidfuzz==3.12.1
# via
# -c ./base.txt
# -c base.txt
# unstructured-paddleocr
requests==2.32.3
# via
# -c ./base.txt
# -c base.txt
# unstructured-paddleocr
scikit-image==0.24.0
# via
@ -141,36 +141,36 @@ scipy==1.13.1
# via
# imgaug
# scikit-image
shapely==2.0.6
shapely==2.0.7
# via
# imgaug
# unstructured-paddleocr
six==1.17.0
# via
# -c ./base.txt
# -c base.txt
# imgaug
# python-dateutil
sniffio==1.3.1
# via
# -c ./base.txt
# -c base.txt
# anyio
tifffile==2024.8.30
# via scikit-image
tqdm==4.67.1
# via
# -c ./base.txt
# -c base.txt
# unstructured-paddleocr
typing-extensions==4.12.2
# via
# -c ./base.txt
# -c base.txt
# anyio
# paddlepaddle
unstructured-paddleocr==2.8.1.0
# via -r ./extra-paddleocr.in
# via -r extra-paddleocr.in
urllib3==1.26.20
# via
# -c ././deps/constraints.txt
# -c ./base.txt
# -c ./deps/constraints.txt
# -c base.txt
# requests
zipp==3.21.0
# via importlib-resources

View File

@ -2,7 +2,7 @@
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile ./extra-pandoc.in
# pip-compile extra-pandoc.in
#
pypandoc==1.15
# via -r ./extra-pandoc.in
# via -r extra-pandoc.in

View File

@ -2,49 +2,49 @@
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile ./extra-pdf-image.in
# pip-compile extra-pdf-image.in
#
antlr4-python3-runtime==4.9.3
# via omegaconf
cachetools==5.5.1
# via google-auth
certifi==2024.12.14
certifi==2025.1.31
# via
# -c ./base.txt
# -c base.txt
# requests
cffi==1.17.1
# via
# -c ./base.txt
# -c base.txt
# cryptography
charset-normalizer==3.4.1
# via
# -c ./base.txt
# -c base.txt
# pdfminer-six
# requests
coloredlogs==15.0.1
# via onnxruntime
contourpy==1.3.0
# via matplotlib
cryptography==44.0.0
cryptography==44.0.1
# via
# -c ./base.txt
# -c base.txt
# pdfminer-six
cycler==0.12.1
# via matplotlib
deprecated==1.2.18
# via pikepdf
effdet==0.4.1
# via -r ./extra-pdf-image.in
# via -r extra-pdf-image.in
filelock==3.17.0
# via
# huggingface-hub
# torch
# transformers
flatbuffers==25.1.24
flatbuffers==25.2.10
# via onnxruntime
fonttools==4.55.8
fonttools==4.56.0
# via matplotlib
fsspec==2024.12.0
fsspec==2025.2.0
# via
# huggingface-hub
# torch
@ -54,20 +54,20 @@ google-auth==2.38.0
# via
# google-api-core
# google-cloud-vision
google-cloud-vision==3.9.0
# via -r ./extra-pdf-image.in
googleapis-common-protos==1.66.0
google-cloud-vision==3.10.0
# via -r extra-pdf-image.in
googleapis-common-protos==1.67.0
# via
# google-api-core
# grpcio-status
grpcio==1.70.0
# via
# -c ././deps/constraints.txt
# -c ./deps/constraints.txt
# google-api-core
# grpcio-status
grpcio-status==1.70.0
# via google-api-core
huggingface-hub==0.28.0
huggingface-hub==0.28.1
# via
# timm
# tokenizers
@ -77,7 +77,7 @@ humanfriendly==10.0
# via coloredlogs
idna==3.10
# via
# -c ./base.txt
# -c base.txt
# requests
importlib-resources==6.5.2
# via matplotlib
@ -85,9 +85,9 @@ jinja2==3.1.5
# via torch
kiwisolver==1.4.7
# via matplotlib
lxml==5.3.0
lxml==5.3.1
# via
# -c ./base.txt
# -c base.txt
# pikepdf
markupsafe==3.0.2
# via jinja2
@ -101,7 +101,7 @@ networkx==3.2.1
# via torch
numpy==1.26.4
# via
# -c ./base.txt
# -c base.txt
# contourpy
# matplotlib
# onnx
@ -117,7 +117,7 @@ omegaconf==2.3.0
# via effdet
onnx==1.17.0
# via
# -r ./extra-pdf-image.in
# -r extra-pdf-image.in
# unstructured-inference
onnxruntime==1.19.2
# via unstructured-inference
@ -125,7 +125,7 @@ opencv-python==4.11.0.86
# via unstructured-inference
packaging==24.2
# via
# -c ./base.txt
# -c base.txt
# huggingface-hub
# matplotlib
# onnxruntime
@ -135,15 +135,15 @@ packaging==24.2
pandas==2.2.3
# via unstructured-inference
pdf2image==1.17.0
# via -r ./extra-pdf-image.in
# via -r extra-pdf-image.in
pdfminer-six==20240706
# via
# -r ./extra-pdf-image.in
# -r extra-pdf-image.in
# unstructured-inference
pi-heif==0.21.0
# via -r ./extra-pdf-image.in
pikepdf==9.5.1
# via -r ./extra-pdf-image.in
# via -r extra-pdf-image.in
pikepdf==9.5.2
# via -r extra-pdf-image.in
pillow==11.1.0
# via
# matplotlib
@ -175,24 +175,24 @@ pycocotools==2.0.8
# via effdet
pycparser==2.22
# via
# -c ./base.txt
# -c base.txt
# cffi
pyparsing==3.2.1
# via matplotlib
pypdf==5.2.0
pypdf==5.3.0
# via
# -c ./base.txt
# -r ./extra-pdf-image.in
# -c base.txt
# -r extra-pdf-image.in
pypdfium2==4.30.1
# via unstructured-inference
python-dateutil==2.9.0.post0
# via
# -c ./base.txt
# -c base.txt
# matplotlib
# pandas
python-multipart==0.0.20
# via unstructured-inference
pytz==2024.2
pytz==2025.1
# via pandas
pyyaml==6.0.2
# via
@ -200,17 +200,17 @@ pyyaml==6.0.2
# omegaconf
# timm
# transformers
rapidfuzz==3.11.0
rapidfuzz==3.12.1
# via
# -c ./base.txt
# -c base.txt
# unstructured-inference
regex==2024.11.6
# via
# -c ./base.txt
# -c base.txt
# transformers
requests==2.32.3
# via
# -c ./base.txt
# -c base.txt
# google-api-core
# huggingface-hub
# transformers
@ -224,7 +224,7 @@ scipy==1.13.1
# via unstructured-inference
six==1.17.0
# via
# -c ./base.txt
# -c base.txt
# python-dateutil
sympy==1.13.1
# via
@ -234,9 +234,9 @@ timm==1.0.14
# via
# effdet
# unstructured-inference
tokenizers==0.19.1
tokenizers==0.21.0
# via
# -c ././deps/constraints.txt
# -c ./deps/constraints.txt
# transformers
torch==2.6.0
# via
@ -250,31 +250,31 @@ torchvision==0.21.0
# timm
tqdm==4.67.1
# via
# -c ./base.txt
# -c base.txt
# huggingface-hub
# transformers
transformers==4.44.2
transformers==4.49.0
# via unstructured-inference
typing-extensions==4.12.2
# via
# -c ./base.txt
# -c base.txt
# huggingface-hub
# pypdf
# torch
tzdata==2025.1
# via pandas
unstructured-inference==0.8.7
# via -r ./extra-pdf-image.in
# via -r extra-pdf-image.in
unstructured-pytesseract==0.3.13
# via -r ./extra-pdf-image.in
# via -r extra-pdf-image.in
urllib3==1.26.20
# via
# -c ././deps/constraints.txt
# -c ./base.txt
# -c ./deps/constraints.txt
# -c base.txt
# requests
wrapt==1.17.2
# via
# -c ./base.txt
# -c base.txt
# deprecated
zipp==3.21.0
# via importlib-resources

View File

@ -2,14 +2,14 @@
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile ./extra-pptx.in
# pip-compile extra-pptx.in
#
lxml==5.3.0
lxml==5.3.1
# via python-pptx
pillow==11.1.0
# via python-pptx
python-pptx==1.0.2
# via -r ./extra-pptx.in
# via -r extra-pptx.in
typing-extensions==4.12.2
# via python-pptx
xlsxwriter==3.2.2

View File

@ -2,31 +2,31 @@
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile ./extra-xlsx.in
# pip-compile extra-xlsx.in
#
et-xmlfile==2.0.0
# via openpyxl
networkx==3.2.1
# via -r ./extra-xlsx.in
# via -r extra-xlsx.in
numpy==1.26.4
# via
# -c ./base.txt
# -c base.txt
# pandas
openpyxl==3.1.5
# via -r ./extra-xlsx.in
# via -r extra-xlsx.in
pandas==2.2.3
# via -r ./extra-xlsx.in
# via -r extra-xlsx.in
python-dateutil==2.9.0.post0
# via
# -c ./base.txt
# -c base.txt
# pandas
pytz==2024.2
pytz==2025.1
# via pandas
six==1.17.0
# via
# -c ./base.txt
# -c base.txt
# python-dateutil
tzdata==2025.1
# via pandas
xlrd==2.0.1
# via -r ./extra-xlsx.in
# via -r extra-xlsx.in

View File

@ -2,47 +2,47 @@
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile ./huggingface.in
# pip-compile huggingface.in
#
certifi==2024.12.14
certifi==2025.1.31
# via
# -c ./base.txt
# -c base.txt
# requests
charset-normalizer==3.4.1
# via
# -c ./base.txt
# -c base.txt
# requests
click==8.1.8
# via
# -c ./base.txt
# -c base.txt
# sacremoses
filelock==3.17.0
# via
# huggingface-hub
# torch
# transformers
fsspec==2024.12.0
fsspec==2025.2.0
# via
# huggingface-hub
# torch
huggingface-hub==0.28.0
huggingface-hub==0.28.1
# via
# tokenizers
# transformers
idna==3.10
# via
# -c ./base.txt
# -c base.txt
# requests
jinja2==3.1.5
# via torch
joblib==1.4.2
# via
# -c ./base.txt
# -c base.txt
# sacremoses
langdetect==1.0.9
# via
# -c ./base.txt
# -r ./huggingface.in
# -c base.txt
# -r huggingface.in
markupsafe==3.0.2
# via jinja2
mpmath==1.3.0
@ -51,11 +51,11 @@ networkx==3.2.1
# via torch
numpy==1.26.4
# via
# -c ./base.txt
# -c base.txt
# transformers
packaging==24.2
# via
# -c ./base.txt
# -c base.txt
# huggingface-hub
# transformers
pyyaml==6.0.2
@ -64,47 +64,47 @@ pyyaml==6.0.2
# transformers
regex==2024.11.6
# via
# -c ./base.txt
# -c base.txt
# sacremoses
# transformers
requests==2.32.3
# via
# -c ./base.txt
# -c base.txt
# huggingface-hub
# transformers
sacremoses==0.1.1
# via -r ./huggingface.in
# via -r huggingface.in
safetensors==0.5.2
# via transformers
sentencepiece==0.2.0
# via -r ./huggingface.in
# via -r huggingface.in
six==1.17.0
# via
# -c ./base.txt
# -c base.txt
# langdetect
sympy==1.13.1
# via torch
tokenizers==0.19.1
tokenizers==0.21.0
# via
# -c ././deps/constraints.txt
# -c ./deps/constraints.txt
# transformers
torch==2.6.0
# via -r ./huggingface.in
# via -r huggingface.in
tqdm==4.67.1
# via
# -c ./base.txt
# -c base.txt
# huggingface-hub
# sacremoses
# transformers
transformers==4.44.2
# via -r ./huggingface.in
transformers==4.49.0
# via -r huggingface.in
typing-extensions==4.12.2
# via
# -c ./base.txt
# -c base.txt
# huggingface-hub
# torch
urllib3==1.26.20
# via
# -c ././deps/constraints.txt
# -c ./base.txt
# -c ./deps/constraints.txt
# -c base.txt
# requests

View File

@ -2,13 +2,13 @@
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile ./test.in
# pip-compile test.in
#
annotated-types==0.7.0
# via pydantic
anyio==4.8.0
# via
# -c ./base.txt
# -c base.txt
# httpx
appdirs==1.4.4
# via label-studio-sdk
@ -19,29 +19,29 @@ attrs==25.1.0
# jsonschema
# referencing
autoflake==2.3.1
# via -r ./test.in
# via -r test.in
black==25.1.0
# via
# -r ./test.in
# -r test.in
# datamodel-code-generator
certifi==2024.12.14
certifi==2025.1.31
# via
# -c ./base.txt
# -c base.txt
# httpcore
# httpx
# requests
charset-normalizer==3.4.1
# via
# -c ./base.txt
# -c base.txt
# requests
click==8.1.8
# via
# -c ./base.txt
# -c base.txt
# black
# nltk
coverage[toml]==7.6.10
coverage[toml]==7.6.12
# via
# -r ./test.in
# -r test.in
# pytest-cov
datamodel-code-generator==0.26.1
# via label-studio-sdk
@ -51,40 +51,40 @@ email-validator==2.2.0
# via pydantic
exceptiongroup==1.2.2
# via
# -c ./base.txt
# -c base.txt
# anyio
# pytest
faker==35.0.0
faker==36.1.1
# via jsf
flake8==7.1.1
flake8==7.1.2
# via
# -r ./test.in
# -r test.in
# flake8-print
flake8-print==5.0.0
# via -r ./test.in
# via -r test.in
freezegun==1.5.1
# via -r ./test.in
# via -r test.in
genson==1.3.0
# via datamodel-code-generator
grpcio==1.70.0
# via
# -c ././deps/constraints.txt
# -r ./test.in
# -c ./deps/constraints.txt
# -r test.in
h11==0.14.0
# via
# -c ./base.txt
# -c base.txt
# httpcore
httpcore==1.0.7
# via
# -c ./base.txt
# -c base.txt
# httpx
httpx==0.28.1
# via
# -c ./base.txt
# -c base.txt
# label-studio-sdk
idna==3.10
# via
# -c ./base.txt
# -c base.txt
# anyio
# email-validator
# httpx
@ -102,7 +102,7 @@ jinja2==3.1.5
# via datamodel-code-generator
joblib==1.4.2
# via
# -c ./base.txt
# -c base.txt
# nltk
jsf==0.11.2
# via label-studio-sdk
@ -112,13 +112,13 @@ jsonschema==4.23.0
# label-studio-sdk
jsonschema-specifications==2024.10.1
# via jsonschema
label-studio-sdk==1.0.8
# via -r ./test.in
label-studio-sdk==1.0.10
# via -r test.in
liccheck==0.9.2
# via -r ./test.in
lxml==5.3.0
# via -r test.in
lxml==5.3.1
# via
# -c ./base.txt
# -c base.txt
# label-studio-sdk
markupsafe==3.0.2
# via jinja2
@ -126,25 +126,25 @@ mccabe==0.7.0
# via flake8
multidict==6.1.0
# via yarl
mypy==1.14.1
# via -r ./test.in
mypy==1.15.0
# via -r test.in
mypy-extensions==1.0.0
# via
# -c ./base.txt
# -c base.txt
# black
# mypy
nltk==3.9.1
# via
# -c ./base.txt
# -c base.txt
# label-studio-sdk
numpy==1.26.4
# via
# -c ./base.txt
# -c base.txt
# label-studio-sdk
# pandas
packaging==24.2
# via
# -c ./base.txt
# -c base.txt
# black
# datamodel-code-generator
# pytest
@ -166,12 +166,14 @@ pycodestyle==2.12.1
# flake8-print
pydantic[email]==2.10.6
# via
# -r ./test.in
# -r test.in
# datamodel-code-generator
# jsf
# label-studio-sdk
pydantic-core==2.27.2
# via pydantic
# via
# label-studio-sdk
# pydantic
pyflakes==3.2.0
# via
# autoflake
@ -181,16 +183,15 @@ pytest==8.3.4
# pytest-cov
# pytest-mock
pytest-cov==6.0.0
# via -r ./test.in
# via -r test.in
pytest-mock==3.14.0
# via -r ./test.in
# via -r test.in
python-dateutil==2.9.0.post0
# via
# -c ./base.txt
# faker
# -c base.txt
# freezegun
# pandas
pytz==2024.2
pytz==2025.1
# via pandas
pyyaml==6.0.2
# via
@ -202,11 +203,11 @@ referencing==0.36.2
# jsonschema-specifications
regex==2024.11.6
# via
# -c ./base.txt
# -c base.txt
# nltk
requests==2.32.3
# via
# -c ./base.txt
# -c base.txt
# label-studio-sdk
# requests-mock
# smart-open
@ -218,19 +219,19 @@ rpds-py==0.22.3
# referencing
rstr==3.2.2
# via jsf
ruff==0.9.3
# via -r ./test.in
ruff==0.9.6
# via -r test.in
semantic-version==2.10.0
# via liccheck
six==1.17.0
# via
# -c ./base.txt
# -c base.txt
# python-dateutil
smart-open[http]==7.1.0
# via jsf
sniffio==1.3.1
# via
# -c ./base.txt
# -c base.txt
# anyio
toml==0.10.2
# via
@ -245,24 +246,23 @@ tomli==2.2.1
# pytest
tqdm==4.67.1
# via
# -c ./base.txt
# -c base.txt
# nltk
types-click==7.1.8
# via -r ./test.in
# via -r test.in
types-markdown==3.7.0.20241204
# via -r ./test.in
# via -r test.in
types-requests==2.31.0.6
# via -r ./test.in
# via -r test.in
types-tabulate==0.9.0.20241207
# via -r ./test.in
# via -r test.in
types-urllib3==1.26.25.14
# via types-requests
typing-extensions==4.12.2
# via
# -c ./base.txt
# -c base.txt
# anyio
# black
# faker
# jsf
# label-studio-sdk
# multidict
@ -271,20 +271,22 @@ typing-extensions==4.12.2
# pydantic-core
# referencing
tzdata==2025.1
# via pandas
# via
# faker
# pandas
ujson==5.10.0
# via label-studio-sdk
urllib3==1.26.20
# via
# -c ././deps/constraints.txt
# -c ./base.txt
# -c ./deps/constraints.txt
# -c base.txt
# requests
# vcrpy
vcrpy==7.0.0
# via -r ./test.in
# via -r test.in
wrapt==1.17.2
# via
# -c ./base.txt
# -c base.txt
# smart-open
# vcrpy
xmljson==0.2.1

View File

@ -1 +1 @@
__version__ = "0.16.21" # pragma: no cover
__version__ = "0.16.22" # pragma: no cover

View File

@ -52,6 +52,7 @@ def translate_text(text: str, source_lang: Optional[str] = None, target_lang: st
return text
model_name = _get_opus_mt_model_name(_source_lang, target_lang)
print(f"Using model: {model_name}")
try:
tokenizer = MarianTokenizer.from_pretrained(model_name)
@ -79,7 +80,7 @@ def _translate_text(text, model, tokenizer):
with warnings.catch_warnings():
warnings.simplefilter("ignore")
translated = model.generate(
**tokenizer([text], return_tensors="pt", padding="max_length", max_length=512),
**tokenizer([text], return_tensors="pt", padding=True, truncation=True),
)
return [tokenizer.decode(t, max_new_tokens=512, skip_special_tokens=True) for t in translated][
0