luke-kucing a7e90f7990
resolve CVEs and HF issue (#4009)
Update requirements to resolve CVEs, and add the HF environment variable to
stop it from reaching out.

updated the Dockerfile with
ENV HF_HUB_OFFLINE=1

to stop it from pinging HF. This was an issue for a gov customer. Also
updated requirements to resolve some open CVEs.

---------

Co-authored-by: cragwolfe <crag@unstructured.io>
Co-authored-by: ryannikolaidis <1208590+ryannikolaidis@users.noreply.github.com>
Co-authored-by: luke-kucing <luke-kucing@users.noreply.github.com>
2025-06-04 18:52:58 +00:00

65 lines
2.0 KiB
Python

import os
import pytest
from unstructured.cleaners import translate
# True only when the CI environment variable is exactly "true"; the skipif
# markers in this module use it to skip tests in the CI pipeline.
IS_CI = os.environ.get("CI") == "true"
def test_get_opus_mt_model_name():
    """The ("ru", "en") pair yields the matching Helsinki-NLP opus-mt model name."""
    expected = "Helsinki-NLP/opus-mt-ru-en"
    assert translate._get_opus_mt_model_name("ru", "en") == expected
@pytest.mark.parametrize(
    "code",
    [
        "way-too-long",
        "a",
        "",
        None,
    ],
)
def test_validate_language_code(code):
    """Every parametrized value must be rejected with a ValueError."""
    with pytest.raises(ValueError):
        translate._validate_language_code(code)
def test_translate_returns_same_text_if_dest_is_same():
    """With "en" passed for both language arguments, the text comes back unchanged."""
    original = "This is already in English!"
    result = translate.translate_text(original, "en", "en")
    assert result == original
def test_translate_returns_same_text_text_is_empty():
    """A whitespace-only string comes back unchanged."""
    blank = " "
    result = translate.translate_text(blank)
    assert result == blank
@pytest.mark.skipif(IS_CI, reason="Skipping this test in CI pipeline")
def test_translate_with_language_specified():
    """German text translated with "de" passed explicitly gives the expected English."""
    translated = translate.translate_text("Ich bin ein Berliner!", "de")
    assert translated == "I'm a Berliner!"
@pytest.mark.skipif(IS_CI, reason="Skipping this test in CI pipeline")
def test_translate_with_no_language_specified():
    """German text translated with no language arguments gives the expected English."""
    translated = translate.translate_text("Ich bin ein Berliner!")
    assert translated == "I'm a Berliner!"
@pytest.mark.skipif(IS_CI, reason="Skipping this test in CI pipeline")
def test_translate_raises_with_bad_language():
    """Passing "zz" as the language argument must raise a ValueError."""
    german_text = "Ich bin ein Berliner!"
    bad_code = "zz"
    with pytest.raises(ValueError):
        translate.translate_text(german_text, bad_code)
@pytest.mark.skipif(IS_CI, reason="Skipping this test in CI pipeline")
def test_tranlate_works_with_russian():
    """Russian text translated with no language arguments gives the expected English."""
    # NOTE(review): the function name is missing an "s" ("tranlate"); it is
    # left as-is here so the existing test ID does not change.
    russian_text = "Я тоже можно переводать русский язык!"
    translated = translate.translate_text(russian_text)
    assert translated == "I can also translate Russian!"
@pytest.mark.skipif(IS_CI, reason="Skipping this test in CI pipeline")
def test_translate_works_with_chinese():
    """Chinese text translated with no language arguments gives the expected English."""
    text = "網站有中、英文版本"
    # This must be an `assert`: a bare `==` expression is evaluated and then
    # discarded, so the test could never fail on a wrong translation.
    assert translate.translate_text(text) == "Website available in Chinese and English"
def translate_works_with_arabic():
    """Check that Arabic text translates to the expected English sentence.

    NOTE(review): the name lacks the ``test_`` prefix, so pytest's default
    discovery rules will not collect this function, and unlike the other
    model-backed translation tests in this module it carries no
    ``skipif(IS_CI, ...)`` marker. Confirm whether it is meant to run before
    renaming it.
    """
    text = "مرحباً بكم في متجرنا"
    # This must be an `assert`: a bare `==` expression is evaluated and then
    # discarded, so the check could never fail.
    assert translate.translate_text(text) == "Welcome to our store."