mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-07-03 07:05:20 +00:00

* Apply import sorting
  ruff . --select I --fix
* Remove unnecessary open mode parameter
  ruff . --select UP015 --fix
* Use f-string formatting rather than .format
* Remove extraneous parentheses
  Also use "" instead of str()
* Resolve missing trailing commas
  ruff . --select COM --fix
* Rewrite list() and dict() calls using literals
  ruff . --select C4 --fix
* Add () to pytest.fixture, use tuples for parametrize, etc.
  ruff . --select PT --fix
* Simplify code: merge conditionals, context managers
  ruff . --select SIM --fix
* Import without unnecessary alias
  ruff . --select PLR0402 --fix
* Apply formatting via black
* Rewrite ValueError somewhat
  Slightly unrelated to the rest of the PR
* Apply formatting to tests via black
* Update expected exception message to match 0d81564
* Satisfy E501 line too long in test
* Update changelog & version
* Add ruff to make tidy and test deps
* Run 'make tidy'
* Update changelog & version
* Update changelog & version
* Add ruff to 'check' target
  Doing so required me to also fix some non-auto-fixable issues. Two of them I fixed with a noqa: SIM115, but especially the one in __init__ may need some attention. That said, that refactor is out of scope of this PR.
56 lines
1.6 KiB
Python
56 lines
1.6 KiB
Python
import pytest
|
|
|
|
from unstructured.cleaners import translate
|
|
|
|
|
|
def test_get_opus_mt_model_name():
    """The ("ru", "en") pair maps to the Helsinki-NLP OPUS-MT ru-en model id."""
    expected = "Helsinki-NLP/opus-mt-ru-en"
    assert translate._get_opus_mt_model_name("ru", "en") == expected
|
|
|
|
|
|
@pytest.mark.parametrize(
    "code",
    [
        "way-too-long",
        "a",
        "",
        None,
    ],
)
def test_validate_language_code(code):
    """Each parametrized value must make the validator raise ValueError."""
    expect_rejection = pytest.raises(ValueError)
    with expect_rejection:
        translate._validate_language_code(code)
|
|
|
|
|
|
def test_translate_returns_same_text_if_dest_is_same():
    """Passing "en" for both language arguments returns the input unchanged."""
    original = "This is already in English!"
    result = translate.translate_text(original, "en", "en")
    assert result == original
|
|
|
|
|
|
def test_translate_returns_same_text_text_is_empty():
    """Whitespace-only input comes back as-is when no languages are given."""
    blank = " "
    result = translate.translate_text(blank)
    assert result == blank
|
|
|
|
|
|
def test_translate_with_language_specified():
    """German input with "de" passed explicitly yields the expected English."""
    german = "Ich bin ein Berliner!"
    english = translate.translate_text(german, "de")
    assert english == "I'm a Berliner!"
|
|
|
|
|
|
def test_translate_with_no_language_specified():
    """German input with no language arguments yields the expected English."""
    german = "Ich bin ein Berliner!"
    english = translate.translate_text(german)
    assert english == "I'm a Berliner!"
|
|
|
|
|
|
def test_translate_raises_with_bad_language():
    """Passing the language code "zz" must raise ValueError."""
    german = "Ich bin ein Berliner!"
    expect_rejection = pytest.raises(ValueError)
    with expect_rejection:
        translate.translate_text(german, "zz")
|
|
|
|
|
|
def test_tranlate_works_with_russian():
    """Russian input with no language arguments yields the expected English."""
    russian = "Я тоже можно переводать русский язык!"
    english = translate.translate_text(russian)
    assert english == "I can also translate Russian!"
|
|
|
|
|
|
def test_translate_works_with_chinese():
    """Chinese input with no language arguments yields the expected English.

    The comparison used to be a bare expression statement, so its result was
    discarded and the test could only fail if ``translate_text`` raised. It is
    now asserted so that a wrong translation fails the test.
    """
    text = "網站有中、英文版本"
    # NOTE(review): the expected string is taken verbatim from the original
    # (previously unchecked) comparison — confirm it matches the model output.
    assert translate.translate_text(text) == "Website available in Chinese and English"
|
|
|
|
|
|
def translate_works_with_arabic():
    """Check that Arabic input yields the expected English translation.

    The comparison used to be a bare expression statement whose result was
    discarded; it is now asserted so a wrong translation is actually detected.

    NOTE(review): this function has no ``test_`` prefix, so pytest's default
    discovery rules do not collect it and it never runs. If that is not a
    deliberate way of disabling it, rename it to
    ``test_translate_works_with_arabic``.
    """
    text = "مرحباً بكم في متجرنا"
    # NOTE(review): the expected string is taken verbatim from the original
    # (previously unchecked) comparison — confirm it matches the model output.
    assert translate.translate_text(text) == "Welcome to our store."
|