mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-07-13 03:55:55 +00:00

* Apply import sorting ruff . --select I --fix
* Remove unnecessary open mode parameter ruff . --select UP015 --fix
* Use f-string formatting rather than .format
* Remove extraneous parentheses Also use "" instead of str()
* Resolve missing trailing commas ruff . --select COM --fix
* Rewrite list() and dict() calls using literals ruff . --select C4 --fix
* Add () to pytest.fixture, use tuples for parametrize, etc. ruff . --select PT --fix
* Simplify code: merge conditionals, context managers ruff . --select SIM --fix
* Import without unnecessary alias ruff . --select PLR0402 --fix
* Apply formatting via black
* Rewrite ValueError somewhat Slightly unrelated to the rest of the PR
* Apply formatting to tests via black
* Update expected exception message to match 0d81564
* Satisfy E501 line too long in test
* Update changelog & version
* Add ruff to make tidy and test deps
* Run 'make tidy'
* Update changelog & version
* Update changelog & version
* Add ruff to 'check' target Doing so required me to also fix some non-auto-fixable issues. Two of them I fixed with a noqa: SIM115, but especially the one in __init__ may need some attention. That said, that refactor is out of scope of this PR.
41 lines
1.3 KiB
Python
41 lines
1.3 KiB
Python
from functools import partial
|
|
|
|
import pytest
|
|
|
|
from unstructured.cleaners.core import clean_prefix
|
|
from unstructured.cleaners.translate import translate_text
|
|
from unstructured.documents.elements import Element, NoID, Text
|
|
|
|
|
|
def test_text_id():
    """Check that a Text element built from a fixed string exposes the expected id."""
    expected_id = "c69509590d81db2f37f9d75480c8efed"
    element = Text(text="hello there!")
    assert element.id == expected_id
|
|
|
|
|
|
def test_element_defaults_to_blank_id():
    """Check that an Element created with no arguments has a NoID instance as its id."""
    blank = Element()
    assert isinstance(blank.id, NoID)
|
|
|
|
|
|
def test_text_element_apply_cleaners():
    """Check that applying one prefix cleaner removes the leading "[1]" marker."""
    # Cleaner bound to a pattern matching a one- or two-digit bracketed number.
    strip_citation = partial(clean_prefix, pattern=r"\[\d{1,2}\]")

    element = Text(text="[1] A Textbook on Crocodile Habitats")
    element.apply(strip_citation)

    assert str(element) == "A Textbook on Crocodile Habitats"
|
|
|
|
|
|
def test_text_element_apply_multiple_cleaners():
    """Check that several cleaners passed to apply() in one call all take effect.

    The expected text has the "[1]" prefix removed and is in Russian.
    """
    strip_citation = partial(clean_prefix, pattern=r"\[\d{1,2}\]")
    to_russian = partial(translate_text, target_lang="ru")

    element = Text(text="[1] A Textbook on Crocodile Habitats")
    element.apply(strip_citation, to_russian)

    expected = "Учебник по крокодильным средам обитания"
    assert str(element) == expected
|
|
|
|
|
|
def test_apply_raises_if_func_does_not_produce_string():
    """Check that apply() raises ValueError when a cleaner returns a non-string."""

    def returns_int(_text):
        # Deliberately ignores its input and returns an int instead of a str.
        return 1

    element = Text(text="[1] A Textbook on Crocodile Habitats")
    with pytest.raises(ValueError):
        element.apply(returns_int)
|