Tom Aarsen 5eb1466acc
Resolve various style issues to improve overall code quality (#282)
* Apply import sorting

ruff . --select I --fix

* Remove unnecessary open mode parameter

ruff . --select UP015 --fix

* Use f-string formatting rather than .format

* Remove extraneous parentheses

Also use "" instead of str()

* Resolve missing trailing commas

ruff . --select COM --fix

* Rewrite list() and dict() calls using literals

ruff . --select C4 --fix

* Add () to pytest.fixture, use tuples for parametrize, etc.

ruff . --select PT --fix

* Simplify code: merge conditionals, context managers

ruff . --select SIM --fix

* Import without unnecessary alias

ruff . --select PLR0402 --fix

* Apply formatting via black

* Rewrite ValueError somewhat

Slightly unrelated to the rest of the PR

* Apply formatting to tests via black

* Update expected exception message to match
0d81564

* Satisfy E501 line too long in test

* Update changelog & version

* Add ruff to make tidy and test deps

* Run 'make tidy'

* Update changelog & version

* Update changelog & version

* Add ruff to 'check' target

Doing so also required fixing some issues that ruff cannot auto-fix. Two of them I suppressed with a noqa: SIM115; the one in __init__ in particular may need some attention, but that refactor is out of scope for this PR.
2023-02-27 11:30:54 -05:00

106 lines
3.5 KiB
Python

from unstructured_inference.inference.layout import LayoutElement
from unstructured.documents.elements import (
CheckBox,
FigureCaption,
ListItem,
NarrativeText,
Text,
Title,
)
from unstructured.partition import common
def test_normalize_layout_element_dict():
    """Check that a dict with type "Title" is normalized into a Title element.

    The resulting element must compare equal to a Title built from the same
    text and coordinates as the input dict.
    """
    raw_element = {
        "type": "Title",
        "coordinates": [[1, 2], [3, 4], [5, 6], [7, 8]],
        "text": "Some lovely text",
    }

    normalized = common.normalize_layout_element(raw_element)

    expected = Title(
        text="Some lovely text",
        coordinates=[[1, 2], [3, 4], [5, 6], [7, 8]],
    )
    assert normalized == expected
def test_normalize_layout_element_dict_caption():
    """Check that a dict with type "Figure" is normalized into a FigureCaption.

    The resulting element must compare equal to a FigureCaption built from the
    same text and coordinates as the input dict.
    """
    raw_element = {
        "type": "Figure",
        "coordinates": [[1, 2], [3, 4], [5, 6], [7, 8]],
        "text": "Some lovely text",
    }

    normalized = common.normalize_layout_element(raw_element)

    expected = FigureCaption(text="Some lovely text", coordinates=[[1, 2], [3, 4], [5, 6], [7, 8]])
    assert normalized == expected
def test_normalize_layout_element_dict_misc():
    """Check that a dict with type "Misc" is normalized into a plain Text element.

    The resulting element must compare equal to a Text built from the same
    text and coordinates as the input dict.
    """
    raw_element = {
        "type": "Misc",
        "coordinates": [[1, 2], [3, 4], [5, 6], [7, 8]],
        "text": "Some lovely text",
    }

    normalized = common.normalize_layout_element(raw_element)

    expected = Text(
        text="Some lovely text",
        coordinates=[[1, 2], [3, 4], [5, 6], [7, 8]],
    )
    assert normalized == expected
def test_normalize_layout_element_layout_element():
    """Check that a LayoutElement of type "Text" becomes a NarrativeText element.

    The resulting element must compare equal to a NarrativeText built from the
    same text and coordinates as the input LayoutElement.
    """
    source = LayoutElement(
        type="Text",
        coordinates=[[1, 2], [3, 4], [5, 6], [7, 8]],
        text="Some lovely text",
    )

    normalized = common.normalize_layout_element(source)

    expected = NarrativeText(text="Some lovely text", coordinates=[[1, 2], [3, 4], [5, 6], [7, 8]])
    assert normalized == expected
def test_normalize_layout_element_checked_box():
    """Check that a LayoutElement of type "Checked" becomes a checked CheckBox.

    The resulting element must compare equal to CheckBox(checked=True) with the
    same coordinates as the input LayoutElement.
    """
    source = LayoutElement(
        type="Checked",
        coordinates=[[1, 2], [3, 4], [5, 6], [7, 8]],
        text="",
    )

    normalized = common.normalize_layout_element(source)

    expected = CheckBox(
        checked=True,
        coordinates=[[1, 2], [3, 4], [5, 6], [7, 8]],
    )
    assert normalized == expected
def test_normalize_layout_element_unchecked_box():
    """Check that a LayoutElement of type "Unchecked" becomes an unchecked CheckBox.

    The resulting element must compare equal to CheckBox(checked=False) with
    the same coordinates as the input LayoutElement.
    """
    source = LayoutElement(
        type="Unchecked",
        coordinates=[[1, 2], [3, 4], [5, 6], [7, 8]],
        text="",
    )

    normalized = common.normalize_layout_element(source)

    expected = CheckBox(
        checked=False,
        coordinates=[[1, 2], [3, 4], [5, 6], [7, 8]],
    )
    assert normalized == expected
def test_normalize_layout_element_enumerated_list():
    """Check that a numbered "List" LayoutElement is split into ListItem elements.

    The input text holds three items prefixed "1.", "2." and "3."; the result
    must equal a list of three ListItems whose text omits those prefixes and
    whose coordinates equal the input coordinates.
    """
    source = LayoutElement(
        type="List",
        coordinates=[[1, 2], [3, 4], [5, 6], [7, 8]],
        text="1. I'm so cool! 2. You're cool too. 3. We're all cool!",
    )

    normalized = common.normalize_layout_element(source)

    item_texts = ("I'm so cool!", "You're cool too.", "We're all cool!")
    # The coordinates literal is evaluated per item, so every expected
    # ListItem gets its own fresh list, as in a hand-written expectation.
    expected = [
        ListItem(text=item_text, coordinates=[[1, 2], [3, 4], [5, 6], [7, 8]])
        for item_text in item_texts
    ]
    assert normalized == expected
def test_normalize_layout_element_bulleted_list():
    """Check that a bulleted "List" LayoutElement is split into ListItem elements.

    The input text holds three items each prefixed with "*"; the result must
    equal a list of three ListItems whose text omits the bullet and whose
    coordinates equal the input coordinates.
    """
    source = LayoutElement(
        type="List",
        coordinates=[[1, 2], [3, 4], [5, 6], [7, 8]],
        text="* I'm so cool! * You're cool too. * We're all cool!",
    )

    normalized = common.normalize_layout_element(source)

    item_texts = ("I'm so cool!", "You're cool too.", "We're all cool!")
    # The coordinates literal is evaluated per item, so every expected
    # ListItem gets its own fresh list, as in a hand-written expectation.
    expected = [
        ListItem(text=item_text, coordinates=[[1, 2], [3, 4], [5, 6], [7, 8]])
        for item_text in item_texts
    ]
    assert normalized == expected