mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-12-26 06:36:06 +00:00
add tests for more coverage
This commit is contained in:
parent
775bfb7588
commit
fcc52d8d33
BIN
example-docs/blank.xlsx
Normal file
BIN
example-docs/blank.xlsx
Normal file
Binary file not shown.
@ -40,6 +40,12 @@ def test_get_element_type_frequency(filename, frequency):
|
||||
assert elements_freq == frequency
|
||||
|
||||
|
||||
def test_get_element_type_frequency_zero_len():
    """Check the element-type frequency computed for ``example-docs/blank.xlsx``.

    The partitioned elements are serialized with ``elements_to_json`` and fed to
    ``get_element_type_frequency``; the resulting mapping is expected to hold
    no entries.
    """
    # Plain string literal: the path has no placeholders, so no f-prefix is needed.
    elements = partition(filename="example-docs/blank.xlsx")
    elements_freq = get_element_type_frequency(elements_to_json(elements))
    assert len(elements_freq) == 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("filename", "expected_frequency", "percent_matched"),
|
||||
[
|
||||
@ -107,3 +113,11 @@ def test_calculate_element_type_percent_match(filename, expected_frequency, perc
|
||||
round(calculate_element_type_percent_match(elements_frequency, expected_frequency, 0.8), 2)
|
||||
== percent_matched[2]
|
||||
)
|
||||
|
||||
|
||||
def test_calculate_element_type_percent_match_zero_source_output():
    """Check percent match when one side comes from ``example-docs/blank.xlsx``.

    ``calculate_element_type_percent_match`` is called with the blank-document
    frequency in either argument position and is expected to return ``0.0``
    both times.
    """
    with_frequency = {("Header", None): 1}
    # Plain string literal: the path has no placeholders, so no f-prefix is needed.
    elements = partition(filename="example-docs/blank.xlsx")
    # NOTE(review): presumably an empty mapping for a blank workbook - confirm.
    no_frequency = get_element_type_frequency(elements_to_json(elements))
    assert calculate_element_type_percent_match(with_frequency, no_frequency) == 0.0
    assert calculate_element_type_percent_match(no_frequency, with_frequency) == 0.0
|
||||
|
||||
@ -216,3 +216,10 @@ def test_calculate_percent_missing_text(output_text, source_text, expected_perce
|
||||
text_extraction.calculate_percent_missing_text(output_text, source_text)
|
||||
== expected_percentage
|
||||
)
|
||||
|
||||
|
||||
def test_error_return_type():
    """Check that ``calculate_edit_distance`` raises ``ValueError`` for ``"typo"``.

    Both inputs are produced by partitioning ``example-docs/fake-text.txt``.
    """
    # Plain string literals: the paths have no placeholders, so no f-prefix is needed.
    output_elements = partition(filename="example-docs/fake-text.txt")
    source_elements = partition(filename="example-docs/fake-text.txt")
    # NOTE(review): presumably the third argument ("typo") is the invalid value
    # that triggers the error - confirm against calculate_edit_distance.
    with pytest.raises(ValueError):
        text_extraction.calculate_edit_distance(output_elements, source_elements, "typo")
|
||||
@ -226,6 +226,12 @@ def test_partition_xlsx_subtables(filename="example-docs/vodafone.xlsx"):
|
||||
assert len(elements) == 6
|
||||
|
||||
|
||||
def test_partition_xlsx_not_find_subtable(filename="example-docs/vodafone.xlsx"):
    """Check ``partition_xlsx`` output when ``find_subtable=False``.

    The result is expected to consist of exactly one element, and that element
    is expected to be a ``Table``.
    """
    elements = partition_xlsx(filename, find_subtable=False)
    table_count = len([element for element in elements if isinstance(element, Table)])
    assert table_count == 1
    assert len(elements) == 1
|
||||
|
||||
|
||||
def test_partition_xlsx_element_metadata_has_languages():
|
||||
filename = "example-docs/stanley-cups.xlsx"
|
||||
elements = partition_xlsx(filename=filename)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user