add tests for more coverage

This commit is contained in:
Klaijan Sinteppadon 2023-10-17 22:13:15 -04:00
parent 775bfb7588
commit fcc52d8d33
4 changed files with 27 additions and 0 deletions

BIN
example-docs/blank.xlsx Normal file

Binary file not shown.

View File

@ -40,6 +40,12 @@ def test_get_element_type_frequency(filename, frequency):
assert elements_freq == frequency
def test_get_element_type_frequency_zero_len():
    """Partitioning the blank.xlsx fixture should yield no element-type frequencies."""
    # Plain string literal: the path has no placeholders, so no f-prefix is needed.
    elements = partition(filename="example-docs/blank.xlsx")
    elements_freq = get_element_type_frequency(elements_to_json(elements))
    assert len(elements_freq) == 0
@pytest.mark.parametrize(
("filename", "expected_frequency", "percent_matched"),
[
@ -107,3 +113,11 @@ def test_calculate_element_type_percent_match(filename, expected_frequency, perc
round(calculate_element_type_percent_match(elements_frequency, expected_frequency, 0.8), 2)
== percent_matched[2]
)
def test_calculate_element_type_percent_match_zero_source_output():
    """Percent match should be 0.0 when either side comes from the blank.xlsx fixture.

    The frequencies computed from blank.xlsx are compared against a single-entry
    frequency mapping in both argument orders.
    """
    with_frequency = {("Header", None): 1}
    # Plain string literal: the path has no placeholders, so no f-prefix is needed.
    elements = partition(filename="example-docs/blank.xlsx")
    no_frequency = get_element_type_frequency(elements_to_json(elements))
    assert calculate_element_type_percent_match(with_frequency, no_frequency) == 0.0
    assert calculate_element_type_percent_match(no_frequency, with_frequency) == 0.0

View File

@ -216,3 +216,10 @@ def test_calculate_percent_missing_text(output_text, source_text, expected_perce
text_extraction.calculate_percent_missing_text(output_text, source_text)
== expected_percentage
)
def test_error_return_type():
    """calculate_edit_distance should raise ValueError when given "typo"."""
    # Plain string literals: the paths have no placeholders, so no f-prefix is needed.
    output_elements = partition(filename="example-docs/fake-text.txt")
    source_elements = partition(filename="example-docs/fake-text.txt")
    with pytest.raises(ValueError):
        # NOTE(review): "typo" is passed as the third positional argument, and the
        # partitioned elements are passed as-is. Confirm against the signature of
        # calculate_edit_distance that this binds to the return-type parameter;
        # if not, pass it by keyword so the intended error path is exercised.
        text_extraction.calculate_edit_distance(output_elements, source_elements, "typo")

View File

@ -226,6 +226,12 @@ def test_partition_xlsx_subtables(filename="example-docs/vodafone.xlsx"):
assert len(elements) == 6
def test_partition_xlsx_not_find_subtable(filename="example-docs/vodafone.xlsx"):
    """With find_subtable=False the workbook should yield exactly one element, a Table."""
    partitioned = partition_xlsx(filename, find_subtable=False)
    # Count the Table instances explicitly before checking the overall length.
    table_count = 0
    for item in partitioned:
        if isinstance(item, Table):
            table_count += 1
    assert table_count == 1
    assert len(partitioned) == 1
def test_partition_xlsx_element_metadata_has_languages():
filename = "example-docs/stanley-cups.xlsx"
elements = partition_xlsx(filename=filename)