mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-06-27 02:30:08 +00:00

- Add a helper that runs inference over an image or PDF of a table and compares the result against a ground-truth CSV file.
- The metric is a similarity score between 0 and 1, where 1 is a perfect match and 0 is no match at all.
- Add example docs for testing.
- NOTE: this metric is only relevant to table structure detection. The input should therefore be just the table area of an image/PDF file; table element detection is not evaluated by this metric.
28 lines
771 B
Python
28 lines
771 B
Python
import pytest
|
|
|
|
from unstructured.metrics.table_structure import (
|
|
eval_table_transformer_for_file,
|
|
image_or_pdf_to_dataframe,
|
|
)
|
|
|
|
|
|
@pytest.mark.parametrize(
    "filename",
    [
        "example-docs/table-multi-row-column-cells.png",
        "example-docs/table-multi-row-column-cells.pdf",
    ],
)
def test_image_or_pdf_to_dataframe(filename):
    """Check that the expected row shows up for both the image and the PDF input."""
    expected_row = ["Blind", "5", "1", "4", "34.5%, n=1", "1199 sec, n=1"]
    table = image_or_pdf_to_dataframe(filename)
    # NOTE(review): presumably `table.values` is a NumPy array; if so, `in`
    # compares element-wise and reduces with any(), so this passes when any
    # single cell matches its expected column value rather than requiring a
    # full row match -- confirm whether a stricter check is wanted.
    assert expected_row in table.values
|
|
|
|
|
|
def test_eval_table_transformer_for_file():
    """Guard against a severe drop in table-structure similarity on the example table.

    The example image is scored against its ground-truth CSV. The score is
    expected to lie in (0.8, 1]; the upper bound is inclusive so that a
    perfect match (assumed to be reported as 1) does not fail the test.
    """
    score = eval_table_transformer_for_file(
        "example-docs/table-multi-row-column-cells.png",
        "example-docs/table-multi-row-column-cells-actual.csv",
    )
    # avoid severe degradation of performance
    assert 0.8 < score <= 1
|