Mirror of https://github.com/Unstructured-IO/unstructured.git

This pull request adds table detection metrics.

One case I considered:

Case: two tables are predicted and both are matched with the same table in the ground truth.
Question: is this matching correct for both predictions, or only for one?

There are two subcases:
- the table was predicted by the OD model as two sub-tables (i.e. split in half, giving two non-overlapping sub-tables) -> in my opinion both are correct
- it is a false positive from the table-matching script in get_table_level_alignment -> 1 good, 1 wrong

As we don't have bounding boxes, I followed the notebook's calculation script and assumed the pessimistic, second subcase; a sketch of the resulting computation is shown below.
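To make the pessimistic assumption concrete, here is a minimal sketch of how recall, precision, and F1 could be derived from `matched_indices`. It is only an illustration consistent with the expected values in the test below, not the actual implementation of `calculate_table_detection_metrics` in `unstructured.metrics.table.table_eval`:

```python
def detection_metrics_sketch(matched_indices, ground_truth_tables_number):
    """Illustrative only -- not the library's actual implementation.

    matched_indices[i] is the ground-truth table index matched to prediction i,
    or -1 when the prediction matched nothing.
    """
    predicted_tables_number = len(matched_indices)
    # Pessimistic assumption: if several predictions match the same ground-truth
    # table, only one of them counts as a true positive.
    true_positives = len(set(matched_indices) - {-1})

    recall = true_positives / ground_truth_tables_number if ground_truth_tables_number else 0
    precision = true_positives / predicted_tables_number if predicted_tables_number else 0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0
    return recall, precision, f1


# e.g. the ([2, 2, 1, 1], 8) case from the test: two unique matches out of four
# predictions and eight ground-truth tables -> (0.25, 0.5, 0.333...)
print(detection_metrics_sketch([2, 2, 1, 1], 8))
```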
Python · 34 lines · 1.5 KiB
```python
import pytest

from unstructured.metrics.table.table_eval import calculate_table_detection_metrics


@pytest.mark.parametrize(
    ("matched_indices", "ground_truth_tables_number", "expected_metrics"),
    [
        ([0, 1, 2], 3, (1, 1, 1)),  # everything was predicted correctly
        ([2, 1, 0], 3, (1, 1, 1)),  # everything was predicted correctly
        (
            [-1, 2, -1, 1, 0, -1],
            3,
            (1, 0.5, 0.66),
        ),  # some false positives, all tables matched, too many predictions
        ([2, 2, 1, 1], 8, (0.25, 0.5, 0.33)),
        # Some false negatives, all predictions matched with gt, not enough predictions.
        # Precision is not 1 because only one of the predictions matched with index '1'
        # can be correct.
        ([1, -1], 2, (0.5, 0.5, 0.5)),  # typical case with a false positive and a false negative
        ([-1, -1, -1], 2, (0, 0, 0)),  # nothing was matched
        ([-1, -1, -1], 0, (0, 0, 0)),  # there was no table in ground truth
        ([], 0, (0, 0, 0)),  # just zeros to account for errors
    ],
)
def test_calculate_table_metrics(matched_indices, ground_truth_tables_number, expected_metrics):
    expected_recall, expected_precision, expected_f1 = expected_metrics
    pred_recall, pred_precision, pred_f1 = calculate_table_detection_metrics(
        matched_indices=matched_indices, ground_truth_tables_number=ground_truth_tables_number
    )

    assert pred_recall == expected_recall
    assert pred_precision == expected_precision
    assert pred_f1 == pytest.approx(expected_f1, abs=0.01)
```
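For a quick check outside pytest, the function can also be called directly, using the "typical case with a false positive and a false negative" row from the parametrization above:

```python
from unstructured.metrics.table.table_eval import calculate_table_detection_metrics

# One prediction matched ground-truth table 1, one prediction matched nothing,
# and there were two ground-truth tables in total.
recall, precision, f1 = calculate_table_detection_metrics(
    matched_indices=[1, -1], ground_truth_tables_number=2
)
print(recall, precision, f1)  # per the test expectations: 0.5 0.5 0.5
```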