Add backward compatibility for metric calculation (#3798)

Co-authored-by: cragwolfe <crag@unstructured.io>
This commit is contained in:
Pluto 2024-11-26 19:14:16 +01:00 committed by GitHub
parent e48d79eca1
commit 0fe6ac60aa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 24 additions and 3 deletions

View File

@ -1,3 +1,12 @@
## 0.16.8
### Enhancements
- **Metrics: Weighted table average is optional**
### Features
### Fixes
## 0.16.7 ## 0.16.7
### Enhancements ### Enhancements
@ -7,6 +16,7 @@
### Fixes ### Fixes
## 0.16.6 ## 0.16.6
### Enhancements ### Enhancements

View File

@ -1 +1 @@
__version__ = "0.16.7" # pragma: no cover __version__ = "0.16.8" # pragma: no cover

View File

@ -216,6 +216,8 @@ class TableStructureMetricsCalculator(BaseMetricsCalculator):
""" """
cutoff: Optional[float] = None cutoff: Optional[float] = None
weighted_average: bool = True
include_false_positives: bool = True
def __post_init__(self): def __post_init__(self):
super().__post_init__() super().__post_init__()
@ -287,11 +289,20 @@ class TableStructureMetricsCalculator(BaseMetricsCalculator):
df = pd.DataFrame(rows, columns=headers) df = pd.DataFrame(rows, columns=headers)
df["_table_weights"] = df["total_tables"] df["_table_weights"] = df["total_tables"]
# we give false positive tables a 1 table worth of weight in computing table level acc
df["_table_weights"][df.total_tables.eq(0) & df.total_predicted_tables.gt(0)] = 1 if self.include_false_positives:
# we give false positive tables a 1 table worth of weight in computing table level acc
df["_table_weights"][df.total_tables.eq(0) & df.total_predicted_tables.gt(0)] = 1
# filter down to only those with actual and/or predicted tables # filter down to only those with actual and/or predicted tables
has_tables_df = df[df["_table_weights"] > 0] has_tables_df = df[df["_table_weights"] > 0]
if not self.weighted_average:
# assign a weight of 1 to every non-zero element
df["_table_weights"] = df["_table_weights"].apply(
lambda table_weight: 1 if table_weight != 0 else 0
)
if has_tables_df.empty: if has_tables_df.empty:
agg_df = pd.DataFrame( agg_df = pd.DataFrame(
[[metric, None, None, None, 0] for metric in self.supported_metric_names] [[metric, None, None, None, 0] for metric in self.supported_metric_names]