mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2026-01-08 05:10:11 +00:00
chore: exit evaluation script if nothing to do (#1910)
Relates to CI ingest-tests. The last step of test-ingest.sh is to calculate evaluation metrics (comparing gold-standard outputs with the actual output files). If no output files were created, as *should* be the case right now in CI for all Python versions other than 3.10 (which only test a limited number of files/connectors), `unstructured/ingest/evaluate.py` would fail.
This commit is contained in:
parent
670687bb67
commit
ff752e88df
@ -4,6 +4,7 @@ import csv
|
||||
import logging
|
||||
import os
|
||||
import statistics
|
||||
import sys
|
||||
from typing import Any, List, Optional, Tuple
|
||||
|
||||
import click
|
||||
@ -81,11 +82,21 @@ def measure_text_edit_distance(
|
||||
Calculates text accuracy and percent missing. After looped through the whole list, write to tsv.
|
||||
Also calculates the aggregated accuracy and percent missing.
|
||||
"""
|
||||
|
||||
print(f"output_dir is {output_dir}")
|
||||
print(f"source_dir is {source_dir}")
|
||||
if not output_list:
|
||||
output_list = _listdir_recursive(output_dir)
|
||||
if not source_list:
|
||||
source_list = _listdir_recursive(source_dir)
|
||||
|
||||
print(f"output_list is {output_list}")
|
||||
print(f"source_list is {source_list}")
|
||||
|
||||
if not output_list:
|
||||
print("No output files to calculate to edit distances for, exiting")
|
||||
sys.exit(0)
|
||||
|
||||
rows = []
|
||||
accuracy_scores: List[float] = []
|
||||
percent_missing_scores: List[float] = []
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user