mirror of
https://github.com/allenai/olmocr.git
synced 2025-12-28 07:34:13 +00:00
Updated eval script
This commit is contained in:
parent
fb4e585e9f
commit
4fb7e9b184
@ -126,6 +126,9 @@ def load_gold_data(gold_data_path: str) -> dict:
|
||||
|
||||
gold_jsonl_files = list_jsonl_files(gold_data_path)
|
||||
|
||||
gold_errors = 0
|
||||
gold_overruns = 0
|
||||
|
||||
for path in gold_jsonl_files:
|
||||
# Load the JSON file
|
||||
with smart_open(path, 'r') as f:
|
||||
@ -133,9 +136,17 @@ def load_gold_data(gold_data_path: str) -> dict:
|
||||
data = json.loads(line)
|
||||
data = normalize_json_entry(data)
|
||||
|
||||
gold_data[data.goldkey] = data.text
|
||||
if data.error is not None:
|
||||
gold_errors += 1
|
||||
elif data.finish_reason != "stop":
|
||||
gold_overruns += 1
|
||||
else:
|
||||
gold_data[data.goldkey] = data.text
|
||||
|
||||
print(f"Loaded {len(gold_data):,} gold data entries for comparison")
|
||||
print(f"Gold processing errors: {gold_errors}")
|
||||
print(f"Gold overrun errors: {gold_overruns}")
|
||||
print("-----------------------------------------------------------")
|
||||
|
||||
return gold_data
|
||||
|
||||
@ -173,6 +184,8 @@ def process_jsonl_file(jsonl_file, gold_data, comparer):
|
||||
char_weighted_alignment_score = 0
|
||||
total_pages = 0
|
||||
total_chars = 0
|
||||
total_errors = 0
|
||||
total_overruns = 0
|
||||
|
||||
with smart_open(jsonl_file, 'r') as f:
|
||||
for line in f:
|
||||
@ -189,12 +202,16 @@ def process_jsonl_file(jsonl_file, gold_data, comparer):
|
||||
gold_text = gold_text or ""
|
||||
eval_text = eval_text or ""
|
||||
|
||||
# If the eval text or gold text is empty, we skip this page and don't use it for comparison
|
||||
# It means that something was an OCR page, and the text-based pipeline just won't be able to handle that
|
||||
# if len(eval_text.strip()) < 10 or len(gold_text.strip()) < 10:
|
||||
# continue
|
||||
if data.error is not None:
|
||||
total_errors += 1
|
||||
|
||||
if data.finish_reason != "stop":
|
||||
total_overruns += 1
|
||||
|
||||
alignment = comparer.compute(gold_text, eval_text)
|
||||
if len(gold_text.strip()) < 3 and len(eval_text.strip()) < 3:
|
||||
alignment = 1.0
|
||||
else:
|
||||
alignment = comparer.compute(gold_text, eval_text)
|
||||
|
||||
page_data[data.goldkey] = {
|
||||
"s3_path": data.s3_path,
|
||||
@ -209,13 +226,17 @@ def process_jsonl_file(jsonl_file, gold_data, comparer):
|
||||
total_chars += len(gold_text)
|
||||
total_pages += 1
|
||||
|
||||
return total_alignment_score, char_weighted_alignment_score, total_chars, total_pages, page_data
|
||||
return total_alignment_score, char_weighted_alignment_score, total_chars, total_pages, total_errors, total_overruns, page_data
|
||||
|
||||
def do_eval(gold_data_path: str, eval_data_path: str, review_page_name: str, review_page_size: int) -> tuple[float, list[dict]]:
|
||||
gold_data = load_gold_data(gold_data_path)
|
||||
|
||||
total_alignment_score = 0
|
||||
total_char_alignment_score = 0
|
||||
total_weight = 0
|
||||
total_pages = 0
|
||||
total_errors = 0
|
||||
total_overruns = 0
|
||||
total_pages_compared = set()
|
||||
|
||||
page_eval_data = []
|
||||
@ -240,11 +261,15 @@ def do_eval(gold_data_path: str, eval_data_path: str, review_page_name: str, rev
|
||||
|
||||
# Process each future as it completes
|
||||
for future in tqdm(as_completed(futures), total=len(jsonl_files)):
|
||||
alignment_score, char_weighted_score, chars, pages, page_data = future.result() # Get the result of the completed task
|
||||
alignment_score, char_weighted_score, chars, pages, errors, overruns, page_data = future.result() # Get the result of the completed task
|
||||
|
||||
# Aggregate statistics
|
||||
total_alignment_score += char_weighted_score
|
||||
total_alignment_score += alignment_score
|
||||
total_char_alignment_score += char_weighted_score
|
||||
total_weight += chars
|
||||
total_pages += pages
|
||||
total_errors += errors
|
||||
total_overruns += overruns
|
||||
total_pages_compared |= page_data.keys()
|
||||
|
||||
# Generate the eval data
|
||||
@ -258,8 +283,9 @@ def do_eval(gold_data_path: str, eval_data_path: str, review_page_name: str, rev
|
||||
page_eval_data.append(pd)
|
||||
|
||||
print(f"Compared {len(total_pages_compared):,} pages")
|
||||
print(f"Total corpus alignment: {total_alignment_score:.2f}")
|
||||
print(f"Mean alignment: {total_alignment_score / total_weight:.3f}")
|
||||
print(f"Found {total_errors} errors in the eval set, and {total_overruns} cases of length overruns")
|
||||
print(f"Mean page-weighted alignment: {total_alignment_score / total_pages:.3f}")
|
||||
print(f"Mean char-weighted alignment: {total_char_alignment_score / total_weight:.3f}")
|
||||
print("")
|
||||
print("...creating review page")
|
||||
|
||||
|
||||
@ -10,7 +10,7 @@ then
|
||||
fi
|
||||
|
||||
|
||||
EXTRA_ARGS="-c pdelfin/train/config/qwen2vl-7b-lora.yaml --num_proc 64 --save.path \"s3://ai2-oe-data/jakep/experiments/qwen2vl-pdf/v1/models/\${BEAKER_USER_ID}\""
|
||||
EXTRA_ARGS="-c pdelfin/train/config/qwen2vl-7b.yaml --num_proc 64 --save.path \"s3://ai2-oe-data/jakep/experiments/qwen2vl-pdf/v1/models/\${BEAKER_USER_ID}\""
|
||||
|
||||
run_name=$(basename "$0" .sh)
|
||||
|
||||
@ -44,4 +44,4 @@ gantry run \
|
||||
--env-secret WANDB_API_KEY=WANDB_API_KEY \
|
||||
--shared-memory 10GiB \
|
||||
--yes \
|
||||
-- /bin/bash -c "source scripts/beaker/${CLUSTER}-ib.sh && accelerate launch --use_fsdp --multi_gpu --num_processes \${BEAKER_ASSIGNED_GPU_COUNT} --fsdp_offload_params false --fsdp_sharding_strategy FULL_SHARD --fsdp_auto_wrap_policy TRANSFORMER_BASED_WRAP --mixed_precision bf16 -m pdelfin.train.train ${EXTRA_ARGS}"
|
||||
-- /bin/bash -c "source scripts/beaker/${CLUSTER}-ib.sh && accelerate launch --use_fsdp --num_processes \${BEAKER_ASSIGNED_GPU_COUNT} --fsdp_offload_params false --fsdp_sharding_strategy FULL_SHARD --fsdp_auto_wrap_policy TRANSFORMER_BASED_WRAP --mixed_precision bf16 -m pdelfin.train.train ${EXTRA_ARGS}"
|
||||
Loading…
x
Reference in New Issue
Block a user