diff --git a/scripts/pareto/ocr_pareto.pdf b/scripts/pareto/ocr_pareto.pdf new file mode 100644 index 0000000..3d7d42d Binary files /dev/null and b/scripts/pareto/ocr_pareto.pdf differ diff --git a/scripts/pareto/ocr_pareto.png b/scripts/pareto/ocr_pareto.png new file mode 100644 index 0000000..bcb6ea9 Binary files /dev/null and b/scripts/pareto/ocr_pareto.png differ diff --git a/scripts/pareto_plot.py b/scripts/pareto/pareto_plot.py similarity index 96% rename from scripts/pareto_plot.py rename to scripts/pareto/pareto_plot.py index 7c04a70..d2959b6 100644 --- a/scripts/pareto_plot.py +++ b/scripts/pareto/pareto_plot.py @@ -69,7 +69,7 @@ data = { "Qwen 2 VL", "Qwen 2.5 VL", ], - COST_COLUMN_NAME: [12480, 6240, 1000, 596, 499, 249, 235, 178, 178, 178], # Same cost as Ours # Same cost as Ours + COST_COLUMN_NAME: [12480, 6240, 1000, 596, 499, 249, 1492, 178, 178, 178], # Same cost as Ours # Same cost as Ours PERF_COLUMN_NAME: [ 69.9, # GPT-4o (Anchored) 69.9, # Same performance for batch @@ -78,7 +78,7 @@ data = { 63.8, # Gemini Flash 2 (Anchored) 63.8, # Same performance for batch 70.1, # marker v1.7.5 base - 77.4, # Ours (performance is the same across hardware) + 75.5, # Ours (performance is the same across hardware) 31.5, # Qwen2VL 65.5, # Qwen2.5VL ], @@ -131,8 +131,8 @@ model_label_offsets = { "Mistral OCR": [-20, 10], "MinerU": [-15, -20], "Gemini Flash 2": [-10, 10], - "Gemini Flash 2 (Batch)": [-50, -15], - "Marker v1.7.5": [-20, 15], + "Gemini Flash 2 (Batch)": [-50, -20], + "Marker v1.7.5": [-25, -20], "Ours": [-20, 10], "Qwen 2 VL": [-35, 10], "Qwen 2.5 VL": [-35, 10],