mirror of
https://github.com/allenai/olmocr.git
synced 2025-06-27 04:00:02 +00:00
add boxplot drawing
This commit is contained in:
parent
a790ba73ee
commit
86b17d0ea3
@ -91,7 +91,8 @@ train = [
|
||||
elo = [
|
||||
"numpy",
|
||||
"scipy",
|
||||
"pandas"
|
||||
"pandas",
|
||||
"matplotlib"
|
||||
]
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
|
BIN
scripts/elo/boxplots.png
Normal file
BIN
scripts/elo/boxplots.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 36 KiB |
196
scripts/elo/draw_boxplots.py
Normal file
196
scripts/elo/draw_boxplots.py
Normal file
@ -0,0 +1,196 @@
|
||||
"""
|
||||
|
||||
Boxplots of Elo ratings with 95% confidence intervals for each method.
|
||||
|
||||
Invocation:
|
||||
python draw_boxplots.py results.txt boxplots.png
|
||||
|
||||
@kylel
|
||||
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import click
|
||||
import matplotlib.font_manager as font_manager
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import requests
|
||||
|
||||
# AI2 colors, as "#rrggbb" hex strings.
# AI2_PINK fills the "pdelf" box; AI2_DARK_TEAL colors tick labels and spines
# and is the dark end of the gradient used for the remaining boxes.
AI2_PINK = "#f0529c"
AI2_DARK_TEAL = "#0a3235"
AI2_TEAL = "#105257"  # NOTE(review): not referenced by the code visible in this script

# Name mappings: method identifier as it appears in the results file -> label
# shown on the x-axis of the plot.
NAME_DISPLAY_MAP = {"pdelf": "olmOCR", "mineru": "MinerU", "marker": "Marker", "gotocr_format": "GOTOCR"}
|
||||
|
||||
|
||||
def download_and_cache_file(url, cache_dir=None, timeout=30):
    """Download a file once and return the path of the locally cached copy.

    The cache entry is named ``<first 12 hex chars of sha256(url)>_<text after
    the last "/" in url>``, so different URLs that share a basename do not
    collide. If that file already exists, no network request is made.

    Args:
        url: Location of the file to fetch.
        cache_dir: Directory holding cached downloads; created if missing.
            Defaults to ``~/.cache/elo_plot``.
        timeout: Seconds to wait on the server before giving up, so a stalled
            connection cannot hang the script forever.

    Returns:
        The cached file's path as a string.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    if cache_dir is None:
        cache_dir = Path.home() / ".cache" / "elo_plot"

    cache_dir = Path(cache_dir)
    cache_dir.mkdir(parents=True, exist_ok=True)

    # Create filename from URL hash
    url_hash = hashlib.sha256(url.encode()).hexdigest()[:12]
    file_name = url.split("/")[-1]
    cached_path = cache_dir / f"{url_hash}_{file_name}"

    if not cached_path.exists():
        # Stream into a sibling ".part" file and rename it into place only
        # after the download finished. Writing straight to cached_path would
        # leave a truncated file behind on failure, and every later run would
        # mistake it for a valid cache hit.
        partial_path = cached_path.with_name(cached_path.name + ".part")
        try:
            # The context manager releases the connection even on errors.
            with requests.get(url, stream=True, timeout=timeout) as response:
                response.raise_for_status()
                with open(partial_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
            partial_path.replace(cached_path)
        finally:
            # Only present if the download or rename failed part-way.
            if partial_path.exists():
                partial_path.unlink()

    return str(cached_path)
|
||||
|
||||
|
||||
def parse_elo_data(file_path):
    """Parse Elo ratings data from a text file.

    Every occurrence of ``<name> <rating> ± <error> [<low>, <high>]`` in the
    file becomes one entry; all numbers must be written with a decimal point.
    Text that does not match that shape (headers, separators, the pairwise
    significance table) is ignored.

    Args:
        file_path: Path of the results file to read.

    Returns:
        A tuple ``(names, medians, errors, ci_low, ci_high)`` of five parallel
        lists in file order: the names as strings, the rest as floats.
    """
    # Decode explicitly as UTF-8: the pattern below contains the non-ASCII
    # "±", so reading with a different platform-default encoding would make
    # every line silently fail to match.
    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()

    # Regular expression to match the data lines
    pattern = r"(\w+)\s+(\d+\.\d+)\s*±\s*(\d+\.\d+)\s*\[(\d+\.\d+),\s*(\d+\.\d+)\]"

    names = []
    medians = []
    errors = []
    ci_low = []
    ci_high = []

    for match in re.finditer(pattern, content):
        name, median, error, low, high = match.groups()
        names.append(name)
        medians.append(float(median))
        errors.append(float(error))
        ci_low.append(float(low))
        ci_high.append(float(high))

    return names, medians, errors, ci_low, ci_high
|
||||
|
||||
|
||||
def create_boxplot(names, medians, errors, ci_low, ci_high, output_path, font_path):
    """Create and save a boxplot of Elo ratings.

    Each method is drawn as one box built from five synthetic points
    ``[ci_low, median - error, median, median + error, ci_high]``: the box
    spans median ± error and the whiskers reach the confidence-interval
    bounds. matplotlib derives the box from the *sorted* points, so this
    layout assumes ``ci_low <= median - error`` and
    ``median + error <= ci_high``.

    Args:
        names: Method identifiers; "pdelf" is highlighted in pink. Names found
            in NAME_DISPLAY_MAP are relabelled, others are shown as-is.
        medians: Elo rating per method.
        errors: The ± value per method (half-height of the box).
        ci_low: Lower confidence bound per method.
        ci_high: Upper confidence bound per method.
        output_path: Where the figure is written (300 dpi, transparent).
        font_path: Font file registered with matplotlib; the plot then
            requests the "Manrope" family.

    Raises:
        ValueError: If ``names`` is empty.
    """
    if not names:
        raise ValueError("No Elo ratings to plot")

    # Set up Manrope font
    font_manager.fontManager.addfont(font_path)
    plt.rcParams["font.family"] = "Manrope"
    plt.rcParams["font.weight"] = "medium"

    # Define colors - pdelf in pink, others blended between light grey (lowest
    # rating) and dark teal (highest rating).
    lowest = min(medians)
    spread = max(medians) - lowest
    base_color = np.array([int(AI2_DARK_TEAL[k : k + 2], 16) for k in (1, 3, 5)]) / 255.0
    grey = np.array([0.7, 0.7, 0.7])  # Light grey
    colors = []
    for name, median in zip(names, medians):
        if name == "pdelf":
            colors.append(AI2_PINK)
            continue
        # With identical ratings there is no range to interpolate over (and
        # the division would fail); draw every box at the dark-teal end.
        performance_ratio = (median - lowest) / spread if spread else 1.0
        blended = base_color * performance_ratio + grey * (1 - performance_ratio)
        colors.append(tuple(np.clip(blended, 0, 1)))

    # Create box plot data
    box_data = [
        [low, median - error, median, median + error, high]
        for low, median, error, high in zip(ci_low, medians, errors, ci_high)
    ]

    # Narrow boxes placed close together
    positions = np.arange(len(names)) * 0.25
    display_names = [NAME_DISPLAY_MAP.get(name, name) for name in names]

    plt.figure(figsize=(4, 4))
    try:
        bp = plt.boxplot(
            box_data,
            # Whiskers always extend to the min/max point, i.e. the CI bounds.
            # A 1.5*IQR rule would draw a wide CI bound as an outlier marker.
            whis=(0, 100),
            patch_artist=True,
            widths=0.15,  # Make boxes much narrower
            medianprops=dict(color="black"),  # Make median line black
            positions=positions,
        )

        # Color each box
        for patch, color in zip(bp["boxes"], colors):
            patch.set_facecolor(color)
            patch.set_alpha(0.8)

        # Tick labels are set here rather than through boxplot() so they can
        # be rotated and colored in one place.
        plt.xticks(
            positions,  # Match positions from boxplot
            display_names,
            rotation=45,
            ha="right",
            color=AI2_DARK_TEAL,
        )
        plt.yticks(color=AI2_DARK_TEAL)

        # Set x-axis limits to maintain proper spacing
        plt.xlim(-0.1, (len(names) - 1) * 0.25 + 0.1)

        plt.tight_layout()

        # Keep only thin left and bottom spines
        ax = plt.gca()
        for side, spine in ax.spines.items():
            spine.set_visible(side in ("left", "bottom"))
        for side in ("left", "bottom"):
            ax.spines[side].set_color(AI2_DARK_TEAL)
            ax.spines[side].set_linewidth(0.5)

        plt.savefig(output_path, dpi=300, bbox_inches="tight", transparent=True)
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close()
|
||||
|
||||
|
||||
@click.command()
@click.argument("input_file", type=click.Path(exists=True))
@click.argument("output_file", type=click.Path())
@click.option(
    "--manrope-medium-font-path",
    type=str,
    default="https://dolma-artifacts.org/Manrope-Medium.ttf",
    help="Path to the Manrope Medium font file (local path or URL)",
)
def main(input_file, output_file, manrope_medium_font_path):
    """Generate a boxplot from Elo ratings data.

    INPUT_FILE: Path to the text file containing Elo ratings data
    OUTPUT_FILE: Path where the plot should be saved
    """
    # The docstring above doubles as the command's --help text.
    try:
        # A font given as an http(s) URL is fetched (and cached) first;
        # anything else is used as a local path unchanged.
        is_remote = manrope_medium_font_path.startswith(("http://", "https://"))
        font_path = download_and_cache_file(manrope_medium_font_path) if is_remote else manrope_medium_font_path

        # Parse the ratings, then hand the five parallel lists to the plotter.
        parsed = parse_elo_data(input_file)
        create_boxplot(*parsed, output_file, font_path)

        click.echo(f"Plot successfully saved to {output_file}")
    except Exception as e:
        # Report any failure on stderr and let click abort the command.
        click.echo(f"Error: {str(e)}", err=True)
        raise click.Abort()


if __name__ == "__main__":
    main()
|
16
scripts/elo/results.txt
Normal file
16
scripts/elo/results.txt
Normal file
@ -0,0 +1,16 @@
|
||||
Bootstrapped Elo Ratings (95% CI):
|
||||
--------------------------------------------------
|
||||
pdelf 1813.0 ± 84.9 [1605.9, 1930.0]
|
||||
mineru 1545.2 ± 99.7 [1336.7, 1714.1]
|
||||
marker 1429.1 ± 100.7 [1267.6, 1645.5]
|
||||
gotocr_format 1212.7 ± 82.0 [1097.3, 1408.3]
|
||||
|
||||
Pairwise Significance Tests:
|
||||
--------------------------------------------------
|
||||
gotocr_format vs marker Δ = -216.3 [-470.8, 135.0] p = 0.218
|
||||
gotocr_format vs mineru Δ = -332.5 [-567.5, 19.3] p = 0.051
|
||||
gotocr_format vs pdelf Δ = -600.3 [-826.1, -344.3] p = 0.000*
|
||||
marker vs mineru Δ = -116.1 [-365.4, 246.5] p = 0.430
|
||||
marker vs pdelf Δ = -383.9 [-610.6, -10.9] p = 0.044*
|
||||
mineru vs pdelf Δ = -267.8 [-517.3, 104.0] p = 0.135
|
||||
|
Loading…
x
Reference in New Issue
Block a user