From 7a744cc0b463a0b6f34afb9ac53daa30824c1ba7 Mon Sep 17 00:00:00 2001 From: Jake Poznanski Date: Sun, 19 Oct 2025 18:21:45 +0000 Subject: [PATCH] Final docs on model setup --- olmocr/train/README.md | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/olmocr/train/README.md b/olmocr/train/README.md index a2bac9f..ff6c0e0 100644 --- a/olmocr/train/README.md +++ b/olmocr/train/README.md @@ -112,12 +112,18 @@ then convert them into HTML templates. Those HTML templates were then rendered, We then ran a GRPO based training process with a reward based on the benchmark score on this synthetic benchmark. ```bash -./scripts/train/grpotrainer-beaker-multi-gpu-augusta.sh --num-gpus 8 --model_name s3://ai2-oe-data/jakep/olmocr/qwen2.5-vl-7b-olmocrv4_1epoch_promptv4_mix102 -5_more_rotation_filtered-8372 --train_bench_data_folder /data/jakep/grpo_data_mixes/olmocr-synthmix-1025-v2-rotate10p/bench_data --reward_bench 1.0 --reward_front_matter 1.0 --reward_eos 1 -.0 --beta 0.01 --name promptv4_mix1025_more_rotation_multigpu_v1_beta_01_lr2e-6_frontmatter1_0_eos_28gen_synthmix-1025_rotate10p_importanceseq_finalrun_filtered_0 --seed 0 --importance_sampling_level sequence --gradient_accumulation_steps 28 --learning_rate 2e-6 --preemptible + +./scripts/train/grpotrainer-beaker-multi-gpu-augusta.sh --num-gpus 8 --model_name s3://ai2-oe-data/jakep/olmocr/qwen2.5-vl-7b-olmocrv4_1epoch_promptv4_mix1025_more_rotation-8372 --train_bench_data_folder /data/jakep/grpo_data_mixes/olmocr-synthmix-1025-v2-rotate10p/bench_data --reward_bench 1.0 --reward_front_matter 1.0 --reward_eos 1.0 --beta 0.01 --name promptv4_mix1025_more_rotation_multigpu_v1_beta_01_lr2e-6_frontmatter1_0_eos_28gen_synthmix-1025_rotate10p_finalrun1 --seed 1 --gradient_accumulation_steps 28 --learning_rate 2e-6 --preemptible + +./scripts/train/grpotrainer-beaker-multi-gpu-augusta.sh --num-gpus 8 --model_name 
s3://ai2-oe-data/jakep/olmocr/qwen2.5-vl-7b-olmocrv4_1epoch_promptv4_mix1025_more_rotation-8372 --train_bench_data_folder /data/jakep/grpo_data_mixes/olmocr-synthmix-1025-v2-rotate10p/bench_data --reward_bench 1.0 --reward_front_matter 1.0 --reward_eos 1.0 --beta 0.01 --name promptv4_mix1025_more_rotation_multigpu_v1_beta_01_lr2e-6_frontmatter1_0_eos_28gen_synthmix-1025_rotate10p_importanceseq_finalrun1 --seed 1 --importance_sampling_level sequence --gradient_accumulation_steps 28 --learning_rate 2e-6 --preemptible ``` -6 seeds were run, and then merged into a final checkpoint. +6 seeds were run, 3 with importance sampling level=sequence, and 3 with importance sampling level=token, and then merged into a final checkpoint. This can be done by passing more arguments to the `prepare_checkpoint` script. + +```bash +# Final souping command for olmocr-7b-1025 +python -m olmocr.train.prepare_checkpoint s3://ai2-oe-data/jakep/olmocr-grpo-checkpoints/promptv4_mix1025_more_rotation_multigpu_v1_beta_01_lr2e-6_frontmatter1_0_eos_28gen_synthmix-1025_rotate10p_finalrun1-multigpu-01K60YDRKCJY82TKF0FP6WE4VA/checkpoint-306/ s3://ai2-oe-data/jakep/olmocr-grpo-checkpoints/promptv4_mix1025_more_rotation_multigpu_v1_beta_01_lr2e-6_frontmatter1_0_eos_28gen_synthmix-1025_rotate10p_finalrun2-multigpu-01K60YGB5Y2G15BG8CX4H1QW23/checkpoint-306/ s3://ai2-oe-data/jakep/olmocr-grpo-checkpoints/promptv4_mix1025_more_rotation_multigpu_v1_beta_01_lr2e-6_frontmatter1_0_eos_28gen_synthmix-1025_rotate10p_finalrun3-multigpu-01K60YGM2QEKJK9FC94JJG5YDP/checkpoint-306/ s3://ai2-oe-data/jakep/olmocr-grpo-checkpoints/promptv4_mix1025_more_rotation_multigpu_v1_beta_01_lr2e-6_frontmatter1_0_eos_28gen_synthmix-1025_rotate10p_importanceseq_finalrun3-multigpu-01K60YJBGC3AR7STTNH23BWH8A/checkpoint-306/ 
s3://ai2-oe-data/jakep/olmocr-grpo-checkpoints/promptv4_mix1025_more_rotation_multigpu_v1_beta_01_lr2e-6_frontmatter1_0_eos_28gen_synthmix-1025_rotate10p_importanceseq_finalrun2-multigpu-01K60YJ315K1GYCPN8VADTN7C3/checkpoint-306/ s3://ai2-oe-data/jakep/olmocr-grpo-checkpoints/promptv4_mix1025_more_rotation_multigpu_v1_beta_01_lr2e-6_frontmatter1_0_eos_28gen_synthmix-1025_rotate10p_importanceseq_finalrun1-multigpu-01K60YHSHCNS9RZWSF9E56J9FB/checkpoint-306/ s3://ai2-oe-data/jakep/olmocr-grpo-checkpoints/promptv4_mix1025_more_rotation_multigpu_v1_beta_01_lr2e-6_frontmatter1_0_eos_28gen_synthmix-1025_rotate10p_soupersoup +``` ### Notes for AI2 If you are a collaborator of AI2, you can use the following scripts to run training and inference