diff --git a/olmocr/train/grpo_train.py b/olmocr/train/grpo_train.py index 8f2dbd7..61859c0 100644 --- a/olmocr/train/grpo_train.py +++ b/olmocr/train/grpo_train.py @@ -406,7 +406,6 @@ def main(): warmup_steps=10, max_completion_length=3000, temperature=0.7, - do_sample=True, report_to=report_to, remove_unused_columns=False, torch_dtype=torch.bfloat16,