diff --git a/olmocr/train/config.py b/olmocr/train/config.py
index 42f1c41..5e653cb 100644
--- a/olmocr/train/config.py
+++ b/olmocr/train/config.py
@@ -147,11 +147,6 @@ class TrainingConfig:
     gradient_checkpointing: bool = True
     gradient_checkpointing_kwargs: Dict[str, Any] = field(default_factory=lambda: {"use_reentrant": False})
 
-    # Mixed precision
-    fp16: bool = False
-    bf16: bool = True
-    tf32: bool = True  # Enable TF32 on Ampere GPUs
-
     # Evaluation and checkpointing
     evaluation_strategy: str = "steps"
     eval_steps: int = 500
diff --git a/olmocr/train/train.py b/olmocr/train/train.py
index d7dba7a..68cd84d 100644
--- a/olmocr/train/train.py
+++ b/olmocr/train/train.py
@@ -178,9 +178,7 @@ def main():
         adam_epsilon=config.training.adam_epsilon,
         weight_decay=config.training.weight_decay,
         max_grad_norm=config.training.max_grad_norm,
-        fp16=config.training.fp16,
-        bf16=config.training.bf16,
-        tf32=config.training.tf32,
+        bf16=True,  # We're sticking with this known good reduced precision option
         eval_strategy=config.training.evaluation_strategy,
         eval_steps=config.training.eval_steps,
         save_strategy=config.training.save_strategy,
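With the config knobs removed, train.py now pins bf16=True unconditionally, so a run on hardware without bfloat16 support would only fail once training starts. A minimal sketch of a fail-fast guard one could place near the TrainingArguments construction, assuming PyTorch's torch.cuda.is_bf16_supported(); the helper name assert_bf16_supported is hypothetical and not part of olmocr:

```python
import torch


def assert_bf16_supported() -> None:
    """Fail fast if the current device cannot train in bfloat16.

    Hypothetical guard, not part of this PR: since bf16=True is now
    hardcoded, this surfaces unsupported hardware (e.g. pre-Ampere
    GPUs) at startup rather than mid-run.
    """
    if not (torch.cuda.is_available() and torch.cuda.is_bf16_supported()):
        raise RuntimeError(
            "Training is hardcoded to bf16, but this device does not "
            "support bfloat16; use an Ampere-or-newer GPU."
        )
```

One side effect worth noting: with the tf32 argument gone, TrainingArguments no longer touches the TF32 setting, so it falls back to PyTorch's default (torch.backends.cuda.matmul.allow_tf32, which is False for matmuls in recent PyTorch releases).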