diff --git a/olmocr/bench/runners/run_docling.py b/olmocr/bench/runners/run_docling.py
index dbcaf26..600a492 100644
--- a/olmocr/bench/runners/run_docling.py
+++ b/olmocr/bench/runners/run_docling.py
@@ -29,7 +29,7 @@ def init_model(model_name: str = "ds4sd/SmolDocling-256M-preview"):
         torch_dtype=torch.bfloat16,
         # _attn_implementation="flash_attention_2" if device.type == "cuda" else "eager",
         _attn_implementation="eager",
-    ).to(device)
+    ).eval().to(device)
 
     _cached_model = model
     _cached_processor = processor
diff --git a/olmocr/bench/runners/run_transformers.py b/olmocr/bench/runners/run_transformers.py
index 26958c7..9801535 100644
--- a/olmocr/bench/runners/run_transformers.py
+++ b/olmocr/bench/runners/run_transformers.py
@@ -47,7 +47,7 @@ def run_transformers(
     if _cached_model is None:
         model = Qwen2VLForConditionalGeneration.from_pretrained(model, torch_dtype=torch.bfloat16).eval()
         processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
-        model.to(device)
+        model = model.to(device)
 
         _cached_model = model
         _cached_processor = processor