diff --git a/olmocr/bench/runners/run_docling.py b/olmocr/bench/runners/run_docling.py
index dbcaf26..600a492 100644
--- a/olmocr/bench/runners/run_docling.py
+++ b/olmocr/bench/runners/run_docling.py
@@ -29,7 +29,7 @@ def init_model(model_name: str = "ds4sd/SmolDocling-256M-preview"):
         torch_dtype=torch.bfloat16,
         # _attn_implementation="flash_attention_2" if device.type == "cuda" else "eager",
         _attn_implementation="eager",
-    ).to(device)
+    ).eval().to(device)
 
     _cached_model = model
     _cached_processor = processor
diff --git a/olmocr/bench/runners/run_transformers.py b/olmocr/bench/runners/run_transformers.py
index 26958c7..9801535 100644
--- a/olmocr/bench/runners/run_transformers.py
+++ b/olmocr/bench/runners/run_transformers.py
@@ -47,7 +47,7 @@ def run_transformers(
     if _cached_model is None:
         model = Qwen2VLForConditionalGeneration.from_pretrained(model, torch_dtype=torch.bfloat16).eval()
         processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
-        model.to(device)
+        model = model.to(device)
 
         _cached_model = model
         _cached_processor = processor