From 4f7623c4294f0f69d122fc269f7ea0310d25193e Mon Sep 17 00:00:00 2001
From: aman-17
Date: Fri, 19 Sep 2025 16:14:05 -0700
Subject: [PATCH] fixed dotsocr runner

---
 olmocr/bench/runners/run_dotsocr.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/olmocr/bench/runners/run_dotsocr.py b/olmocr/bench/runners/run_dotsocr.py
index 911f0e5..123665c 100644
--- a/olmocr/bench/runners/run_dotsocr.py
+++ b/olmocr/bench/runners/run_dotsocr.py
@@ -1,4 +1,5 @@
 import base64
+import os
 from io import BytesIO
 
 import torch
@@ -8,6 +9,10 @@ from qwen_vl_utils import process_vision_info
 
 from olmocr.data.renderpdf import render_pdf_to_base64png
 
+# Set LOCAL_RANK as required by DotsOCR
+if "LOCAL_RANK" not in os.environ:
+    os.environ["LOCAL_RANK"] = "0"
+
 # Global cache for the model and processor.
 _device = "cuda" if torch.cuda.is_available() else "cpu"
 _model = None
@@ -27,6 +32,7 @@ def load_model(model_name: str = "rednote-hilab/dots.ocr"):
     """
     global _model, _processor
     if _model is None or _processor is None:
+        # Load model following the official repo pattern
        _model = AutoModelForCausalLM.from_pretrained(
             model_name,
             attn_implementation="flash_attention_2",
@@ -117,7 +123,7 @@ def run_dotsocr(
         return_tensors="pt",
     )
 
-    inputs = inputs.to(_device)
+    inputs = inputs.to("cuda")
 
     # Inference: Generation of the output
     with torch.no_grad():
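
Usage sketch (review context only, not part of the applied patch): a minimal
example of invoking the fixed runner. The full signature of run_dotsocr is not
visible in this diff, so the pdf_path and page_num parameter names below are
assumptions based on the common olmocr bench-runner convention; the LOCAL_RANK
guard added by this patch runs at import time, before the model loads.

    from olmocr.bench.runners.run_dotsocr import run_dotsocr

    # Hypothetical call: pdf_path and page_num are assumed parameter names;
    # the runner renders the page to an image, runs DotsOCR, and returns text.
    text = run_dotsocr("sample.pdf", page_num=1)
    print(text)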