Adding paddlepaddle v5 runner for benchmarking

2025-11-02 02:54:53 +00:00 · 2025-09-19 16:45:53 +00:00 · 2025-09-19 16:45:53 +00:00 · e9ab2fd1bb
commit e9ab2fd1bb
parent 1c703917df
2 changed files with 33 additions and 0 deletions
--- a/olmocr/bench/convert.py
+++ b/olmocr/bench/convert.py
@ -231,6 +231,7 @@ if __name__ == "__main__":
        "mistral": ("olmocr.bench.runners.run_mistral", "run_mistral"),
        "docling": ("olmocr.bench.runners.run_docling", "run_docling"),
        "rolmocr": ("olmocr.bench.runners.run_rolmocr", "run_rolmocr"),
+        "paddlepaddle": ("olmocr.bench.runners.run_paddlepaddle", "run_paddlepaddle"),
        "transformers": ("olmocr.bench.runners.run_transformers", "run_transformers"),
        "server": ("olmocr.bench.runners.run_server", "run_server"),
    }
--- a/olmocr/bench/runners/run_paddlepaddle.py
+++ b/olmocr/bench/runners/run_paddlepaddle.py
@ -0,0 +1,32 @@
+import json
+import os
+from typing import Literal
+
+from openai import OpenAI
+
+from paddleocr import PPStructureV3
+
+
+# Runs PaddleOCR as described in the docs here: https://huggingface.co/PaddlePaddle/PP-OCRv5_server_det
+# i.e. with text_detection_model_name="PP-OCRv5_server_det",
+# and uses the PP-StructureV3 pipeline to create markdown.
+
+
+def run_paddlepaddle(
+    pdf_path: str,
+    page_num: int = 1,
+    **kwargs
+) -> str:
+    """Convert one page of a PDF to markdown with the PP-StructureV3 pipeline.
+
+    Args:
+        pdf_path: Path of the PDF passed to ``pipeline.predict``.
+        page_num: 1-indexed page whose markdown is returned.
+        **kwargs: Ignored by this runner.
+
+    Returns:
+        The markdown text produced for the requested page.
+
+    Raises:
+        ValueError: If ``page_num`` is below 1, or if the pipeline yields no
+            result for that page.
+    """
+    # Fail fast: a page below 1 can never match, so skip the expensive OCR run.
+    if page_num < 1:
+        raise ValueError(f"page_num must be >= 1, got {page_num}")
+
+    # NOTE: the pipeline is constructed on every call.
+    pipeline = PPStructureV3(
+        text_detection_model_name="PP-OCRv5_server_det",
+        use_doc_orientation_classify=False,  # Use use_doc_orientation_classify to enable/disable document orientation classification model
+        use_doc_unwarping=False,  # Use use_doc_unwarping to enable/disable document unwarping module
+        use_textline_orientation=False,  # Use use_textline_orientation to enable/disable textline orientation classification model
+        device="gpu:0",  # Use device to specify GPU for model inference
+    )
+    output = pipeline.predict(pdf_path)
+    for cur_page_0_indexed, res in enumerate(output):
+        if cur_page_0_indexed != page_num - 1:
+            continue
+
+        page_markdown = res.markdown
+        # PaddleOCR documents `markdown` as a dict whose "markdown_texts"
+        # entry holds the page text; unwrap it so the declared `str` return
+        # type is honoured. A plain string is passed through unchanged.
+        if isinstance(page_markdown, dict):
+            page_markdown = page_markdown["markdown_texts"]
+        return page_markdown
+
+    raise ValueError(f"Did not get markdown for page {page_num}")