diff --git a/olmocr/bench/convert.py b/olmocr/bench/convert.py
index e6d81d0..e75eaff 100644
--- a/olmocr/bench/convert.py
+++ b/olmocr/bench/convert.py
@@ -231,6 +231,7 @@ if __name__ == "__main__":
         "mistral": ("olmocr.bench.runners.run_mistral", "run_mistral"),
         "docling": ("olmocr.bench.runners.run_docling", "run_docling"),
         "rolmocr": ("olmocr.bench.runners.run_rolmocr", "run_rolmocr"),
+        "paddlepaddle": ("olmocr.bench.runners.run_paddlepaddle", "run_paddlepaddle"),
         "transformers": ("olmocr.bench.runners.run_transformers", "run_transformers"),
         "server": ("olmocr.bench.runners.run_server", "run_server"),
     }
diff --git a/olmocr/bench/runners/run_paddlepaddle.py b/olmocr/bench/runners/run_paddlepaddle.py
new file mode 100644
index 0000000..047ec99
--- /dev/null
+++ b/olmocr/bench/runners/run_paddlepaddle.py
@@ -0,0 +1,62 @@
+import functools
+import json
+import os
+from typing import Literal
+
+from openai import OpenAI
+
+from paddleocr import PPStructureV3
+
+
+# Runs PaddleOCR following the docs here: https://huggingface.co/PaddlePaddle/PP-OCRv5_server_det
+# with text_detection_model_name="PP-OCRv5_server_det",
+# using the PP-StructureV3 pipeline to create markdown.
+
+
+@functools.lru_cache(maxsize=None)
+def _get_pipeline(device: str) -> PPStructureV3:
+    """Build the PP-StructureV3 pipeline once per device so repeated calls reuse it."""
+    return PPStructureV3(
+        text_detection_model_name="PP-OCRv5_server_det",
+        use_doc_orientation_classify=False,  # Disable the document orientation classification model
+        use_doc_unwarping=False,  # Disable the document unwarping module
+        use_textline_orientation=False,  # Disable the textline orientation classification model
+        device=device,  # Device used for model inference, e.g. "gpu:0"
+    )
+
+
+def run_paddlepaddle(
+    pdf_path: str,
+    page_num: int = 1,
+    device: str = "gpu:0",
+    **kwargs,
+) -> str:
+    """Convert one page of a PDF to markdown with the PP-StructureV3 pipeline.
+
+    Args:
+        pdf_path: Path of the PDF handed to the pipeline.
+        page_num: 1-indexed page whose markdown is returned.
+        device: Device string passed to PPStructureV3 for inference.
+        **kwargs: Accepted and ignored.
+
+    Returns:
+        The markdown text of the requested page.
+
+    Raises:
+        ValueError: If page_num is below 1 or the pipeline produced no result for that page.
+    """
+    if page_num < 1:
+        raise ValueError(f"page_num must be >= 1, got {page_num}")
+
+    # predict() receives the whole PDF; results are matched to pages by their position in the output.
+    output = _get_pipeline(device).predict(pdf_path)
+    for cur_page, res in enumerate(output, start=1):
+        if cur_page == page_num:
+            markdown = res.markdown
+            # PaddleOCR's documented usage treats res.markdown as a dict whose
+            # "markdown_texts" entry holds the page text; pass a plain string through as-is.
+            if isinstance(markdown, dict):
+                return markdown["markdown_texts"]
+            return markdown
+
+    raise ValueError(f"Did not get markdown for page {page_num}")