From 2f74a2a9962817954753df6d0cee2d1c8c46db85 Mon Sep 17 00:00:00 2001
From: Jake Poznanski
Date: Tue, 8 Apr 2025 13:25:15 -0700
Subject: [PATCH] Prompt6 for qwen2.5 vl

---
 olmocr/bench/prompts.py             | 3 +--
 olmocr/bench/scripts/convert_all.sh | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/olmocr/bench/prompts.py b/olmocr/bench/prompts.py
index 728909c..a3dedbd 100644
--- a/olmocr/bench/prompts.py
+++ b/olmocr/bench/prompts.py
@@ -1,6 +1,5 @@
 def build_basic_prompt() -> str:
-    return "Just return the plain text representation of this document as if you were reading it naturally. Use markdown for formatting. Convert math to Latex using \( and \) as delimiters for inline math, and \[ and \] as delimiters otherwise. Convert all tables to markdown."
-
+    return "Please provide a natural, plain text representation of the document, formatted in Markdown. For mathematical expressions, use LaTeX notation with \( and \) for inline equations and \[ and \] for display equations. Convert any tables into Markdown format."
 
 def claude_response_format_schema() -> dict:
     return (

diff --git a/olmocr/bench/scripts/convert_all.sh b/olmocr/bench/scripts/convert_all.sh
index d67421c..40d5e25 100755
--- a/olmocr/bench/scripts/convert_all.sh
+++ b/olmocr/bench/scripts/convert_all.sh
@@ -260,7 +260,7 @@ pip install --upgrade vllm==0.8.3
 start_server vllm "Qwen/Qwen2.5-VL-7B-Instruct" --max-model-len 8192
 
-python -m olmocr.bench.convert --dir "$BENCH_DIR" server:name=qwen25vl_prompt4:model=Qwen/Qwen2.5-VL-7B-Instruct:temperature=0.1:prompt_template=basic:response_template=plain --parallel 50
+python -m olmocr.bench.convert --dir "$BENCH_DIR" server:name=qwen25vl_prompt6:model=Qwen/Qwen2.5-VL-7B-Instruct:temperature=0.1:prompt_template=basic:response_template=plain --parallel 50
 
 stop_server
 
 start_server vllm "reducto/RolmOCR" --max-model-len 8192
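
Note (not part of the patch): the revised build_basic_prompt() is the text the benchmark harness sends to the Qwen2.5-VL server that convert_all.sh starts. Below is a minimal, illustrative sketch of exercising that prompt against such a server through its OpenAI-compatible chat API. The http://localhost:8000/v1 endpoint, the "EMPTY" API key, and the page.png input are assumptions made for illustration, not details taken from this patch, and the real olmocr.bench.convert pipeline may construct its requests differently.

```python
# Minimal sketch: send one rendered page plus the basic prompt to a local
# vLLM server running Qwen/Qwen2.5-VL-7B-Instruct (endpoint/port assumed).
import base64

from openai import OpenAI

from olmocr.bench.prompts import build_basic_prompt

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

# Encode a rendered document page as a data URL so it can be sent inline.
with open("page.png", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

response = client.chat.completions.create(
    model="Qwen/Qwen2.5-VL-7B-Instruct",
    temperature=0.1,  # matches the temperature set in convert_all.sh
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_b64}"}},
                {"type": "text", "text": build_basic_prompt()},
            ],
        }
    ],
)

# Plain-text/Markdown rendering of the page, per the prompt's instructions.
print(response.choices[0].message.content)
```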