Reword the basic OCR prompt returned by build_basic_prompt() and rename the
Qwen2.5-VL benchmark run from qwen25vl_prompt4 to qwen25vl_prompt6.

The new prompt is a raw string so that the LaTeX delimiters \( \) \[ \] are
not parsed as (invalid) escape sequences; the prompt text itself is unchanged
by the r-prefix. Both blank lines between the top-level functions are kept.

diff --git a/olmocr/bench/prompts.py b/olmocr/bench/prompts.py
index 728909c..a3dedbd 100644
--- a/olmocr/bench/prompts.py
+++ b/olmocr/bench/prompts.py
@@ -1,5 +1,5 @@
 def build_basic_prompt() -> str:
-    return "Just return the plain text representation of this document as if you were reading it naturally. Use markdown for formatting. Convert math to Latex using \( and \) as delimiters for inline math, and \[ and \] as delimiters otherwise. Convert all tables to markdown."
+    return r"Please provide a natural, plain text representation of the document, formatted in Markdown. For mathematical expressions, use LaTeX notation with \( and \) for inline equations and \[ and \] for display equations. Convert any tables into Markdown format."
 
 
 def claude_response_format_schema() -> dict:
diff --git a/olmocr/bench/scripts/convert_all.sh b/olmocr/bench/scripts/convert_all.sh
index d67421c..40d5e25 100755
--- a/olmocr/bench/scripts/convert_all.sh
+++ b/olmocr/bench/scripts/convert_all.sh
@@ -260,7 +260,7 @@
 pip install --upgrade vllm==0.8.3
 
 start_server vllm "Qwen/Qwen2.5-VL-7B-Instruct" --max-model-len 8192
-python -m olmocr.bench.convert --dir "$BENCH_DIR" server:name=qwen25vl_prompt4:model=Qwen/Qwen2.5-VL-7B-Instruct:temperature=0.1:prompt_template=basic:response_template=plain --parallel 50
+python -m olmocr.bench.convert --dir "$BENCH_DIR" server:name=qwen25vl_prompt6:model=Qwen/Qwen2.5-VL-7B-Instruct:temperature=0.1:prompt_template=basic:response_template=plain --parallel 50
 stop_server
 
 start_server vllm "reducto/RolmOCR" --max-model-len 8192