Lint fixes, adjusting qwen2.5 vl prompt

This commit is contained in:
Jake Poznanski 2025-04-07 21:19:36 -07:00
parent aa5cb95169
commit 474e0ef6ed
5 changed files with 2 additions and 7 deletions

View File

@@ -1,5 +1,5 @@
def build_basic_prompt() -> str:
return "Just return the markdown representation of this document as if you were reading it naturally. Convert equations to markdown using \( \) for inline math, and \[ \] otherwise."
return "Just return the plain text representation of this document as if you were reading it naturally. Convert equations to LaTeX using \( \) for inline math, and \[ \] otherwise. Convert all tables to markdown."
def claude_response_format_schema() -> dict:

View File

@@ -1,5 +1,3 @@
import json
from typing import Literal
import httpx

View File

@@ -45,8 +45,6 @@ async def run_server(
prompt = build_finetuning_prompt(anchor_text)
elif prompt_template == "basic":
prompt = build_basic_prompt()
elif prompt_template == "rolmocr":
prompt = build_rolmocr_prompt()
else:
raise ValueError("Unknown prompt template")

View File

@@ -260,7 +260,7 @@ pip install --upgrade vllm==0.8.3
start_server vllm "Qwen/Qwen2.5-VL-7B-Instruct" --max-model-len 8192
python -m olmocr.bench.convert --dir "$BENCH_DIR" server:name=qwen25vl_prompt3:model=Qwen/Qwen2.5-VL-7B-Instruct:temperature=0.1:prompt_template=basic:response_template=plain --parallel 50
python -m olmocr.bench.convert --dir "$BENCH_DIR" server:name=qwen25vl_prompt4:model=Qwen/Qwen2.5-VL-7B-Instruct:temperature=0.1:prompt_template=basic:response_template=plain --parallel 50
stop_server
start_server vllm "reducto/RolmOCR" --max-model-len 8192

View File

@@ -7,7 +7,6 @@ import os
import random
import re
import sqlite3
import string
import tempfile
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path