This commit is contained in:
Jake Poznanski 2025-04-07 14:15:32 -07:00
parent aa5837074e
commit ae4fda7429
3 changed files with 3 additions and 5 deletions

View File

@ -1,6 +1,7 @@
def build_basic_prompt() -> str:
    """Return the basic prompt asking for a markdown rendering of a document.

    The prompt also instructs the model to wrap inline math in LaTeX-style
    parenthesis delimiters and all other math in bracket delimiters.
    """
    # Raw string: \( \) \[ \] are not valid escape sequences in a normal
    # literal and trigger a SyntaxWarning on Python 3.12+. The raw form keeps
    # the exact same runtime text (a literal backslash before each delimiter).
    return (
        r"Just return the markdown representation of this document as if you were reading it naturally. "
        r"Convert equations to markdown using \( \) for inline math, and \[ \] otherwise."
    )
def claude_response_format_schema() -> dict:
return (
{

View File

@ -3,6 +3,7 @@ from typing import Literal
import httpx
from olmocr.bench.prompts import build_basic_prompt, build_rolmocr_prompt
from olmocr.data.renderpdf import render_pdf_to_base64png
from olmocr.prompts.anchor import get_anchor_text
from olmocr.prompts.prompts import (
@ -11,8 +12,6 @@ from olmocr.prompts.prompts import (
build_openai_silver_data_prompt,
)
from olmocr.bench.prompts import build_basic_prompt, build_rolmocr_prompt
async def run_rolmcr(
pdf_path: str,
@ -31,7 +30,6 @@ async def run_rolmcr(
# Convert the requested page (page_num) of the PDF to a base64-encoded PNG image.
image_base64 = render_pdf_to_base64png(pdf_path, page_num=page_num, target_longest_image_dim=target_longest_image_dim)
request = {
"model": model,
"messages": [

View File

@ -3,6 +3,7 @@ from typing import Literal
import httpx
from olmocr.bench.prompts import build_basic_prompt
from olmocr.data.renderpdf import render_pdf_to_base64png
from olmocr.prompts.anchor import get_anchor_text
from olmocr.prompts.prompts import (
@ -11,8 +12,6 @@ from olmocr.prompts.prompts import (
build_openai_silver_data_prompt,
)
from olmocr.bench.prompts import build_basic_prompt, build_rolmocr_prompt
async def run_server(
pdf_path: str,