Trying out some new prompts

This commit is contained in:
Jake Poznanski 2025-08-14 17:44:56 +00:00
parent 7a36c98e26
commit 1f50a6b6bd
2 changed files with 41 additions and 4 deletions

View File

@ -8,14 +8,16 @@ from olmocr.bench.prompts import (
build_basic_prompt, build_basic_prompt,
build_openai_silver_data_prompt_no_document_anchoring, build_openai_silver_data_prompt_no_document_anchoring,
) )
from olmocr.data.renderpdf import render_pdf_to_base64png from olmocr.data.renderpdf import render_pdf_to_base64png, get_png_dimensions_from_base64
from olmocr.prompts.anchor import get_anchor_text from olmocr.prompts.anchor import get_anchor_text
from olmocr.prompts.prompts import ( from olmocr.prompts.prompts import (
PageResponse, PageResponse,
build_finetuning_prompt, build_finetuning_prompt,
build_openai_silver_data_prompt, build_openai_silver_data_prompt,
openai_response_format_schema, openai_response_format_schema,
build_openai_silver_data_prompt_v2 build_openai_silver_data_prompt_v2,
build_openai_silver_data_prompt_v2_simple,
build_openai_silver_data_prompt_v3_simple,
) )
@ -25,7 +27,7 @@ def run_chatgpt(
model: str = "gpt-4o-2024-08-06", model: str = "gpt-4o-2024-08-06",
temperature: float = 0.1, temperature: float = 0.1,
target_longest_image_dim: int = 2048, target_longest_image_dim: int = 2048,
prompt_template: Literal["full", "full_no_document_anchoring", "basic", "finetune", "fullv2"] = "finetune", prompt_template: Literal["full", "full_no_document_anchoring", "basic", "finetune", "fullv2", "fullv2simple", "fullv3simple"] = "finetune",
response_template: Literal["plain", "json"] = "json", response_template: Literal["plain", "json"] = "json",
) -> str: ) -> str:
""" """
@ -58,6 +60,12 @@ def run_chatgpt(
prompt = build_basic_prompt() prompt = build_basic_prompt()
elif prompt_template == "fullv2": elif prompt_template == "fullv2":
prompt = build_openai_silver_data_prompt_v2(anchor_text) prompt = build_openai_silver_data_prompt_v2(anchor_text)
elif prompt_template == "fullv2simple":
width, height = get_png_dimensions_from_base64(image_base64)
prompt = build_openai_silver_data_prompt_v2_simple(width, height)
elif prompt_template == "fullv3simple":
width, height = get_png_dimensions_from_base64(image_base64)
prompt = build_openai_silver_data_prompt_v3_simple(width, height)
else: else:
raise ValueError("Unknown prompt template") raise ValueError("Unknown prompt template")
@ -74,7 +82,7 @@ def run_chatgpt(
], ],
temperature=temperature, temperature=temperature,
max_completion_tokens=20000, max_completion_tokens=20000,
reasoning_effort="high", #reasoning_effort="high",
response_format=openai_response_format_schema() if response_template == "json" else None, response_format=openai_response_format_schema() if response_template == "json" else None,
safety_identifier="olmocr-bench-runner", safety_identifier="olmocr-bench-runner",
) )

View File

@ -30,6 +30,35 @@ def build_openai_silver_data_prompt_v2(base_text: str) -> str:
f"RAW_TEXT_START\n{base_text}\nRAW_TEXT_END" f"RAW_TEXT_START\n{base_text}\nRAW_TEXT_END"
) )
def build_openai_silver_data_prompt_v2_simple(page_width: int, page_height: int) -> str:
    """Build the "v2 simple" transcription prompt for a single PDF page image.

    The prompt asks for a plain-text reading of the attached page image, with
    math rendered as LaTeX, tables rendered as HTML, and figures labelled
    using a markdown image placeholder. Only the page dimensions are
    interpolated into the text.

    Args:
        page_width: Width of the page image, reported on the prompt's last line.
        page_height: Height of the page image, reported on the prompt's last line.

    Returns:
        The complete prompt as a single string.
    """
    # Adjacent string literals are joined with no separator, so every
    # instruction must carry its own trailing space or newline; otherwise two
    # sentences fuse together (e.g. "document.Just return ...").
    return (
        "Attached is the image of one page of a PDF document. "
        "Just return the plain text representation of this document as if you were reading it naturally.\n"
        "Turn equations and math symbols into a LaTeX representation, make sure to use \\( and \\) as a delimiter for inline math, and \\[ and \\] for block math. Always prefer LaTeX syntax instead of using unicode math symbols.\n"
        "Convert tables into HTML format. Remove the headers and footers, but keep references and footnotes.\n"
        "Read any natural handwriting.\n"
        "If there are any figures or charts, label them with the following markdown syntax ![Alt text describing the contents of the figure](page_startx_starty_width_height.png)\n"
        "This is likely one page out of several in the document, so be sure to preserve any sentences that come from the previous page, or continue onto the next page, exactly as they are.\n"
        "If there is no text at all that you think you should read, you can output null.\n"
        "Do not hallucinate.\n"
        f"Page width: {page_width}, Page height: {page_height}"
    )
def build_openai_silver_data_prompt_v3_simple(page_width: int, page_height: int) -> str:
    """Build the "v3 simple" transcription prompt for a single PDF page image.

    The prompt asks for a plain-text reading of the attached page image. It
    explicitly forbids ascii/unicode math symbols in favour of LaTeX, gives
    guidance on HTML table markup (header cells, rowspan/colspan), and asks
    for figures to be labelled using a markdown image placeholder. Only the
    page dimensions are interpolated into the text.

    Args:
        page_width: Width of the page image, reported on the prompt's last line.
        page_height: Height of the page image, reported on the prompt's last line.

    Returns:
        The complete prompt as a single string.
    """
    # Adjacent string literals are joined with no separator, so every
    # instruction must carry its own trailing space or newline; otherwise two
    # sentences fuse together (e.g. "document.Just return ...").
    # Every LaTeX backslash is written as an escaped backslash: sequences such
    # as backslash-paren or backslash-s are invalid escapes in a non-raw
    # literal and trigger a SyntaxWarning on recent Python versions.
    return (
        "Attached is the image of one page of a PDF document. "
        "Just return the plain text representation of this document as if you were reading it naturally.\n"
        "Turn equations and math symbols into a LaTeX representation, make sure to use \\( and \\) as a delimiter for inline math, and \\[ and \\] for block math. Do NOT use ascii or unicode math symbols such as ∈ ∉ ⊂ ⊃ ⊆ ⊇ ∅ ∩ ∀ ∃ ¬, just use LaTeX syntax, ex \\( \\in \\notin \\subset \\supset \\subseteq \\) etc. \n"
        "Convert tables into HTML format. Keep the syntax simple, but use <th> for header rows, and use rowspan and colspans appropriately. \n"
        "Remove the headers and footers, but keep references and footnotes.\n"
        "Read any natural handwriting.\n"
        "If there are any figures or charts, label them with the following markdown syntax ![Alt text describing the contents of the figure](page_startx_starty_width_height.png)\n"
        "This is likely one page out of several in the document, so be sure to preserve any sentences that come from the previous page, or continue onto the next page, exactly as they are.\n"
        "If there is no text at all that you think you should read, you can output null.\n"
        "Do not hallucinate.\n"
        f"Page width: {page_width}, Page height: {page_height}"
    )
@dataclass(frozen=True) @dataclass(frozen=True)