mirror of
https://github.com/allenai/olmocr.git
synced 2025-11-03 03:25:22 +00:00
Setting pipeline to use new prompt too
This commit is contained in:
parent
f2db62b0f8
commit
798335c88e
@ -37,7 +37,7 @@ from olmocr.data.renderpdf import render_pdf_to_base64png
|
|||||||
from olmocr.filter.filter import Language, PdfFilter
|
from olmocr.filter.filter import Language, PdfFilter
|
||||||
from olmocr.image_utils import convert_image_to_pdf_bytes, is_jpeg, is_png
|
from olmocr.image_utils import convert_image_to_pdf_bytes, is_jpeg, is_png
|
||||||
from olmocr.metrics import MetricsKeeper, WorkerTracker
|
from olmocr.metrics import MetricsKeeper, WorkerTracker
|
||||||
from olmocr.prompts import PageResponse, build_no_anchoring_yaml_prompt
|
from olmocr.prompts import PageResponse, build_no_anchoring_v4_yaml_prompt
|
||||||
from olmocr.prompts.anchor import get_anchor_text
|
from olmocr.prompts.anchor import get_anchor_text
|
||||||
from olmocr.s3_utils import (
|
from olmocr.s3_utils import (
|
||||||
download_directory,
|
download_directory,
|
||||||
@ -137,7 +137,7 @@ async def build_page_query(local_pdf_path: str, page: int, target_longest_image_
|
|||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": [
|
"content": [
|
||||||
{"type": "text", "text": build_no_anchoring_yaml_prompt()},
|
{"type": "text", "text": build_no_anchoring_v4_yaml_prompt()},
|
||||||
{"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
|
{"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user