diff --git a/olmocr/pipeline.py b/olmocr/pipeline.py index a1f0315..6d50e4d 100644 --- a/olmocr/pipeline.py +++ b/olmocr/pipeline.py @@ -129,8 +129,8 @@ async def build_page_query(local_pdf_path: str, page: int, target_longest_image_ { "role": "user", "content": [ - {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}}, {"type": "text", "text": build_no_anchoring_yaml_prompt()}, + {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}}, ], } ], diff --git a/olmocr/train/dataloader.py b/olmocr/train/dataloader.py index 502c8bc..1387a70 100644 --- a/olmocr/train/dataloader.py +++ b/olmocr/train/dataloader.py @@ -417,8 +417,8 @@ class InstructUserMessages(PipelineStep): messages = { "role": "user", "content": [ - {"type": "image", "image": sample["image"]}, {"type": "text", "text": sample["instruction_prompt"]}, + {"type": "image", "image": sample["image"]}, ], }