From ce86aff80a7af47c2940859a5931cf69c593b4d9 Mon Sep 17 00:00:00 2001 From: Jake Poznanski Date: Wed, 20 Aug 2025 16:23:33 +0000 Subject: [PATCH] Refreshing the claude sonnet synth miner --- .gitignore | 1 + olmocr/bench/synth/mine_html_templates.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 95bb19e..8f9d32b 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,7 @@ filtered_items/ filtered_items_prefilter/ augraphy_cache/ /*.html +html_templates*/ scoreelo.csv debug.log birrpipeline-debug.log diff --git a/olmocr/bench/synth/mine_html_templates.py b/olmocr/bench/synth/mine_html_templates.py index 5c464cc..b5ebdb8 100644 --- a/olmocr/bench/synth/mine_html_templates.py +++ b/olmocr/bench/synth/mine_html_templates.py @@ -67,7 +67,7 @@ def generate_html_from_image(client, image_base64): try: # Step 1: Initial analysis and column detection analysis_response = client.messages.create( - model="claude-3-7-sonnet-20250219", + model="claude-sonnet-4-20250514", max_tokens=2000, temperature=0.1, messages=[ @@ -96,7 +96,7 @@ def generate_html_from_image(client, image_base64): # Step 2: Initial HTML generation with detailed layout instructions initial_response = client.messages.create( - model="claude-3-7-sonnet-20250219", + model="claude-sonnet-4-20250514", max_tokens=6000, temperature=0.2, messages=[