diff --git a/olmocr/train/configs/qwen25_vl_olmocrv2_1epoch_compile.yaml b/olmocr/train/configs/qwen25_vl_olmocrv3_1epoch.yaml similarity index 97% rename from olmocr/train/configs/qwen25_vl_olmocrv2_1epoch_compile.yaml rename to olmocr/train/configs/qwen25_vl_olmocrv3_1epoch.yaml index 226e205..a6dd168 100644 --- a/olmocr/train/configs/qwen25_vl_olmocrv2_1epoch_compile.yaml +++ b/olmocr/train/configs/qwen25_vl_olmocrv3_1epoch.yaml @@ -2,7 +2,7 @@ # Project metadata project_name: olmocr-qwen-vl-training -run_name: qwen2.5-vl-7b-olmocrv2_1epoch_compile_v2 +run_name: qwen2.5-vl-7b-olmocrv3_1epoch_prompt_first # Model configuration model: @@ -37,6 +37,7 @@ dataset: - name: NewYamlFinetuningPromptWithNoAnchoring - name: FrontMatterOutputFormat - name: InstructUserMessages + prompt_first: true - name: Tokenizer masking_index: -100 end_of_message_token: "<|im_end|>" diff --git a/olmocr/train/configs/qwen25_vl_b100_x1_day2_capped.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_day2_capped.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_b100_x1_day2_capped.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_day2_capped.yaml diff --git a/olmocr/train/configs/qwen25_vl_b100_x1_day3_json.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_day3_json.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_b100_x1_day3_json.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_day3_json.yaml diff --git a/olmocr/train/configs/qwen25_vl_b100_x1_day3_json_1280.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_day3_json_1280.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_b100_x1_day3_json_1280.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_day3_json_1280.yaml diff --git a/olmocr/train/configs/qwen25_vl_b100_x1_day3_json_1280_noanchor.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_day3_json_1280_noanchor.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_b100_x1_day3_json_1280_noanchor.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_day3_json_1280_noanchor.yaml diff --git a/olmocr/train/configs/qwen25_vl_b100_x1_day3_json_1600.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_day3_json_1600.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_b100_x1_day3_json_1600.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_day3_json_1600.yaml diff --git a/olmocr/train/configs/qwen25_vl_b100_x1_day3_json_lr5e-5.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_day3_json_lr5e-5.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_b100_x1_day3_json_lr5e-5.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_day3_json_lr5e-5.yaml diff --git a/olmocr/train/configs/qwen25_vl_b100_x1_day3_json_wsd.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_day3_json_wsd.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_b100_x1_day3_json_wsd.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_day3_json_wsd.yaml diff --git a/olmocr/train/configs/qwen25_vl_b100_x1_day3_yaml_1280_noanchor.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_day3_yaml_1280_noanchor.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_b100_x1_day3_yaml_1280_noanchor.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_day3_yaml_1280_noanchor.yaml diff --git a/olmocr/train/configs/qwen25_vl_b100_x1_day3_yaml_1280_noanchor_128batch.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_day3_yaml_1280_noanchor_128batch.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_b100_x1_day3_yaml_1280_noanchor_128batch.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_day3_yaml_1280_noanchor_128batch.yaml diff --git a/olmocr/train/configs/qwen25_vl_b100_x1_day3_yaml_1280_noanchor_latexnormalize.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_day3_yaml_1280_noanchor_latexnormalize.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_b100_x1_day3_yaml_1280_noanchor_latexnormalize.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_day3_yaml_1280_noanchor_latexnormalize.yaml diff --git a/olmocr/train/configs/qwen25_vl_b100_x1_day3_yaml_1280_noanchor_newprompt.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_day3_yaml_1280_noanchor_newprompt.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_b100_x1_day3_yaml_1280_noanchor_newprompt.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_day3_yaml_1280_noanchor_newprompt.yaml diff --git a/olmocr/train/configs/qwen25_vl_b100_x1_default.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_default.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_b100_x1_default.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_default.yaml diff --git a/olmocr/train/configs/qwen25_vl_b100_x1_default_fresh_prompt.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_default_fresh_prompt.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_b100_x1_default_fresh_prompt.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_default_fresh_prompt.yaml diff --git a/olmocr/train/configs/qwen25_vl_b100_x1_default_fresh_prompt_no_doc_anchoring.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_default_fresh_prompt_no_doc_anchoring.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_b100_x1_default_fresh_prompt_no_doc_anchoring.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_default_fresh_prompt_no_doc_anchoring.yaml diff --git a/olmocr/train/configs/qwen25_vl_b100_x1_default_grad_acc32.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_default_grad_acc32.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_b100_x1_default_grad_acc32.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_default_grad_acc32.yaml diff --git a/olmocr/train/configs/qwen25_vl_b100_x1_default_image_1280.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_default_image_1280.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_b100_x1_default_image_1280.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_default_image_1280.yaml diff --git a/olmocr/train/configs/qwen25_vl_b100_x1_default_image_1280_no_doc_anchor.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_default_image_1280_no_doc_anchor.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_b100_x1_default_image_1280_no_doc_anchor.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_default_image_1280_no_doc_anchor.yaml diff --git a/olmocr/train/configs/qwen25_vl_b100_x1_default_image_1600.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_default_image_1600.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_b100_x1_default_image_1600.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_default_image_1600.yaml diff --git a/olmocr/train/configs/qwen25_vl_b100_x1_default_image_1600_no_doc_anchor.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_default_image_1600_no_doc_anchor.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_b100_x1_default_image_1600_no_doc_anchor.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_default_image_1600_no_doc_anchor.yaml diff --git a/olmocr/train/configs/qwen25_vl_b100_x1_default_json.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_default_json.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_b100_x1_default_json.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_default_json.yaml diff --git a/olmocr/train/configs/qwen25_vl_b100_x1_default_no_doc_anchor.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_default_no_doc_anchor.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_b100_x1_default_no_doc_anchor.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_b100_x1_default_no_doc_anchor.yaml diff --git a/olmocr/train/configs/qwen25_vl_olmocrv2_1288_soupfull0.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_1288_soupfull0.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_olmocrv2_1288_soupfull0.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_1288_soupfull0.yaml diff --git a/olmocr/train/configs/qwen25_vl_olmocrv2_1288_soupfull1.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_1288_soupfull1.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_olmocrv2_1288_soupfull1.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_1288_soupfull1.yaml diff --git a/olmocr/train/configs/qwen25_vl_olmocrv2_1288_soupfull2.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_1288_soupfull2.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_olmocrv2_1288_soupfull2.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_1288_soupfull2.yaml diff --git a/olmocr/train/configs/qwen25_vl_olmocrv2_1epoch.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_1epoch.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_olmocrv2_1epoch.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_1epoch.yaml diff --git a/olmocr/train/configs/qwen25_vl_olmocrv2_1epoch_muon2e-5.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_1epoch_muon2e-5.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_olmocrv2_1epoch_muon2e-5.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_1epoch_muon2e-5.yaml diff --git a/olmocr/train/configs/qwen25_vl_olmocrv2_1epoch_muon2e-6.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_1epoch_muon2e-6.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_olmocrv2_1epoch_muon2e-6.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_1epoch_muon2e-6.yaml diff --git a/olmocr/train/configs/qwen25_vl_olmocrv2_1epoch_muon6e-6.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_1epoch_muon6e-6.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_olmocrv2_1epoch_muon6e-6.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_1epoch_muon6e-6.yaml diff --git a/olmocr/train/configs/qwen25_vl_olmocrv2_2epoch.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_2epoch.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_olmocrv2_2epoch.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_2epoch.yaml diff --git a/olmocr/train/configs/qwen25_vl_olmocrv2_2epoch_promptfirst.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_2epoch_promptfirst.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_olmocrv2_2epoch_promptfirst.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_2epoch_promptfirst.yaml diff --git a/olmocr/train/configs/qwen25_vl_olmocrv2_soup0.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_soup0.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_olmocrv2_soup0.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_soup0.yaml diff --git a/olmocr/train/configs/qwen25_vl_olmocrv2_soup1.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_soup1.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_olmocrv2_soup1.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_soup1.yaml diff --git a/olmocr/train/configs/qwen25_vl_olmocrv2_soup2.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_soup2.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_olmocrv2_soup2.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_soup2.yaml diff --git a/olmocr/train/configs/qwen25_vl_olmocrv2_tokflip.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_tokflip.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_olmocrv2_tokflip.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_tokflip.yaml diff --git a/olmocr/train/configs/qwen25_vl_olmocrv2_tokflip1k.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_tokflip1k.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_olmocrv2_tokflip1k.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_tokflip1k.yaml diff --git a/olmocr/train/configs/qwen25_vl_olmocrv2_tokflip3k.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_tokflip3k.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_olmocrv2_tokflip3k.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_tokflip3k.yaml diff --git a/olmocr/train/configs/qwen25_vl_olmocrv2_tokflip500.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_tokflip500.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_olmocrv2_tokflip500.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_tokflip500.yaml diff --git a/olmocr/train/configs/qwen25_vl_olmocrv2_tokflip_2ep.yaml b/olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_tokflip_2ep.yaml similarity index 100% rename from olmocr/train/configs/qwen25_vl_olmocrv2_tokflip_2ep.yaml rename to olmocr/train/configs/v0.2.0/qwen25_vl_olmocrv2_tokflip_2ep.yaml diff --git a/olmocr/train/configs/qwen2_vl_b100_x1_day3_json.yaml b/olmocr/train/configs/v0.2.0/qwen2_vl_b100_x1_day3_json.yaml similarity index 100% rename from olmocr/train/configs/qwen2_vl_b100_x1_day3_json.yaml rename to olmocr/train/configs/v0.2.0/qwen2_vl_b100_x1_day3_json.yaml diff --git a/olmocr/train/configs/qwen2_vl_b100_x1_day3_yaml.yaml b/olmocr/train/configs/v0.2.0/qwen2_vl_b100_x1_day3_yaml.yaml similarity index 100% rename from olmocr/train/configs/qwen2_vl_b100_x1_day3_yaml.yaml rename to olmocr/train/configs/v0.2.0/qwen2_vl_b100_x1_day3_yaml.yaml