mirror of https://github.com/allenai/olmocr.git, synced 2025-10-12 00:32:45 +00:00

commit acf8bff554 ("LRs")
parent 5d7245f887
olmocr/train/configs/qwen25_vl_olmocrv2_1epoch_muon2e-3.yaml (new file, 106 lines)
@@ -0,0 +1,106 @@
# Example OlmOCR Training Configuration

# Project metadata
project_name: olmocr-qwen-vl-training
run_name: qwen2.5-vl-7b-olmocrv2_1epoch_muon_2e-3

# Model configuration
model:
  name: Qwen/Qwen2.5-VL-7B-Instruct
  trust_remote_code: true
  torch_dtype: bfloat16
  use_flash_attention: true
  attn_implementation: flash_attention_2

  # LoRA settings (disabled by default)
  use_lora: false
  # lora_rank: 8
  # lora_alpha: 32
  # lora_dropout: 0.1
  # lora_target_modules:
  #   - q_proj
  #   - v_proj
  #   - k_proj
  #   - o_proj

# Dataset configuration
dataset:
  train:
    - name: processed_01_books_train_iabooks
      root_dir: /weka/oe-data-default/jakep/olmOCR-mix-0225/processed_01_books_train_iabooks/
      pipeline: &basic_pipeline
        - name: FrontMatterParser
          front_matter_class: PageResponse
        - name: PDFRenderer
          target_longest_image_dim: 1288
        - name: NewYamlFinetuningPromptWithNoAnchoring
        - name: FrontMatterOutputFormat
        - name: InstructUserMessages
        - name: Tokenizer
          masking_index: -100
          end_of_message_token: "<|im_end|>"
    - name: processed_00_documents_train_s2pdf
      root_dir: /weka/oe-data-default/jakep/olmOCR-mix-0225/processed_00_documents_train_s2pdf/
      pipeline: *basic_pipeline

  eval:
    - name: processed_00_documents_eval_s2pdf
      root_dir: /weka/oe-data-default/jakep/olmOCR-mix-0225/processed_00_documents_eval_s2pdf/
      pipeline: *basic_pipeline
    - name: processed_01_books_eval_iabooks
      root_dir: /weka/oe-data-default/jakep/olmOCR-mix-0225/processed_01_books_eval_iabooks/
      pipeline: *basic_pipeline

# Training configuration
training:
  output_dir: /weka/oe-data-default/jakep/olmocr-trainer/
  num_train_epochs: 1

  # Batch size and accumulation
  per_device_train_batch_size: 1
  per_device_eval_batch_size: 1
  gradient_accumulation_steps: 32

  gradient_checkpointing: False

  collator_max_token_len: 8192

  # Learning rate
  learning_rate: 2e-3
  lr_scheduler_type: linear
  warmup_ratio: 0.1

  # Optimization
  optim: muon
  weight_decay: 0.01
  max_grad_norm: 1.0

  # Muon optimizer specific settings
  muon_momentum: 0.95
  muon_lr_multiplier_head: 4.4    # 0.22 / 0.05 = 4.4
  muon_lr_multiplier_embed: 12.0  # 0.6 / 0.05 = 12.0
  muon_lr_multiplier_scalar: 0.8  # 0.04 / 0.05 = 0.8

  # Adam parameters for non-muon groups
  adam_beta1: 0.8
  adam_beta2: 0.95
  adam_epsilon: 1e-10

  seed: 300
  data_seed: 301

  # Evaluation and checkpointing
  evaluation_strategy: steps
  eval_steps: 500
  save_strategy: steps
  save_steps: 500
  save_total_limit: 5
  load_best_model_at_end: false # Needs to be false because it has a problem restoring checkpoints for some reason
  metric_for_best_model: eval_processed_00_documents_eval_s2pdf_loss
  greater_is_better: false

  report_to:
    - wandb
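The three muon_lr_multiplier_* settings scale the base learning rate per parameter group. Below is a minimal sketch of the per-group rates this implies for the 2e-3 config; the group names are illustrative, and the actual parameter grouping is decided by olmocr's Muon optimizer setup, not by this snippet:

base_lr = 2e-3  # training.learning_rate in this config

multipliers = {
    "head": 4.4,    # muon_lr_multiplier_head
    "embed": 12.0,  # muon_lr_multiplier_embed
    "scalar": 0.8,  # muon_lr_multiplier_scalar
    "hidden": 1.0,  # assumption: remaining matrix parameters run at the base LR
}

for group, mult in multipliers.items():
    # e.g. head: 2e-3 * 4.4 = 8.8e-3, embed: 2e-3 * 12.0 = 2.4e-2
    print(f"{group}: {base_lr * mult:.1e}")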
olmocr/train/configs/qwen25_vl_olmocrv2_1epoch_muon2e-4.yaml (new file, 106 lines)
@@ -0,0 +1,106 @@
# Example OlmOCR Training Configuration

# Project metadata
project_name: olmocr-qwen-vl-training
run_name: qwen2.5-vl-7b-olmocrv2_1epoch_muon_2e-4

# Model configuration
model:
  name: Qwen/Qwen2.5-VL-7B-Instruct
  trust_remote_code: true
  torch_dtype: bfloat16
  use_flash_attention: true
  attn_implementation: flash_attention_2

  # LoRA settings (disabled by default)
  use_lora: false
  # lora_rank: 8
  # lora_alpha: 32
  # lora_dropout: 0.1
  # lora_target_modules:
  #   - q_proj
  #   - v_proj
  #   - k_proj
  #   - o_proj

# Dataset configuration
dataset:
  train:
    - name: processed_01_books_train_iabooks
      root_dir: /weka/oe-data-default/jakep/olmOCR-mix-0225/processed_01_books_train_iabooks/
      pipeline: &basic_pipeline
        - name: FrontMatterParser
          front_matter_class: PageResponse
        - name: PDFRenderer
          target_longest_image_dim: 1288
        - name: NewYamlFinetuningPromptWithNoAnchoring
        - name: FrontMatterOutputFormat
        - name: InstructUserMessages
        - name: Tokenizer
          masking_index: -100
          end_of_message_token: "<|im_end|>"
    - name: processed_00_documents_train_s2pdf
      root_dir: /weka/oe-data-default/jakep/olmOCR-mix-0225/processed_00_documents_train_s2pdf/
      pipeline: *basic_pipeline

  eval:
    - name: processed_00_documents_eval_s2pdf
      root_dir: /weka/oe-data-default/jakep/olmOCR-mix-0225/processed_00_documents_eval_s2pdf/
      pipeline: *basic_pipeline
    - name: processed_01_books_eval_iabooks
      root_dir: /weka/oe-data-default/jakep/olmOCR-mix-0225/processed_01_books_eval_iabooks/
      pipeline: *basic_pipeline

# Training configuration
training:
  output_dir: /weka/oe-data-default/jakep/olmocr-trainer/
  num_train_epochs: 1

  # Batch size and accumulation
  per_device_train_batch_size: 1
  per_device_eval_batch_size: 1
  gradient_accumulation_steps: 32

  gradient_checkpointing: False

  collator_max_token_len: 8192

  # Learning rate
  learning_rate: 2e-4
  lr_scheduler_type: linear
  warmup_ratio: 0.1

  # Optimization
  optim: muon
  weight_decay: 0.01
  max_grad_norm: 1.0

  # Muon optimizer specific settings
  muon_momentum: 0.95
  muon_lr_multiplier_head: 4.4    # 0.22 / 0.05 = 4.4
  muon_lr_multiplier_embed: 12.0  # 0.6 / 0.05 = 12.0
  muon_lr_multiplier_scalar: 0.8  # 0.04 / 0.05 = 0.8

  # Adam parameters for non-muon groups
  adam_beta1: 0.8
  adam_beta2: 0.95
  adam_epsilon: 1e-10

  seed: 300
  data_seed: 301

  # Evaluation and checkpointing
  evaluation_strategy: steps
  eval_steps: 500
  save_strategy: steps
  save_steps: 500
  save_total_limit: 5
  load_best_model_at_end: false # Needs to be false because it has a problem restoring checkpoints for some reason
  metric_for_best_model: eval_processed_00_documents_eval_s2pdf_loss
  greater_is_better: false

  report_to:
    - wandb
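The two new files differ only in run_name and the base learning rate. A minimal sketch that verifies this, assuming PyYAML is installed and the script is run from the repository root:

import yaml

paths = [
    "olmocr/train/configs/qwen25_vl_olmocrv2_1epoch_muon2e-3.yaml",
    "olmocr/train/configs/qwen25_vl_olmocrv2_1epoch_muon2e-4.yaml",
]
a, b = (yaml.safe_load(open(p)) for p in paths)

def diff(x, y, prefix=""):
    # Walk both configs in parallel and print leaf values that differ.
    if isinstance(x, dict) and isinstance(y, dict):
        for k in sorted(set(x) | set(y)):
            diff(x.get(k), y.get(k), f"{prefix}{k}.")
    elif isinstance(x, list) and isinstance(y, list) and len(x) == len(y):
        for i, (u, v) in enumerate(zip(x, y)):
            diff(u, v, f"{prefix}{i}.")
    elif x != y:
        print(f"{prefix[:-1]}: {x!r} != {y!r}")

diff(a, b)
# Expected: only run_name and training.learning_rate are reported.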
@@ -2,7 +2,7 @@
 
 # Project metadata
 project_name: olmocr-qwen-vl-training
-run_name: qwen2.5-vl-7b-olmocrv2_1epoch_muon
+run_name: qwen2.5-vl-7b-olmocrv2_1epoch_muon_2e-5
 
 # Model configuration
 model: