More configs

This commit is contained in:
Jake Poznanski 2025-01-23 23:39:56 +00:00
parent 858b49656f
commit dabecd9ef0

View File

@ -0,0 +1,88 @@
model:
name_or_path: allenai/Molmo-7B-O-0924
arch: causal
use_flash_attn: true
wandb:
project: pdelfin
entity: ai2-llm
generate:
max_length: 8192
train_data:
seed: 1337
cache_location: /data/jakep/pdfdata/pdelfin_cache
sources:
- name: openai_batch_data_v5_1_train
response_glob_path: /data/jakep/pdfdata/openai_batch_data_v5_1_train_done/*.json
target_longest_image_dim: [1024]
target_anchor_text_len: [6000]
- name: openai_batch_data_v5_1_iabooks_train
response_glob_path: /data/jakep/pdfdata/openai_batch_data_v5_1_iabooks_train_done/*.json
target_longest_image_dim: [1024]
target_anchor_text_len: [6000]
valid_data:
cache_location: /data/jakep/pdfdata/pdelfin_cache
metric_for_best_model: openai_batch_data_v5_1_eval_loss
sources:
# These tend to be small, so you can load from s3 it's no big deal
- name: openai_batch_data_v5_1_eval
response_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_done_v5_1_eval/*.json
target_longest_image_dim: [1024]
target_anchor_text_len: [6000]
- name: openai_batch_data_v5_1_eval
response_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_done_v5_1_eval/*.json
target_longest_image_dim: [1024]
target_anchor_text_len: [6000]
# Mostly pulled from https://github.com/QwenLM/Qwen2/blob/main/examples/sft/finetune.sh
hparams:
batch_size: 1
eval_batch_size: 1
gradient_accumulation_steps: 4
gradient_checkpointing: true
clip_grad_norm: 1.0
learning_rate: 1e-4
max_steps: 10000
pad_multiple_of: 16
log_every_steps: 10
eval_every_steps: 100
optim: adamw_torch
lr_scheduler: cosine
weight_decay: 0.01
warmup_ratio: 0.03
# From https://github.com/QwenLM/Qwen2/blob/main/examples/sft/finetune.py
lora:
rank: 32
alpha: 32
dropout: 0.05
task_type: CAUSAL_LM
target_modules:
# attention layers in main transformer
- att_proj
- ff_proj
- attn_out
- ff_out
# vision transformer attention and FF
- attention.wq
- attention.wk
- attention.wv
- attention.wo
- feed_forward.w1
- feed_forward.w2
# vision image projector
- vision_backbone.image_projector.w1
- vision_backbone.image_projector.w2
- vision_backbone.image_projector.w3
save:
path: s3://ai2-oe-data/jakep/experiments/molmo-o-0924/v1/models/
save_every_steps: 1000
max_workers: 10