model:
  name_or_path: Qwen/Qwen2-VL-7B-Instruct
  arch: causal
  use_flash_attn: true

wandb:
  project: pdelfin
  entity: ai2-llm

generate:
  max_length: 8192

train_data:
  seed: 1337
  sources:
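    # Each source maps a glob of completed OpenAI batch responses (the training
    # targets) to rendering limits: target_longest_image_dim is the longest side,
    # in pixels, of the rendered page image, and target_anchor_text_len caps the
    # length of the anchor text pulled from the PDF for the prompt.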
    - name: openai_batch_data_v5_1_train
      response_glob_path: /data/jakep/pdfdata/openai_batch_data_v5_1_train_done/*.json
      target_longest_image_dim: 1024
      target_anchor_text_len: 6000
    - name: openai_batch_data_v5_1_iabooks_train
      response_glob_path: /data/jakep/pdfdata/openai_batch_data_v5_1_iabooks_train_done/*.json
      target_longest_image_dim: 1024
      target_anchor_text_len: 6000

valid_data:
  metric_for_best_model: openai_batch_data_v5_1_eval_loss
  sources:
    # These are small, so loading them directly from S3 is fine
    - name: openai_batch_data_v5_1_eval
      response_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_done_v5_1_eval/*.json
      target_longest_image_dim: 1024
      target_anchor_text_len: 6000
    - name: openai_batch_data_v5_1_iabooks_eval
      response_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_done_v5_1_iabooks_eval/*.json
      target_longest_image_dim: 1024
      target_anchor_text_len: 6000

# Mostly pulled from https://github.com/QwenLM/Qwen2/blob/main/examples/sft/finetune.sh
hparams:
  batch_size: 1
  eval_batch_size: 1
  gradient_accumulation_steps: 4
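  # Effective batch size per device is batch_size * gradient_accumulation_steps = 4.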
  gradient_checkpointing: false
  clip_grad_norm: 1.0
  learning_rate: 1e-4
  max_steps: 10000
  pad_multiple_of: 16
  log_every_steps: 10
  eval_every_steps: 100
  optim: adamw_torch
  lr_scheduler: cosine
  weight_decay: 0.01
  warmup_ratio: 0.03

# From https://github.com/QwenLM/Qwen2/blob/main/examples/sft/finetune.py
lora:
  rank: 32
  alpha: 32
  dropout: 0.05
  task_type: causal_lm
  target_modules:
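    # The plain entries target the language-model attention/MLP projections; the
    # visual.* entries are regex-style patterns for the Qwen2-VL vision tower
    # (ViT block attention/MLP and the patch-merger MLP), so the image encoder is
    # adapted alongside the LLM.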
    - q_proj
    - k_proj
    - v_proj
    - o_proj
    - gate_proj
    - up_proj
    - down_proj
    - visual.blocks.[0-9]+.attn.qkv
    - visual.blocks.[0-9]+.attn.proj
    - visual.blocks.[0-9]+.mlp.fc1
    - visual.blocks.[0-9]+.mlp.fc2
    - visual.merger.mlp.0
    - visual.merger.mlp.2

save:
  path: s3://ai2-oe-data/jakep/experiments/qwen2vl-pdf/v1/models/
  save_every_steps: 1000

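# max_workers presumably controls the number of worker processes used for dataset loading/preprocessing.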
max_workers: 10