Mirror of https://github.com/allenai/olmocr.git (synced 2025-07-30 20:41:29 +00:00)
Adding lora config to try to address OOMs
This commit is contained in:
parent ea0226c499
commit 07c0323c91
pdelfin/train/config/qwen2vl-2b-lora.yaml (new file, 84 lines added)
@@ -0,0 +1,84 @@
model:
  name_or_path: Qwen/Qwen2-VL-2B-Instruct
  arch: causal
  use_flash_attn: true

wandb:
  project: pdelfin
  entity: ai2-llm

# TODO This is not used
format:
  instruction_template: "Original:"
  response_template: "Rewritten:"
  # Template from here: https://github.com/QwenLM/Qwen2/blob/main/examples/sft/finetune.py#L30
  chat_template: |
    {% for message in messages %}
    {{'<|im_start|>' + message['role'] + '\n' + message['content']}}
    {% if loop.last %}
    {{ '<|im_end|>'}}
    {% else %}
    {{ '<|im_end|>\n' }}
    {% endif %}
    {% endfor %}

generate:
  max_length: 4096

train_data:
  seed: 1337
  sources:
    - name: openai_batch_data_v2
      query_glob_path: s3://ai2-oe-data/jakep/openai_batch_data_v2/*.jsonl
      response_glob_path: s3://ai2-oe-data/jakep/openai_batch_done_v2/*.json
      backend:
        - openai
      size: 100_000

valid_data:
  sources:
    - name: openai_batch_data_eval_mini
      query_glob_path: s3://ai2-oe-data/jakep/openai_batch_data_eval_mini/*.jsonl
      response_glob_path: s3://ai2-oe-data/jakep/openai_batch_done_eval_mini/*.json
      backend:
        - openai
      size: 100_000

# Mostly pulled from https://github.com/QwenLM/Qwen2/blob/main/examples/sft/finetune.sh
hparams:
  batch_size: 1
  eval_batch_size: 1
  gradient_accumulation_steps: 4
  gradient_checkpointing: false
  clip_grad_norm: 1.0
  learning_rate: 3e-4
  max_steps: 2000
  pad_multiple_of: 16
  log_every_steps: 50
  eval_every_steps: 1000
  optim: adamw_torch
  lr_scheduler: cosine
  weight_decay: 0.01
  warmup_ratio: 0.03

# From https://github.com/QwenLM/Qwen2/blob/main/examples/sft/finetune.py
lora:
  rank: 32
  alpha: 32
  dropout: 0.05
  task_type: causal_lm
  target_modules:
    - q_proj
    - k_proj
    - v_proj
    - o_proj
    - gate_proj
    - up_proj
    - down_proj
    - proj  # For the vision net

save:
  path: s3://ai2-oe-data/jakep/experiments/qwen2vl-pdf/v1/models/
  save_every_steps: 500

max_workers: 10
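Context for the commit message: LoRA addresses OOMs by freezing the base weights and training only small adapter matrices on the listed target_modules, so optimizer state and gradients are kept only for the adapters. The snippet below is a minimal sketch, not pdelfin's actual trainer, of how the lora: block above would map onto Hugging Face peft; the model name and LoRA values come from the YAML, everything else (dtype, the absence of a data pipeline) is an illustrative assumption.

    # Minimal sketch: wrap Qwen2-VL-2B with the LoRA settings from the config above.
    # NOTE: illustrative only; pdelfin's real training entry point lives elsewhere in the repo.
    import torch
    from transformers import Qwen2VLForConditionalGeneration
    from peft import LoraConfig, TaskType, get_peft_model

    model = Qwen2VLForConditionalGeneration.from_pretrained(
        "Qwen/Qwen2-VL-2B-Instruct",
        torch_dtype=torch.bfloat16,   # flash attention / precision handled by the real trainer
    )

    lora_config = LoraConfig(
        r=32,                          # lora.rank
        lora_alpha=32,                 # lora.alpha
        lora_dropout=0.05,             # lora.dropout
        task_type=TaskType.CAUSAL_LM,  # lora.task_type: causal_lm
        target_modules=[
            "q_proj", "k_proj", "v_proj", "o_proj",
            "gate_proj", "up_proj", "down_proj",
            "proj",                    # vision projection, per the comment in the config
        ],
    )

    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()  # only the adapters train, hence the smaller memory footprint

With rank 32 adapters on a 2B model, the trainable parameter count drops to a small fraction of the full model, which is the intended memory saving.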