From 07c0323c91b68928e9894e0d97feb9c0af8b71b4 Mon Sep 17 00:00:00 2001
From: Jake Poznanski
Date: Wed, 25 Sep 2024 07:57:01 -0700
Subject: [PATCH] Adding lora config to try to address OOMs

---
 pdelfin/train/config/qwen2vl-2b-lora.yaml | 84 +++++++++++++++++++++++
 1 file changed, 84 insertions(+)
 create mode 100644 pdelfin/train/config/qwen2vl-2b-lora.yaml

diff --git a/pdelfin/train/config/qwen2vl-2b-lora.yaml b/pdelfin/train/config/qwen2vl-2b-lora.yaml
new file mode 100644
index 0000000..b4ff3dd
--- /dev/null
+++ b/pdelfin/train/config/qwen2vl-2b-lora.yaml
@@ -0,0 +1,84 @@
+model:
+  name_or_path: Qwen/Qwen2-VL-2B-Instruct
+  arch: causal
+  use_flash_attn: true
+
+wandb:
+  project: pdelfin
+  entity: ai2-llm
+
+# TODO This is not used
+format:
+  instruction_template: "Original:"
+  response_template: "Rewritten:"
+  # Template from here: https://github.com/QwenLM/Qwen2/blob/main/examples/sft/finetune.py#L30
+  chat_template: |
+    {% for message in messages %}
+    {{'<|im_start|>' + message['role'] + '\n' + message['content']}}
+    {% if loop.last %}
+    {{ '<|im_end|>'}}
+    {% else %}
+    {{ '<|im_end|>\n' }}
+    {% endif %}
+    {% endfor %}
+
+generate:
+  max_length: 4096
+
+train_data:
+  seed: 1337
+  sources:
+    - name: openai_batch_data_v2
+      query_glob_path: s3://ai2-oe-data/jakep/openai_batch_data_v2/*.jsonl
+      response_glob_path: s3://ai2-oe-data/jakep/openai_batch_done_v2/*.json
+      backend:
+        - openai
+      size: 100_000
+
+valid_data:
+  sources:
+    - name: openai_batch_data_eval_mini
+      query_glob_path: s3://ai2-oe-data/jakep/openai_batch_data_eval_mini/*.jsonl
+      response_glob_path: s3://ai2-oe-data/jakep/openai_batch_done_eval_mini/*.json
+      backend:
+        - openai
+      size: 100_000
+
+# Mostly pulled from https://github.com/QwenLM/Qwen2/blob/main/examples/sft/finetune.sh
+hparams:
+  batch_size: 1
+  eval_batch_size: 1
+  gradient_accumulation_steps: 4
+  gradient_checkpointing: false
+  clip_grad_norm: 1.0
+  learning_rate: 3e-4
+  max_steps: 2000
+  pad_multiple_of: 16
+  log_every_steps: 50
+  eval_every_steps: 1000
+  optim: adamw_torch
+  lr_scheduler: cosine
+  weight_decay: 0.01
+  warmup_ratio: 0.03
+
+# From https://github.com/QwenLM/Qwen2/blob/main/examples/sft/finetune.py
+lora:
+  rank: 32
+  alpha: 32
+  dropout: 0.05
+  task_type: causal_lm
+  target_modules:
+    - q_proj
+    - k_proj
+    - v_proj
+    - o_proj
+    - gate_proj
+    - up_proj
+    - down_proj
+    - proj  # For the vision net
+
+save:
+  path: s3://ai2-oe-data/jakep/experiments/qwen2vl-pdf/v1/models/
+  save_every_steps: 500
+
+max_workers: 10
\ No newline at end of file
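
Note (not part of the patch): the lora block above is the piece aimed at the OOMs, since training only low-rank adapter matrices on the listed projection modules keeps the full 2B base model frozen and cuts optimizer-state memory. The pdelfin trainer's actual model/adapter wiring is not shown in this diff, so the following is only a minimal sketch of how a config like this could be applied with Hugging Face PEFT; the loading calls and dtype choice are assumptions, not the repo's code.

    # Sketch: apply the lora: section of qwen2vl-2b-lora.yaml via PEFT (assumed wiring).
    import torch
    from transformers import Qwen2VLForConditionalGeneration
    from peft import LoraConfig, get_peft_model

    # Load the base model named in model.name_or_path; flash attention mirrors
    # use_flash_attn: true (dtype is an assumption here).
    model = Qwen2VLForConditionalGeneration.from_pretrained(
        "Qwen/Qwen2-VL-2B-Instruct",
        torch_dtype=torch.bfloat16,
        attn_implementation="flash_attention_2",
    )

    # Mirror rank/alpha/dropout/task_type/target_modules from the YAML's lora: block.
    lora_config = LoraConfig(
        r=32,
        lora_alpha=32,
        lora_dropout=0.05,
        task_type="CAUSAL_LM",
        target_modules=[
            "q_proj", "k_proj", "v_proj", "o_proj",
            "gate_proj", "up_proj", "down_proj",
            "proj",  # vision-side projection, per the YAML comment
        ],
    )

    # Wrap the model so only the LoRA adapter weights receive gradients.
    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()

With adapters on all seven projection module names, the trainable parameter count stays a small fraction of the base model, which is the usual reason a LoRA config like this relieves out-of-memory failures at batch_size 1 with gradient accumulation.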