From a30ca16e1f1876d367794014d34d03f521d36249 Mon Sep 17 00:00:00 2001
From: Jake Poznanski
Date: Mon, 23 Sep 2024 14:41:35 -0700
Subject: [PATCH] Script adjustment

---
 pdelfin/train/config/qwen2vl-2b.yaml | 27 ++++++++++++++-------------
 scripts/qwen2vl-2b-gantry.sh         |  4 ++--
 2 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/pdelfin/train/config/qwen2vl-2b.yaml b/pdelfin/train/config/qwen2vl-2b.yaml
index efe03dc..90f6f23 100644
--- a/pdelfin/train/config/qwen2vl-2b.yaml
+++ b/pdelfin/train/config/qwen2vl-2b.yaml
@@ -62,19 +62,20 @@ hparams:
   warmup_ratio: 0.03
 
 # From https://github.com/QwenLM/Qwen2/blob/main/examples/sft/finetune.py
-lora:
-  rank: 32
-  alpha: 32
-  dropout: 0.05
-  task_type: causal_lm
-  target_modules:
-    - q_proj
-    - k_proj
-    - v_proj
-    - o_proj
-    - gate_proj
-    - up_proj
-    - down_proj
+# Disable LORA for now, because we want the visual network to get trained too
+# lora:
+#   rank: 32
+#   alpha: 32
+#   dropout: 0.05
+#   task_type: causal_lm
+#   target_modules:
+#     - q_proj
+#     - k_proj
+#     - v_proj
+#     - o_proj
+#     - gate_proj
+#     - up_proj
+#     - down_proj
 
 save:
   path: s3://ai2-oe-data/jakep/experiments/qwen2vl-pdf/v1/models/
diff --git a/scripts/qwen2vl-2b-gantry.sh b/scripts/qwen2vl-2b-gantry.sh
index b7fee4b..f0d7a66 100644
--- a/scripts/qwen2vl-2b-gantry.sh
+++ b/scripts/qwen2vl-2b-gantry.sh
@@ -29,8 +29,8 @@ gantry run \
   --workspace ai2/oe-data-pdf \
   --beaker-image 'lucas/refine-axelot-vllm' \
   --venv 'base' \
-  --priority high \
-  --gpus 8 \
+  --priority normal \
+  --gpus 4 \
   --preemptible \
   --cluster "ai2/${CLUSTER}*" \
   --budget ai2/oe-data \