# Web-viewer header captured with this listing (not part of the script):
# 78 lines, 2.0 KiB, Bash (Raw / Normal View / History)

# Export WANDB_MODE=disabled so the launched training processes inherit it
# (presumably to switch off Weights & Biases logging for this test run).
export WANDB_MODE=disabled

# Training file(s). The value is expanded unquoted inside data_args, so each
# whitespace-separated word becomes its own argument; paths must therefore not
# contain spaces. Backslash-newline inside the quotes is a line continuation.
train_data="\
    ../example_data/prompt_based/examples.jsonl "

# use a single epoch and a small batch size for testing
num_train_epochs=1
per_device_train_batch_size=2
gradient_accumulation_steps=1
train_group_size=8

# set num_gpus to 2 for testing
num_gpus=2

# Fall back to a cache directory under $HOME when the caller has not provided
# HF_HUB_CACHE (unset or empty); a caller-supplied value is left untouched.
if [ -z "$HF_HUB_CACHE" ]; then
    export HF_HUB_CACHE="$HOME/.cache/huggingface/hub"
fi
# Model and LoRA options handed to the training entry point.
# Every line inside the quotes ends with backslash-newline (a line
# continuation), so the value is a single line; do not put comments or blank
# lines inside the quotes, they would become part of the argument string.
#
# NOTE(review): --model_type used to be passed twice (first "decoder", then
# "from_finetuned_model"). Only the later value is kept here, which is what a
# last-occurrence-wins parser such as argparse would have used anyway -
# TODO confirm against the entry point's argument definitions.
model_args="\
    --model_name_or_path BAAI/bge-reranker-v2-minicpm-layerwise \
    --cache_dir $HF_HUB_CACHE \
    --use_lora True \
    --lora_rank 32 \
    --lora_alpha 64 \
    --use_flash_attn True \
    --target_modules q_proj k_proj v_proj o_proj \
    --save_merged_lora_model True \
    --model_type from_finetuned_model \
    --start_layer 8 \
    --head_multi True \
    --head_type simple \
    --trust_remote_code True \
"
# Dataset and prompt-formatting options handed to the training entry point.
# The value is a single line (each line ends with a line continuation); do not
# put comments or blank lines inside the quotes.
#
# - $train_data and $train_group_size are expanded here, at assignment time.
# - "~" is NOT expanded inside double quotes; it stays literal in this string
#   and is only expanded when the final command line is re-parsed by eval.
# - The single quotes around 'A: ', 'B: ' and '{}{}' are stored literally and
#   rely on that same eval pass to be treated as quoting (this keeps the
#   trailing space of the instruction prefixes).
data_args="\
    --train_data $train_data \
    --cache_path ~/.cache \
    --train_group_size $train_group_size \
    --query_max_len 512 \
    --passage_max_len 512 \
    --pad_to_multiple_of 8 \
    --knowledge_distillation True \
    --query_instruction_for_rerank 'A: ' \
    --query_instruction_format '{}{}' \
    --passage_instruction_for_rerank 'B: ' \
    --passage_instruction_format '{}{}' \
"
# Trainer options handed to the training entry point.
# The value is a single line (each line ends with a line continuation); do not
# put comments or blank lines inside the quotes.
#
# The epoch, batch-size and accumulation values come from the variables set
# earlier in this script and are expanded at assignment time.
# NOTE(review): --output_dir and --deepspeed are relative paths, so they
# resolve against the directory the script is launched from - confirm the
# intended working directory.
training_args="\
    --output_dir ./test_decoder_only_base_bge-reranker-v2-minicpm-layerwise \
    --overwrite_output_dir \
    --learning_rate 2e-4 \
    --fp16 \
    --num_train_epochs $num_train_epochs \
    --per_device_train_batch_size $per_device_train_batch_size \
    --gradient_accumulation_steps $gradient_accumulation_steps \
    --dataloader_drop_last True \
    --warmup_ratio 0.1 \
    --gradient_checkpointing \
    --weight_decay 0.01 \
    --deepspeed ../../ds_stage0.json \
    --logging_steps 1 \
    --save_steps 1000 \
"
# Assemble the torchrun launch line from num_gpus and the three argument
# strings defined earlier in this script. The value is a single line (each
# line ends with a line continuation); do not put comments or blank lines
# inside the quotes.
cmd="torchrun --nproc_per_node $num_gpus \
    --master_port=4567 \
    -m FlagEmbedding.finetune.reranker.decoder_only.layerwise \
    $model_args \
    $data_args \
    $training_args \
"

# Print the command, then run it.
# eval is deliberate: the argument strings carry literal single quotes
# (e.g. 'A: ') and a literal "~", which only take effect when the line is
# parsed again by the shell. Because of that re-parse, every value spliced in
# (including HF_HUB_CACHE from the environment) must be trusted and free of
# shell metacharacters.
# Both expansions are quoted so the command string is passed through unchanged
# instead of being word-split and glob-expanded first.
# eval stays the last command so the script exits with the launcher's status.
printf '%s\n' "$cmd"
eval "$cmd"