mirror of
https://github.com/allenai/olmocr.git
synced 2025-08-16 12:52:12 +00:00
train script
This commit is contained in:
parent
8f001bf74c
commit
cf3b377bb9
49
scripts/molmo-7b-lora-gantry.sh
Executable file
49
scripts/molmo-7b-lora-gantry.sh
Executable file
@ -0,0 +1,49 @@
|
||||
#!/usr/bin/env bash
#
# Submits a Molmo-7B LoRA training job to Beaker through `gantry run`.
# Needs the `jq`, `beaker` and `gantry` commands on PATH.

# -e: stop at the first failing command; -u: unset variables are errors;
# -x: trace each command; pipefail: a pipeline fails if any stage fails.
set -euxo pipefail

# jq is used later to pull the account name out of `beaker account whoami`.
if ! command -v jq &> /dev/null; then
  echo "jq could not be found. Please install it." >&2
  # Explicit non-zero status: a bare `exit` would reuse the status of the
  # echo above (0) and report the missing dependency as success.
  exit 1
fi

# Arguments shared by the dataset-loading step and the training step.
# Single quotes keep ${BEAKER_USER_ID} literal on purpose: this string is
# spliced into the job's `bash -c` command line, and the variable is only
# expanded there, where gantry has set it through --env.
# shellcheck disable=SC2016
EXTRA_ARGS='-c pdelfin/train/config/molmo-7b-lora.yaml --num_proc 64 --save.path "s3://ai2-oe-data/jakep/experiments/molmo-pdf/v1/models/${BEAKER_USER_ID}"'

# The run is named after this script (file name without the .sh suffix).
run_name=$(basename "$0" .sh)

# Alternative gantry options kept for reference:
#   --cluster 'ai2/jupiter*' \
#   --cluster 'ai2/pluto*' \
#   --cluster 'ai2/allennlp-cirrascale' \
#   --priority high \

# Cluster short name; used for the gantry --cluster pattern and to pick the
# scripts/beaker/<cluster>-ib.sh setup script sourced inside the job.
CLUSTER='jupiter'

# Resolve the Beaker account name up front so a failed lookup aborts the
# launch instead of silently submitting a job with an empty user ID (which
# would make the save path end in a bare "models/" prefix).
beaker_user_id=$(beaker account whoami --format json | jq -cr '.[0].name')
if [[ -z "${beaker_user_id}" || "${beaker_user_id}" == "null" ]]; then
  echo "Could not determine the Beaker user name from 'beaker account whoami'." >&2
  exit 1
fi

# Submit the job. Everything after `--` is the command run inside the job:
# it sources the cluster-specific setup script, runs the dataset loader, then
# starts training through `accelerate launch`.
# \${BEAKER_ASSIGNED_GPU_COUNT} is escaped so that it is expanded inside the
# job rather than by this shell.
gantry run \
  --description "${run_name}" \
  --task-name "${run_name}" \
  --allow-dirty \
  --host-networking \
  --workspace ai2/oe-data-model-based-cleanup \
  --beaker-image 'jakep/jakep-pdf-finetunev1.2' \
  --venv 'base' \
  --pip gantry-requirements.txt \
  --priority high \
  --gpus 8 \
  --preemptible \
  --cluster "ai2/${CLUSTER}*" \
  --budget ai2/oe-data \
  --weka "oe-data-default:/data" \
  --env LOG_FILTER_TYPE=local_rank0_only \
  --env OMP_NUM_THREADS=8 \
  --env "BEAKER_USER_ID=${beaker_user_id}" \
  --env-secret AWS_ACCESS_KEY_ID=S2_AWS_ACCESS_KEY_ID \
  --env-secret AWS_SECRET_ACCESS_KEY=S2_AWS_SECRET_ACCESS_KEY \
  --env-secret DS_AWS_ACCESS_KEY_ID=S2_AWS_ACCESS_KEY_ID \
  --env-secret DS_AWS_SECRET_ACCESS_KEY=S2_AWS_SECRET_ACCESS_KEY \
  --env-secret WANDB_API_KEY=JAKE_WANDB_API_KEY \
  --shared-memory 10GiB \
  --yes \
  -- /bin/bash -c "source scripts/beaker/${CLUSTER}-ib.sh && python -m pdelfin.train.loaddataset ${EXTRA_ARGS} && accelerate launch --multi_gpu --num_processes \${BEAKER_ASSIGNED_GPU_COUNT} --mixed_precision bf16 -m pdelfin.train.train ${EXTRA_ARGS}"
Loading…
x
Reference in New Issue
Block a user