Setting up for a real train run

This commit is contained in:
Jake Poznanski 2024-09-23 14:32:10 -07:00
parent 0812b0dd77
commit a3feca01fc
2 changed files with 9 additions and 9 deletions

View File

@@ -28,18 +28,18 @@ generate:
train_data: train_data:
seed: 1337 seed: 1337
sources: sources:
- name: openai_batch_data_v2_mini - name: openai_batch_data_v2
query_glob_path: s3://ai2-oe-data/jakep/openai_batch_data_v2_mini/*.jsonl query_glob_path: s3://ai2-oe-data/jakep/openai_batch_data_v2/*.jsonl
response_glob_path: s3://ai2-oe-data/jakep/openai_batch_done_v2_mini/*.json response_glob_path: s3://ai2-oe-data/jakep/openai_batch_done_v2/*.json
backend: backend:
- openai - openai
size: 100_000 size: 100_000
valid_data: valid_data:
sources: sources:
- name: openai_batch_data_v2_mini - name: openai_batch_data_eval_mini
query_glob_path: s3://ai2-oe-data/jakep/openai_batch_data_v2_mini/*.jsonl query_glob_path: s3://ai2-oe-data/jakep/openai_batch_data_eval_mini/*.jsonl
response_glob_path: s3://ai2-oe-data/jakep/openai_batch_done_v2_mini/*.json response_glob_path: s3://ai2-oe-data/jakep/openai_batch_done_eval_mini/*.json
backend: backend:
- openai - openai
size: 100_000 size: 100_000
@@ -52,7 +52,7 @@ hparams:
gradient_checkpointing: false gradient_checkpointing: false
clip_grad_norm: 1.0 clip_grad_norm: 1.0
learning_rate: 3e-4 learning_rate: 3e-4
max_steps: 200 max_steps: 2000
pad_multiple_of: 16 pad_multiple_of: 16
log_every_steps: 5 log_every_steps: 5
eval_every_steps: 100 eval_every_steps: 100
@@ -78,6 +78,6 @@ lora:
save: save:
path: s3://ai2-oe-data/jakep/experiments/qwen2vl-pdf/v1/models/ path: s3://ai2-oe-data/jakep/experiments/qwen2vl-pdf/v1/models/
save_every_steps: 100 save_every_steps: 500
max_workers: 30 max_workers: 30

View File

@@ -26,7 +26,7 @@ gantry run \
--task-name "${run_name}"\ --task-name "${run_name}"\
--allow-dirty \ --allow-dirty \
--host-networking \ --host-networking \
--workspace ai2/oe-data-model-based-cleanup \ --workspace ai2/oe-data-pdf \
--beaker-image 'lucas/refine-axelot-vllm' \ --beaker-image 'lucas/refine-axelot-vllm' \
--venv 'base' \ --venv 'base' \
--priority high \ --priority high \