mirror of
https://github.com/allenai/olmocr.git
synced 2025-12-27 07:05:05 +00:00
Hopefully can use weka for the train datasets now
This commit is contained in:
parent
d8e459c9f3
commit
44bcdc771b
@ -29,11 +29,11 @@ train_data:
|
||||
seed: 1337
|
||||
sources:
|
||||
- name: openai_batch_data_v5_1_train
|
||||
query_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_data_v5_1_train/*.jsonl
|
||||
response_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_data_v5_1_train_done/*.json
|
||||
query_glob_path: /data/jakep/pdfdata/openai_batch_data_v5_1_train/*.jsonl
|
||||
response_glob_path: /data/jakep/pdfdata/openai_batch_data_v5_1_train_done/*.json
|
||||
- name: openai_batch_data_v5_1_train
|
||||
query_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_data_v5_1_iabooks_train/*.jsonl
|
||||
response_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_data_v5_1_iabooks_train_done/*.json
|
||||
query_glob_path: /data/jakep/pdfdata/openai_batch_data_v5_1_iabooks_train/*.jsonl
|
||||
response_glob_path: /data/jakep/pdfdata/openai_batch_data_v5_1_iabooks_train_done/*.json
|
||||
|
||||
valid_data:
|
||||
metric_for_best_model: openai_batch_data_v5_1_eval_loss
|
||||
|
||||
@ -19,7 +19,7 @@ run_name=$(basename "$0" .sh)
|
||||
# --cluster 'ai2/allennlp-cirrascale' \
|
||||
# --priority high \
|
||||
|
||||
CLUSTER='pluto'
|
||||
CLUSTER='jupiter'
|
||||
|
||||
gantry run \
|
||||
--description "${run_name}"\
|
||||
@ -35,6 +35,7 @@ gantry run \
|
||||
--preemptible \
|
||||
--cluster "ai2/${CLUSTER}*" \
|
||||
--budget ai2/oe-data \
|
||||
--weka "oe-data-default:/data" \
|
||||
--env LOG_FILTER_TYPE=local_rank0_only \
|
||||
--env OMP_NUM_THREADS=8 \
|
||||
--env BEAKER_USER_ID=$(beaker account whoami --format json | jq '.[0].name' -cr) \
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user