mirror of
https://github.com/allenai/olmocr.git
synced 2025-12-02 10:10:44 +00:00
Mini train config
This commit is contained in:
parent
4505a49420
commit
2227605bfb
@ -30,16 +30,16 @@ train_data:
|
||||
sources:
|
||||
- name: openai_batch_data_v5_1_eval # TODO: This is only for testing the job; once ready, switch to a real training dataset.
|
||||
query_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_data_v5_1_eval/*.jsonl
|
||||
response_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_done_v5_1_eval/*.jsonl
|
||||
response_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_done_v5_1_eval/*.json
|
||||
|
||||
valid_data:
|
||||
sources:
|
||||
- name: openai_batch_data_v5_1_eval
|
||||
query_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_data_v5_1_eval/*.jsonl
|
||||
response_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_done_v5_1_eval/*.jsonl
|
||||
response_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_done_v5_1_eval/*.json
|
||||
- name: openai_batch_data_v5_1_iabooks_eval
|
||||
query_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_data_v5_1_iabooks_eval/*.jsonl
|
||||
response_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_done_v5_1_iabooks_eval/*.jsonl
|
||||
response_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_done_v5_1_iabooks_eval/*.json
|
||||
|
||||
|
||||
|
||||
@ -51,7 +51,7 @@ hparams:
|
||||
gradient_checkpointing: false
|
||||
clip_grad_norm: 1.0
|
||||
learning_rate: 3e-4
|
||||
max_steps: 2000
|
||||
max_steps: 500
|
||||
pad_multiple_of: 16
|
||||
log_every_steps: 50
|
||||
eval_every_steps: 100
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user