mirror of
https://github.com/allenai/olmocr.git
synced 2025-10-26 23:53:31 +00:00
Config updates
This commit is contained in:
parent
6a4a55f9e0
commit
8f001bf74c
@ -16,22 +16,22 @@ train_data:
|
||||
sources:
|
||||
- name: openai_batch_data_v5_1_train
|
||||
response_glob_path: /data/jakep/pdfdata/openai_batch_data_v5_1_train_done/*.json
|
||||
target_longest_image_dim: 1024
|
||||
target_anchor_text_len: 6000
|
||||
target_longest_image_dim: [1024]
|
||||
target_anchor_text_len: [6000]
|
||||
- name: openai_batch_data_v5_1_iabooks_train
|
||||
response_glob_path: /data/jakep/pdfdata/openai_batch_data_v5_1_iabooks_train_done/*.json
|
||||
target_longest_image_dim: 1024
|
||||
target_anchor_text_len: 6000
|
||||
target_longest_image_dim: [1024]
|
||||
target_anchor_text_len: [6000]
|
||||
|
||||
valid_data:
|
||||
cache_location: /data/jakep/pdfdata/pdelfin_cache
|
||||
metric_for_best_model: openai_batch_data_v5_1_iabooks_eval
|
||||
metric_for_best_model: openai_batch_data_v5_1_eval_loss
|
||||
sources:
|
||||
# These tend to be small, so loading them from S3 is no big deal
|
||||
- name: openai_batch_data_v5_1_eval
|
||||
response_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_done_v5_1_eval/*.json
|
||||
target_longest_image_dim: 1024
|
||||
target_anchor_text_len: 6000
|
||||
target_longest_image_dim: [1024]
|
||||
target_anchor_text_len: [6000]
|
||||
- name: openai_batch_data_v5_1_eval
|
||||
response_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_done_v5_1_eval/*.json
|
||||
target_longest_image_dim: [1024]
|
||||
|
||||
@ -17,11 +17,11 @@ train_data:
|
||||
- name: openai_batch_data_v5_1_train
|
||||
response_glob_path: /data/jakep/pdfdata/openai_batch_data_v5_1_train_done/*.json
|
||||
target_longest_image_dim: [1024]
|
||||
target_anchor_text_len: [0, 6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000]
|
||||
target_anchor_text_len: [6000]
|
||||
- name: openai_batch_data_v5_1_iabooks_train
|
||||
response_glob_path: /data/jakep/pdfdata/openai_batch_data_v5_1_iabooks_train_done/*.json
|
||||
target_longest_image_dim: [1024]
|
||||
target_anchor_text_len: [0, 6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000]
|
||||
target_anchor_text_len: [6000]
|
||||
|
||||
valid_data:
|
||||
cache_location: /data/jakep/pdfdata/pdelfin_cache
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user