mirror of
https://github.com/allenai/olmocr.git
synced 2025-10-28 08:33:59 +00:00
Config updates
This commit is contained in:
parent
6a4a55f9e0
commit
8f001bf74c
@ -16,22 +16,22 @@ train_data:
|
|||||||
sources:
|
sources:
|
||||||
- name: openai_batch_data_v5_1_train
|
- name: openai_batch_data_v5_1_train
|
||||||
response_glob_path: /data/jakep/pdfdata/openai_batch_data_v5_1_train_done/*.json
|
response_glob_path: /data/jakep/pdfdata/openai_batch_data_v5_1_train_done/*.json
|
||||||
target_longest_image_dim: 1024
|
target_longest_image_dim: [1024]
|
||||||
target_anchor_text_len: 6000
|
target_anchor_text_len: [6000]
|
||||||
- name: openai_batch_data_v5_1_iabooks_train
|
- name: openai_batch_data_v5_1_iabooks_train
|
||||||
response_glob_path: /data/jakep/pdfdata/openai_batch_data_v5_1_iabooks_train_done/*.json
|
response_glob_path: /data/jakep/pdfdata/openai_batch_data_v5_1_iabooks_train_done/*.json
|
||||||
target_longest_image_dim: 1024
|
target_longest_image_dim: [1024]
|
||||||
target_anchor_text_len: 6000
|
target_anchor_text_len: [6000]
|
||||||
|
|
||||||
valid_data:
|
valid_data:
|
||||||
cache_location: /data/jakep/pdfdata/pdelfin_cache
|
cache_location: /data/jakep/pdfdata/pdelfin_cache
|
||||||
metric_for_best_model: openai_batch_data_v5_1_iabooks_eval
|
metric_for_best_model: openai_batch_data_v5_1_eval_loss
|
||||||
sources:
|
sources:
|
||||||
# These tend to be small, so you can load from s3 it's no big deal
|
# These tend to be small, so you can load from s3 it's no big deal
|
||||||
- name: openai_batch_data_v5_1_eval
|
- name: openai_batch_data_v5_1_eval
|
||||||
response_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_done_v5_1_eval/*.json
|
response_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_done_v5_1_eval/*.json
|
||||||
target_longest_image_dim: 1024
|
target_longest_image_dim: [1024]
|
||||||
target_anchor_text_len: 6000
|
target_anchor_text_len: [6000]
|
||||||
- name: openai_batch_data_v5_1_eval
|
- name: openai_batch_data_v5_1_eval
|
||||||
response_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_done_v5_1_eval/*.json
|
response_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_done_v5_1_eval/*.json
|
||||||
target_longest_image_dim: [1024]
|
target_longest_image_dim: [1024]
|
||||||
|
|||||||
@ -17,11 +17,11 @@ train_data:
|
|||||||
- name: openai_batch_data_v5_1_train
|
- name: openai_batch_data_v5_1_train
|
||||||
response_glob_path: /data/jakep/pdfdata/openai_batch_data_v5_1_train_done/*.json
|
response_glob_path: /data/jakep/pdfdata/openai_batch_data_v5_1_train_done/*.json
|
||||||
target_longest_image_dim: [1024]
|
target_longest_image_dim: [1024]
|
||||||
target_anchor_text_len: [0, 6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000]
|
target_anchor_text_len: [6000]
|
||||||
- name: openai_batch_data_v5_1_iabooks_train
|
- name: openai_batch_data_v5_1_iabooks_train
|
||||||
response_glob_path: /data/jakep/pdfdata/openai_batch_data_v5_1_iabooks_train_done/*.json
|
response_glob_path: /data/jakep/pdfdata/openai_batch_data_v5_1_iabooks_train_done/*.json
|
||||||
target_longest_image_dim: [1024]
|
target_longest_image_dim: [1024]
|
||||||
target_anchor_text_len: [0, 6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000]
|
target_anchor_text_len: [6000]
|
||||||
|
|
||||||
valid_data:
|
valid_data:
|
||||||
cache_location: /data/jakep/pdfdata/pdelfin_cache
|
cache_location: /data/jakep/pdfdata/pdelfin_cache
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user