Config updates

This commit is contained in:
Jake Poznanski 2024-10-30 14:02:57 -07:00
parent 6a4a55f9e0
commit 8f001bf74c
2 changed files with 9 additions and 9 deletions

View File

@@ -16,22 +16,22 @@ train_data:
sources:
- name: openai_batch_data_v5_1_train
response_glob_path: /data/jakep/pdfdata/openai_batch_data_v5_1_train_done/*.json
target_longest_image_dim: 1024
target_anchor_text_len: 6000
target_longest_image_dim: [1024]
target_anchor_text_len: [6000]
- name: openai_batch_data_v5_1_iabooks_train
response_glob_path: /data/jakep/pdfdata/openai_batch_data_v5_1_iabooks_train_done/*.json
target_longest_image_dim: 1024
target_anchor_text_len: 6000
target_longest_image_dim: [1024]
target_anchor_text_len: [6000]
valid_data:
cache_location: /data/jakep/pdfdata/pdelfin_cache
metric_for_best_model: openai_batch_data_v5_1_iabooks_eval
metric_for_best_model: openai_batch_data_v5_1_eval_loss
sources:
# These tend to be small, so loading them directly from S3 is no big deal
- name: openai_batch_data_v5_1_eval
response_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_done_v5_1_eval/*.json
target_longest_image_dim: 1024
target_anchor_text_len: 6000
target_longest_image_dim: [1024]
target_anchor_text_len: [6000]
- name: openai_batch_data_v5_1_eval
response_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_done_v5_1_eval/*.json
target_longest_image_dim: [1024]

View File

@@ -17,11 +17,11 @@ train_data:
- name: openai_batch_data_v5_1_train
response_glob_path: /data/jakep/pdfdata/openai_batch_data_v5_1_train_done/*.json
target_longest_image_dim: [1024]
target_anchor_text_len: [0, 6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000]
target_anchor_text_len: [6000]
- name: openai_batch_data_v5_1_iabooks_train
response_glob_path: /data/jakep/pdfdata/openai_batch_data_v5_1_iabooks_train_done/*.json
target_longest_image_dim: [1024]
target_anchor_text_len: [0, 6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000]
target_anchor_text_len: [6000]
valid_data:
cache_location: /data/jakep/pdfdata/pdelfin_cache