This commit is contained in:
Jake Poznanski 2025-09-10 17:01:15 +00:00
parent 43ccd82609
commit 1ee3dce948

View File

@@ -49,11 +49,11 @@ dataset:
- name: processed_00_documents_train_s2pdf
root_dir: /data/olmOCR-mix-0925/processed_00_documents_train_s2pdf/
pipeline: *basic_pipeline
- name: processed_national_archives
root_dir: /data/olmOCR-mix-0925/processed_national_archives/
- name: processed_02_loc_transcripts
root_dir: /data/olmOCR-mix-0925/processed_02_loc_transcripts/
pipeline: *basic_pipeline
- name: processed_loc_transcripts
root_dir: /data/olmOCR-mix-0925/processed_loc_transcripts/
- name: processed_03_national_archives
root_dir: /data/olmOCR-mix-0925/processed_03_national_archives/
pipeline: *basic_pipeline
eval:
@@ -63,6 +63,12 @@ dataset:
- name: processed_01_books_eval_iabooks
root_dir: /data/olmOCR-mix-0925/processed_01_books_eval_iabooks/
pipeline: *basic_pipeline
- name: processed_02_loc_transcripts_eval
root_dir: /data/olmOCR-mix-0925/processed_02_loc_transcripts_eval/
pipeline: *basic_pipeline
- name: processed_03_national_archives_eval
root_dir: /data/olmOCR-mix-0925/processed_03_national_archives_eval/
pipeline: *basic_pipeline
# Training configuration