Fixing data config

This commit is contained in:
Jake Poznanski 2025-09-09 15:22:04 +00:00
parent 077e3eea7f
commit 3eaa584ed5

View File

@@ -49,11 +49,11 @@ dataset:
- name: processed_00_documents_train_s2pdf
root_dir: /data/olmOCR-mix-0925/processed_00_documents_train_s2pdf/
pipeline: *basic_pipeline
- name: processed_national_archives
root_dir: /data/olmOCR-mix-0925/processed_national_archives/
- name: processed_02_loc_transcripts
root_dir: /data/olmOCR-mix-0925/processed_02_loc_transcripts/
pipeline: *basic_pipeline
- name: processed_loc_transcripts
root_dir: /data/olmOCR-mix-0925/processed_loc_transcripts/
- name: processed_03_national_archives
root_dir: /data/olmOCR-mix-0925/processed_03_national_archives/
pipeline: *basic_pipeline
eval:
@@ -63,6 +63,12 @@ dataset:
- name: processed_01_books_eval_iabooks
root_dir: /data/olmOCR-mix-0925/processed_01_books_eval_iabooks/
pipeline: *basic_pipeline
- name: processed_02_loc_transcripts_eval
root_dir: /data/olmOCR-mix-0925/processed_02_loc_transcripts_eval/
pipeline: *basic_pipeline
- name: processed_03_national_archives_eval
root_dir: /data/olmOCR-mix-0925/processed_03_national_archives_eval/
pipeline: *basic_pipeline
# Training configuration