mirror of
https://github.com/allenai/olmocr.git
synced 2025-10-11 16:22:29 +00:00
Fix dataloader bug
This commit is contained in:
parent
3d36545fa5
commit
a90eb94951
@@ -51,7 +51,7 @@ def load_jsonl_into_ds(s3_glob_path: str, first_n_files: int = None) -> Dataset:
|
|||||||
"""
|
"""
|
||||||
Loads JSONL files from the specified S3 path into a Hugging Face Dataset.
|
Loads JSONL files from the specified S3 path into a Hugging Face Dataset.
|
||||||
"""
|
"""
|
||||||
all_json_files = s3_glob_path(s3_glob_path)
|
all_json_files = list_dataset_files(s3_glob_path)
|
||||||
|
|
||||||
if first_n_files:
|
if first_n_files:
|
||||||
all_json_files = all_json_files[:first_n_files]
|
all_json_files = all_json_files[:first_n_files]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user