From 2fab89d47ea2621cecdba2ee7f25aea1b3196d0a Mon Sep 17 00:00:00 2001
From: Sebastian Raschka
Date: Tue, 2 Apr 2024 13:29:23 -0500
Subject: [PATCH] Use max size properly

---
 ch05/03_bonus_pretraining_on_gutenberg/prepare_dataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ch05/03_bonus_pretraining_on_gutenberg/prepare_dataset.py b/ch05/03_bonus_pretraining_on_gutenberg/prepare_dataset.py
index df66ecf..a8fe5f8 100644
--- a/ch05/03_bonus_pretraining_on_gutenberg/prepare_dataset.py
+++ b/ch05/03_bonus_pretraining_on_gutenberg/prepare_dataset.py
@@ -69,5 +69,5 @@ if __name__ == "__main__":
                  for name in files if name.endswith((".txt", ".txt.utf8")) and "raw" not in path]
 
     print(f"{len(all_files)} file(s) to process.")
-    file_counter = combine_files(all_files, args.output_dir)
+    file_counter = combine_files(all_files, args.output_dir, max_size_mb=args.max_size_mb)
     print(f"{file_counter} file(s) saved in {os.path.abspath(args.output_dir)}")