diff --git a/ch05/03_bonus_pretraining_on_gutenberg/prepare_dataset.py b/ch05/03_bonus_pretraining_on_gutenberg/prepare_dataset.py index df66ecf..a8fe5f8 100644 --- a/ch05/03_bonus_pretraining_on_gutenberg/prepare_dataset.py +++ b/ch05/03_bonus_pretraining_on_gutenberg/prepare_dataset.py @@ -69,5 +69,5 @@ if __name__ == "__main__": for name in files if name.endswith((".txt", ".txt.utf8")) and "raw" not in path] print(f"{len(all_files)} file(s) to process.") - file_counter = combine_files(all_files, args.output_dir) + file_counter = combine_files(all_files, args.output_dir, max_size_mb=args.max_size_mb) print(f"{file_counter} file(s) saved in {os.path.abspath(args.output_dir)}")