From 73be1c592fc24a3a9cf3b2073617dff42e5ccaec Mon Sep 17 00:00:00 2001 From: Daniel Kleine <53251018+d-kleine@users.noreply.github.com> Date: Thu, 20 Jun 2024 00:36:46 +0200 Subject: [PATCH] fixed num_workers (#229) * fixed num_workers * ch06 & ch07: added num_workers to create_dataloader_v1 --- appendix-D/01_main-chapter-code/previous_chapters.py | 2 +- ch02/01_main-chapter-code/ch02.ipynb | 2 +- ch02/01_main-chapter-code/dataloader.ipynb | 2 +- ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb | 2 +- ch04/01_main-chapter-code/gpt.py | 4 ++-- ch04/01_main-chapter-code/previous_chapters.py | 2 +- ch04/02_performance-analysis/previous_chapters.py | 2 +- ch05/01_main-chapter-code/previous_chapters.py | 2 +- ch05/02_alternative_weight_loading/previous_chapters.py | 2 +- ch05/03_bonus_pretraining_on_gutenberg/previous_chapters.py | 2 +- ch05/05_bonus_hparam_tuning/previous_chapters.py | 2 +- ch06/01_main-chapter-code/previous_chapters.py | 4 ++-- ch06/02_bonus_additional-experiments/previous_chapters.py | 4 ++-- ch06/03_bonus_imdb-classification/previous_chapters.py | 4 ++-- ch07/01_main-chapter-code/previous_chapters.py | 4 ++-- 15 files changed, 20 insertions(+), 20 deletions(-) diff --git a/appendix-D/01_main-chapter-code/previous_chapters.py b/appendix-D/01_main-chapter-code/previous_chapters.py index 47170a5..58acd18 100644 --- a/appendix-D/01_main-chapter-code/previous_chapters.py +++ b/appendix-D/01_main-chapter-code/previous_chapters.py @@ -50,7 +50,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256, # Create dataloader dataloader = DataLoader( - dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=0) + dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers) return dataloader diff --git a/ch02/01_main-chapter-code/ch02.ipynb b/ch02/01_main-chapter-code/ch02.ipynb index 6349f86..eed8795 100644 --- a/ch02/01_main-chapter-code/ch02.ipynb +++ b/ch02/01_main-chapter-code/ch02.ipynb @@ -1346,7 +1346,7 @@ " batch_size=batch_size,\n", " shuffle=shuffle,\n", " drop_last=drop_last,\n", - " num_workers=0\n", + " num_workers=num_workers\n", " )\n", "\n", " return dataloader" diff --git a/ch02/01_main-chapter-code/dataloader.ipynb b/ch02/01_main-chapter-code/dataloader.ipynb index 5962ec9..3a9ecaf 100644 --- a/ch02/01_main-chapter-code/dataloader.ipynb +++ b/ch02/01_main-chapter-code/dataloader.ipynb @@ -82,7 +82,7 @@ "\n", " # Create dataloader\n", " dataloader = DataLoader(\n", - " dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=0)\n", + " dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)\n", "\n", " return dataloader\n", "\n", diff --git a/ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb b/ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb index 0ae5e63..2255a21 100644 --- a/ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb +++ b/ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb @@ -128,7 +128,7 @@ " batch_size=batch_size,\n", " shuffle=shuffle,\n", " drop_last=drop_last,\n", - " num_workers=0\n", + " num_workers=num_workers\n", " )\n", "\n", " return dataloader" diff --git a/ch04/01_main-chapter-code/gpt.py b/ch04/01_main-chapter-code/gpt.py index 5066012..85e3736 100644 --- a/ch04/01_main-chapter-code/gpt.py +++ b/ch04/01_main-chapter-code/gpt.py @@ -13,7 +13,7 @@ from torch.utils.data import Dataset, DataLoader class GPTDatasetV1(Dataset): - def __init__(self, txt, tokenizer, max_length, stride, num_workers=0): + def __init__(self, txt, tokenizer, max_length, stride): self.input_ids = [] self.target_ids = [] @@ -44,7 +44,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256, # Create dataloader dataloader = DataLoader( - dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last) + dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers) return dataloader diff --git a/ch04/01_main-chapter-code/previous_chapters.py b/ch04/01_main-chapter-code/previous_chapters.py index 4a652bf..027f2a7 100644 --- a/ch04/01_main-chapter-code/previous_chapters.py +++ b/ch04/01_main-chapter-code/previous_chapters.py @@ -41,7 +41,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256, # Create dataloader dataloader = DataLoader( - dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=0) + dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers) return dataloader diff --git a/ch04/02_performance-analysis/previous_chapters.py b/ch04/02_performance-analysis/previous_chapters.py index b1063ca..369e370 100644 --- a/ch04/02_performance-analysis/previous_chapters.py +++ b/ch04/02_performance-analysis/previous_chapters.py @@ -49,7 +49,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256, # Create dataloader dataloader = DataLoader( - dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=0) + dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers) return dataloader diff --git a/ch05/01_main-chapter-code/previous_chapters.py b/ch05/01_main-chapter-code/previous_chapters.py index b1063ca..369e370 100644 --- a/ch05/01_main-chapter-code/previous_chapters.py +++ b/ch05/01_main-chapter-code/previous_chapters.py @@ -49,7 +49,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256, # Create dataloader dataloader = DataLoader( - dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=0) + dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers) return dataloader diff --git a/ch05/02_alternative_weight_loading/previous_chapters.py b/ch05/02_alternative_weight_loading/previous_chapters.py index 2b1c5f2..6c5b101 100644 --- a/ch05/02_alternative_weight_loading/previous_chapters.py +++ b/ch05/02_alternative_weight_loading/previous_chapters.py @@ -49,7 +49,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256, # Create dataloader dataloader = DataLoader( - dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=0) + dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers) return dataloader diff --git a/ch05/03_bonus_pretraining_on_gutenberg/previous_chapters.py b/ch05/03_bonus_pretraining_on_gutenberg/previous_chapters.py index d5f3b6d..8f2e4da 100644 --- a/ch05/03_bonus_pretraining_on_gutenberg/previous_chapters.py +++ b/ch05/03_bonus_pretraining_on_gutenberg/previous_chapters.py @@ -44,7 +44,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256, tokenizer = tiktoken.get_encoding("gpt2") dataset = GPTDatasetV1(txt, tokenizer, max_length, stride) dataloader = DataLoader( - dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=0) + dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers) return dataloader diff --git a/ch05/05_bonus_hparam_tuning/previous_chapters.py b/ch05/05_bonus_hparam_tuning/previous_chapters.py index 5a2d85d..b5c92fa 100644 --- a/ch05/05_bonus_hparam_tuning/previous_chapters.py +++ b/ch05/05_bonus_hparam_tuning/previous_chapters.py @@ -49,7 +49,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256, # Create dataloader dataloader = DataLoader( - dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=0) + dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers) return dataloader diff --git a/ch06/01_main-chapter-code/previous_chapters.py b/ch06/01_main-chapter-code/previous_chapters.py index feb6ab1..9f3d8e8 100644 --- a/ch06/01_main-chapter-code/previous_chapters.py +++ b/ch06/01_main-chapter-code/previous_chapters.py @@ -41,7 +41,7 @@ class GPTDatasetV1(Dataset): def create_dataloader_v1(txt, batch_size=4, max_length=256, - stride=128, shuffle=True, drop_last=True): + stride=128, shuffle=True, drop_last=True, num_workers=0): # Initialize the tokenizer tokenizer = tiktoken.get_encoding("gpt2") @@ -50,7 +50,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256, # Create dataloader dataloader = DataLoader( - dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last) + dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers) return dataloader diff --git a/ch06/02_bonus_additional-experiments/previous_chapters.py b/ch06/02_bonus_additional-experiments/previous_chapters.py index ead6a0f..46549e9 100644 --- a/ch06/02_bonus_additional-experiments/previous_chapters.py +++ b/ch06/02_bonus_additional-experiments/previous_chapters.py @@ -41,7 +41,7 @@ class GPTDatasetV1(Dataset): def create_dataloader_v1(txt, batch_size=4, max_length=256, - stride=128, shuffle=True, drop_last=True): + stride=128, shuffle=True, drop_last=True, num_workers=0): # Initialize the tokenizer tokenizer = tiktoken.get_encoding("gpt2") @@ -50,7 +50,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256, # Create dataloader dataloader = DataLoader( - dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last) + dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers) return dataloader diff --git a/ch06/03_bonus_imdb-classification/previous_chapters.py b/ch06/03_bonus_imdb-classification/previous_chapters.py index 884eb5c..2bd2035 100644 --- a/ch06/03_bonus_imdb-classification/previous_chapters.py +++ b/ch06/03_bonus_imdb-classification/previous_chapters.py @@ -42,7 +42,7 @@ class GPTDatasetV1(Dataset): def create_dataloader_v1(txt, batch_size=4, max_length=256, - stride=128, shuffle=True, drop_last=True): + stride=128, shuffle=True, drop_last=True, num_workers=0): # Initialize the tokenizer tokenizer = tiktoken.get_encoding("gpt2") @@ -51,7 +51,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256, # Create dataloader dataloader = DataLoader( - dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last) + dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers) return dataloader diff --git a/ch07/01_main-chapter-code/previous_chapters.py b/ch07/01_main-chapter-code/previous_chapters.py index 39018a3..c3e9a76 100644 --- a/ch07/01_main-chapter-code/previous_chapters.py +++ b/ch07/01_main-chapter-code/previous_chapters.py @@ -45,7 +45,7 @@ class GPTDatasetV1(Dataset): def create_dataloader_v1(txt, batch_size=4, max_length=256, - stride=128, shuffle=True, drop_last=True): + stride=128, shuffle=True, drop_last=True, num_workers=0): # Initialize the tokenizer tokenizer = tiktoken.get_encoding("gpt2") @@ -54,7 +54,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256, # Create dataloader dataloader = DataLoader( - dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last) + dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers) return dataloader