diff --git a/ch02/01_main-chapter-code/dataloader.ipynb b/ch02/01_main-chapter-code/dataloader.ipynb index a6371c5..62b62f0 100644 --- a/ch02/01_main-chapter-code/dataloader.ipynb +++ b/ch02/01_main-chapter-code/dataloader.ipynb @@ -103,8 +103,8 @@ " return self.input_ids[idx], self.target_ids[idx]\n", "\n", "\n", - "def create_dataloader_v1(txt, batch_size=4, max_length=256, \n", - " stride=128, shuffle=True, drop_last=True, num_workers=0):\n", + "def create_dataloader_v1(txt, batch_size, max_length, stride,\n", + " shuffle=True, drop_last=True, num_workers=0):\n", " # Initialize the tokenizer\n", " tokenizer = tiktoken.get_encoding(\"gpt2\")\n", "\n", @@ -121,9 +121,6 @@ "with open(\"the-verdict.txt\", \"r\", encoding=\"utf-8\") as f:\n", " raw_text = f.read()\n", "\n", - "tokenizer = tiktoken.get_encoding(\"gpt2\")\n", - "encoded_text = tokenizer.encode(raw_text)\n", - "\n", "vocab_size = 50257\n", "output_dim = 256\n", "context_length = 1024\n", @@ -132,8 +129,14 @@ "token_embedding_layer = torch.nn.Embedding(vocab_size, output_dim)\n", "pos_embedding_layer = torch.nn.Embedding(context_length, output_dim)\n", "\n", + "batch_size = 8\n", "max_length = 4\n", - "dataloader = create_dataloader_v1(raw_text, batch_size=8, max_length=max_length, stride=max_length)" + "dataloader = create_dataloader_v1(\n", + " raw_text,\n", + " batch_size=batch_size,\n", + " max_length=max_length,\n", + " stride=max_length\n", + ")" ] }, { @@ -189,7 +192,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6" + "version": "3.11.4" } }, "nbformat": 4,