From 496b52f842eeaecd8f02b5a75c1db4e72c307ed3 Mon Sep 17 00:00:00 2001
From: rasbt
Date: Sat, 10 Feb 2024 17:47:56 -0600
Subject: [PATCH] format the other GPT architecture sizes

---
 ch04/01_main-chapter-code/ch04.ipynb | 40 ++++++++++++++--------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/ch04/01_main-chapter-code/ch04.ipynb b/ch04/01_main-chapter-code/ch04.ipynb
index 1ef4aa2..a560e56 100644
--- a/ch04/01_main-chapter-code/ch04.ipynb
+++ b/ch04/01_main-chapter-code/ch04.ipynb
@@ -884,7 +884,7 @@
    "source": [
     "torch.manual_seed(123)\n",
     "\n",
-    "x = torch.rand(2, 6, 768)\n",
+    "x = torch.rand(2, 6, 768)  # Shape: [batch_size, num_tokens, emb_dim]\n",
     "block = TransformerBlock(GPT_CONFIG_124M)\n",
     "output = block(x)\n",
     "\n",
@@ -1140,27 +1140,27 @@
    "id": "309a3be4-c20a-4657-b4e0-77c97510b47c",
    "metadata": {},
    "source": [
-    "- Exercise: you can try the other configurations as well:\n",
+    "- Exercise: you can also try the other configurations referenced in the [GPT-2 paper](https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf):\n",
     "\n",
-    "- GPT2-small (the 124M configuration we implemented):\n",
-    "  - \"emb_dim\" = 768\n",
-    "  - \"n_layers\" = 12\n",
-    "  - \"n_heads\" = 12\n",
+    "  - **GPT2-small** (the 124M configuration we already implemented):\n",
+    "    - \"emb_dim\" = 768\n",
+    "    - \"n_layers\" = 12\n",
+    "    - \"n_heads\" = 12\n",
     "\n",
-    "- GPT2-medium:\n",
-    "  - \"emb_dim\" = 1024\n",
-    "  - \"n_layers\" = 24\n",
-    "  - \"n_heads\" = 16\n",
-    "\n",
-    "- GPT2-large:\n",
-    "  - \"emb_dim\" = 1280\n",
-    "  - \"n_layers\" = 36\n",
-    "  - \"n_heads\" = 20\n",
-    "\n",
-    "- GPT2-XL:\n",
-    "  - \"emb_dim\" = 1600\n",
-    "  - \"n_layers\" = 48\n",
-    "  - \"n_heads\" = 25"
+    "  - **GPT2-medium:**\n",
+    "    - \"emb_dim\" = 1024\n",
+    "    - \"n_layers\" = 24\n",
+    "    - \"n_heads\" = 16\n",
+    "  \n",
+    "  - **GPT2-large:**\n",
+    "    - \"emb_dim\" = 1280\n",
+    "    - \"n_layers\" = 36\n",
+    "    - \"n_heads\" = 20\n",
+    "  \n",
+    "  - **GPT2-XL:**\n",
+    "    - \"emb_dim\" = 1600\n",
+    "    - \"n_layers\" = 48\n",
+    "    - \"n_heads\" = 25"
    ]
   },
   {
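
Below is a minimal sketch of the exercise described in the second hunk, assuming the `GPT_CONFIG_124M` dictionary and `TransformerBlock` class defined earlier in `ch04.ipynb`; the `model_configs` helper dict is introduced here purely for illustration and is not part of the notebook.

```python
import torch

# Hypothetical helper dict (not in the notebook): only these three keys
# differ between the GPT-2 sizes listed in the patch above.
model_configs = {
    "gpt2-small":  {"emb_dim": 768,  "n_layers": 12, "n_heads": 12},
    "gpt2-medium": {"emb_dim": 1024, "n_layers": 24, "n_heads": 16},
    "gpt2-large":  {"emb_dim": 1280, "n_layers": 36, "n_heads": 20},
    "gpt2-xl":     {"emb_dim": 1600, "n_layers": 48, "n_heads": 25},
}

# Start from the base config and override only the size-specific keys
GPT_CONFIG = GPT_CONFIG_124M.copy()
GPT_CONFIG.update(model_configs["gpt2-medium"])

torch.manual_seed(123)
x = torch.rand(2, 6, GPT_CONFIG["emb_dim"])  # Shape: [batch_size, num_tokens, emb_dim]
block = TransformerBlock(GPT_CONFIG)
output = block(x)

print(output.shape)  # the block preserves the input shape: torch.Size([2, 6, 1024])
```

Note that in every configuration, `emb_dim` is divisible by `n_heads` (1024/16, 1280/20, and 1600/25 all give a head dimension of 64), which is why the head count grows along with the embedding dimension.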