From 3a5fc79b38a0bc032c64ee5e752f9c0e95639aba Mon Sep 17 00:00:00 2001
From: rasbt
Date: Mon, 5 Feb 2024 06:51:58 -0600
Subject: [PATCH] add and update readme files

---
 ch02/01_main-chapter-code/README.md  |  2 +-
 ch02/README.md                       |  2 +-
 ch03/01_main-chapter-code/README.md  |  4 ++--
 ch03/README.md                       |  2 +-
 ch04/01_main-chapter-code/README.md  |  6 ++++++
 ch04/01_main-chapter-code/ch04.ipynb | 12 ++++++++----
 ch04/README.md                       |  3 +++
 7 files changed, 22 insertions(+), 9 deletions(-)
 create mode 100644 ch04/01_main-chapter-code/README.md
 create mode 100644 ch04/README.md

diff --git a/ch02/01_main-chapter-code/README.md b/ch02/01_main-chapter-code/README.md
index 646bf68..9286cf8 100644
--- a/ch02/01_main-chapter-code/README.md
+++ b/ch02/01_main-chapter-code/README.md
@@ -1,5 +1,5 @@
 # Chapter 2: Working with Text Data
 
-- [ch02.ipynb](ch02.ipynb) has all the code as it appears in the chapter
+- [ch02.ipynb](ch02.ipynb) contains all the code as it appears in the chapter
 - [dataloader.ipynb](dataloader.ipynb) is a minimal notebook with the main data loading pipeline implemented in this chapter
 
diff --git a/ch02/README.md b/ch02/README.md
index 7c085a9..bd98860 100644
--- a/ch02/README.md
+++ b/ch02/README.md
@@ -1,6 +1,6 @@
 # Chapter 2: Working with Text Data
 
-- [01_main-chapter-code](01_main-chapter-code) contains the main chapter code
+- [01_main-chapter-code](01_main-chapter-code) contains the main chapter code and exercise solutions
 
 - [02_bonus_bytepair-encoder](02_bonus_bytepair-encoder) contains optional code to benchmark different byte pair encoder implementations
 
diff --git a/ch03/01_main-chapter-code/README.md b/ch03/01_main-chapter-code/README.md
index ef8457e..44d8b46 100644
--- a/ch03/01_main-chapter-code/README.md
+++ b/ch03/01_main-chapter-code/README.md
@@ -1,5 +1,5 @@
-# Chapter 3: Understanding Attention Mechanisms
+# Chapter 3: Coding Attention Mechanisms
 
-- [ch03.ipynb](ch03.ipynb) has all the code as it appears in the chapter
+- [ch03.ipynb](ch03.ipynb) contains all the code as it appears in the chapter
 - [multihead-attention.ipynb](multihead-attention.ipynb) is a minimal notebook with the main data loading pipeline implemented in this chapter
 
diff --git a/ch03/README.md b/ch03/README.md
index 9545007..846044b 100644
--- a/ch03/README.md
+++ b/ch03/README.md
@@ -1,3 +1,3 @@
-# Chapter 3: Understanding Attention Mechanisms
+# Chapter 3: Coding Attention Mechanisms
 
 - [01_main-chapter-code](01_main-chapter-code) contains the main chapter code.
\ No newline at end of file
diff --git a/ch04/01_main-chapter-code/README.md b/ch04/01_main-chapter-code/README.md
new file mode 100644
index 0000000..7d22944
--- /dev/null
+++ b/ch04/01_main-chapter-code/README.md
@@ -0,0 +1,6 @@
+# Chapter 4: Implementing a GPT model from Scratch To Generate Text
+
+- [ch04.ipynb](ch04.ipynb) contains all the code as it appears in the chapter
+- [previous_chapters.py](previous_chapters.py) is a Python module that contains the `MultiHeadAttention` module from the previous chapter, which we import in [ch04.ipynb](ch04.ipynb) to create the GPT model
+- [gpt.py](gpt.py) is a standalone Python script file with the code that we implemented thus far, including the GPT model we coded in this chapter
+
diff --git a/ch04/01_main-chapter-code/ch04.ipynb b/ch04/01_main-chapter-code/ch04.ipynb
index 0658728..742c962 100644
--- a/ch04/01_main-chapter-code/ch04.ipynb
+++ b/ch04/01_main-chapter-code/ch04.ipynb
@@ -134,7 +134,9 @@
     "        \n",
     "        # Use a placeholder for LayerNorm\n",
     "        self.final_norm = DummyLayerNorm(cfg[\"emb_dim\"])\n",
-    "        self.out_head = nn.Linear(cfg[\"emb_dim\"], cfg[\"vocab_size\"], bias=False)\n",
+    "        self.out_head = nn.Linear(\n",
+    "            cfg[\"emb_dim\"], cfg[\"vocab_size\"], bias=False\n",
+    "        )\n",
     "\n",
     "    def forward(self, in_idx):\n",
     "        batch_size, seq_len = in_idx.shape\n",
@@ -208,7 +210,7 @@
     "batch.append(torch.tensor(tokenizer.encode(txt1)))\n",
     "batch.append(torch.tensor(tokenizer.encode(txt2)))\n",
     "batch = torch.stack(batch, dim=0)\n",
-    "batch"
+    "print(batch)"
    ]
   },
   {
@@ -772,7 +774,7 @@
     "torch.manual_seed(123)\n",
     "ex_short = ExampleWithShortcut()\n",
     "inputs = torch.tensor([[-1., 1., 2.]])\n",
-    "ex_short(inputs)"
+    "print(ex_short(inputs))"
    ]
   },
   {
@@ -947,7 +949,9 @@
     "        \n",
     "        # Use a placeholder for LayerNorm\n",
     "        self.final_norm = LayerNorm(cfg[\"emb_dim\"])\n",
-    "        self.out_head = nn.Linear(cfg[\"emb_dim\"], cfg[\"vocab_size\"], bias=False)\n",
+    "        self.out_head = nn.Linear(\n",
+    "            cfg[\"emb_dim\"], cfg[\"vocab_size\"], bias=False\n",
+    "        )\n",
     "\n",
     "    def forward(self, in_idx):\n",
     "        batch_size, seq_len = in_idx.shape\n",
diff --git a/ch04/README.md b/ch04/README.md
new file mode 100644
index 0000000..43db748
--- /dev/null
+++ b/ch04/README.md
@@ -0,0 +1,3 @@
+# Chapter 4: Implementing a GPT model from Scratch To Generate Text
+
+- [01_main-chapter-code](01_main-chapter-code) contains the main chapter code.
\ No newline at end of file