diff --git a/ch05/07_gpt_to_llama/converting-gpt-to-llama2.ipynb b/ch05/07_gpt_to_llama/converting-gpt-to-llama2.ipynb index 2b537ee..01fb09e 100644 --- a/ch05/07_gpt_to_llama/converting-gpt-to-llama2.ipynb +++ b/ch05/07_gpt_to_llama/converting-gpt-to-llama2.ipynb @@ -108,7 +108,7 @@ "id": "UJJneXpTEg4W" }, "source": [ - "## 1. Convert the GPT model implementation step by step" + "# 1. Convert the GPT model implementation step by step" ] }, { @@ -129,7 +129,7 @@ "id": "979c7b6d-1370-4da1-8bfb-a2b27537bf2f" }, "source": [ - "### 1.2 Replace LayerNorm with RMSNorm layer" + "## 1.1 Replace LayerNorm with RMSNorm layer" ] }, { @@ -228,7 +228,7 @@ "id": "5eb81f83-c38c-46a4-b763-aa630a32e357" }, "source": [ - "## Replace GELU with SiLU activation" + "## 1.2 Replace GELU with SiLU activation" ] }, { @@ -300,7 +300,7 @@ "id": "4f9b5167-1da9-46c8-9964-8036b3b1deb9" }, "source": [ - "## Update the FeedForward module" + "## 1.3 Update the FeedForward module" ] }, { @@ -388,7 +388,7 @@ "id": "f6b7bf4f-99d0-42c1-807c-5074d2cc1949" }, "source": [ - "## Implement RoPE" + "## 1.4 Implement RoPE" ] }, { @@ -503,7 +503,7 @@ "id": "f78127b0-dda2-4c5a-98dd-bae8f5fe8297" }, "source": [ - "## Add RoPE to MultiHeadAttention module" + "## 1.5 Add RoPE to MultiHeadAttention module" ] }, { @@ -652,7 +652,7 @@ "id": "e5a1a272-a038-4b8f-aaaa-f4b241e7f23f" }, "source": [ - "## Update the TransformerBlock module" + "## 1.6 Update the TransformerBlock module" ] }, { @@ -727,7 +727,7 @@ "id": "ada953bc-e2c0-4432-a32d-3f7efa3f6e0f" }, "source": [ - "## Update the model class" + "## 1.7 Update the model class" ] }, { @@ -791,7 +791,7 @@ "id": "4bc94940-aaeb-45b9-9399-3a69b8043e60" }, "source": [ - "## Initialize model" + "## 2. Initialize model" ] }, { @@ -1029,7 +1029,7 @@ "id": "5dc64a06-27dc-46ec-9e6d-1700a8227d34" }, "source": [ - "## Load tokenizer" + "## 3. Load tokenizer" ] }, { @@ -1259,7 +1259,7 @@ "id": "f63cc248-1d27-4eb6-aa50-173b436652f8" }, "source": [ - "## Load pretrained weights" + "## 4. Load pretrained weights" ] }, {