mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2025-08-29 19:10:19 +00:00
add section numbers
This commit is contained in:
parent
505e9a5fa5
commit
2ae4ad15ba
@ -108,7 +108,7 @@
|
|||||||
"id": "UJJneXpTEg4W"
|
"id": "UJJneXpTEg4W"
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"## 1. Convert the GPT model implementation step by step"
|
"# 1. Convert the GPT model implementation step by step"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -129,7 +129,7 @@
|
|||||||
"id": "979c7b6d-1370-4da1-8bfb-a2b27537bf2f"
|
"id": "979c7b6d-1370-4da1-8bfb-a2b27537bf2f"
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"### 1.2 Replace LayerNorm with RMSNorm layer"
|
"## 1.1 Replace LayerNorm with RMSNorm layer"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -228,7 +228,7 @@
|
|||||||
"id": "5eb81f83-c38c-46a4-b763-aa630a32e357"
|
"id": "5eb81f83-c38c-46a4-b763-aa630a32e357"
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"## Replace GELU with SiLU activation"
|
"## 1.2 Replace GELU with SiLU activation"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -300,7 +300,7 @@
|
|||||||
"id": "4f9b5167-1da9-46c8-9964-8036b3b1deb9"
|
"id": "4f9b5167-1da9-46c8-9964-8036b3b1deb9"
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"## Update the FeedForward module"
|
"## 1.3 Update the FeedForward module"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -388,7 +388,7 @@
|
|||||||
"id": "f6b7bf4f-99d0-42c1-807c-5074d2cc1949"
|
"id": "f6b7bf4f-99d0-42c1-807c-5074d2cc1949"
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"## Implement RoPE"
|
"## 1.4 Implement RoPE"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -503,7 +503,7 @@
|
|||||||
"id": "f78127b0-dda2-4c5a-98dd-bae8f5fe8297"
|
"id": "f78127b0-dda2-4c5a-98dd-bae8f5fe8297"
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"## Add RoPE to MultiHeadAttention module"
|
"## 1.5 Add RoPE to MultiHeadAttention module"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -652,7 +652,7 @@
|
|||||||
"id": "e5a1a272-a038-4b8f-aaaa-f4b241e7f23f"
|
"id": "e5a1a272-a038-4b8f-aaaa-f4b241e7f23f"
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"## Update the TransformerBlock module"
|
"## 1.6 Update the TransformerBlock module"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -727,7 +727,7 @@
|
|||||||
"id": "ada953bc-e2c0-4432-a32d-3f7efa3f6e0f"
|
"id": "ada953bc-e2c0-4432-a32d-3f7efa3f6e0f"
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"## Update the model class"
|
"## 1.7 Update the model class"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -791,7 +791,7 @@
|
|||||||
"id": "4bc94940-aaeb-45b9-9399-3a69b8043e60"
|
"id": "4bc94940-aaeb-45b9-9399-3a69b8043e60"
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"## Initialize model"
|
"## 2. Initialize model"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -1029,7 +1029,7 @@
|
|||||||
"id": "5dc64a06-27dc-46ec-9e6d-1700a8227d34"
|
"id": "5dc64a06-27dc-46ec-9e6d-1700a8227d34"
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"## Load tokenizer"
|
"## 3. Load tokenizer"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -1259,7 +1259,7 @@
|
|||||||
"id": "f63cc248-1d27-4eb6-aa50-173b436652f8"
|
"id": "f63cc248-1d27-4eb6-aa50-173b436652f8"
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"## Load pretrained weights"
|
"## 4. Load pretrained weights"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user