diff --git a/ch05/07_gpt_to_llama/converting-llama2-to-llama3.ipynb b/ch05/07_gpt_to_llama/converting-llama2-to-llama3.ipynb
index 3fb007b..431c8d3 100644
--- a/ch05/07_gpt_to_llama/converting-llama2-to-llama3.ipynb
+++ b/ch05/07_gpt_to_llama/converting-llama2-to-llama3.ipynb
@@ -2094,12 +2094,37 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "9bdbe32f-4c96-4e60-8bf4-52b5217df1e6",
- "metadata": {},
- "outputs": [],
+ "execution_count": 10,
+ "id": "a55a8769-1a03-4265-8fd0-15f1c423da53",
+ "metadata": {
+ "id": "a8bc2370-39d2-4bfe-b4c1-6bdd75fe101c"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "New RoPE theta: 31250.0\n"
+ ]
+ }
+ ],
"source": [
- "LLAMA31_CONFIG_8B[\"context_length\"] = 8192"
+ "old_context_length = LLAMA31_CONFIG_8B[\"context_length\"]\n",
+ "LLAMA31_CONFIG_8B[\"context_length\"] = 8192\n",
+ "\n",
+ "\n",
+ "def rescale_theta(theta_old, context_length_old, context_length_new):\n",
+ " scaling_factor = context_length_new / context_length_old\n",
+ " theta_new = theta_old * scaling_factor\n",
+ " return theta_new\n",
+ "\n",
+ "LLAMA31_CONFIG_8B[\"rope_base\"] = rescale_theta(\n",
+ " LLAMA31_CONFIG_8B[\"rope_base\"],\n",
+ " old_context_length,\n",
+ " LLAMA31_CONFIG_8B[\"context_length\"]\n",
+ ")\n",
+ "\n",
+ "print(\"New RoPE theta:\", LLAMA31_CONFIG_8B[\"rope_base\"])"
]
},
{
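Note on the hunk above: Llama 3.1 8B ships with `rope_base = 500_000` and, in this notebook's config, a `context_length` of 131_072, so shrinking the window to 8_192 multiplies theta by 1/16, which is where the printed `31250.0` comes from. A minimal sanity check of that arithmetic (`rescale_theta` below simply mirrors the function added in the hunk):

```python
# Sanity check for the linear RoPE theta rescaling added above:
# 500_000 * (8_192 / 131_072) = 31_250.0, matching the cell output.
def rescale_theta(theta_old, context_length_old, context_length_new):
    scaling_factor = context_length_new / context_length_old
    return theta_old * scaling_factor

assert rescale_theta(500_000, 131_072, 8_192) == 31_250.0
```
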
@@ -2462,12 +2487,31 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "387456c3-c6a1-46fe-8830-6e00eb46ac13",
- "metadata": {},
- "outputs": [],
+ "execution_count": 10,
+ "id": "73f001a6-7ae0-4204-aa83-a27a8878dfd2",
+ "metadata": {
+ "id": "a8bc2370-39d2-4bfe-b4c1-6bdd75fe101c"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "New RoPE theta: 31250.0\n"
+ ]
+ }
+ ],
"source": [
- "LLAMA32_CONFIG_1B[\"context_length\"] = 8192"
+ "old_context_length = LLAMA32_CONFIG_1B[\"context_length\"]\n",
+ "LLAMA32_CONFIG_1B[\"context_length\"] = 8192\n",
+ "\n",
+ "LLAMA32_CONFIG_1B[\"rope_base\"] = rescale_theta(\n",
+ " LLAMA32_CONFIG_1B[\"rope_base\"],\n",
+ " old_context_length,\n",
+ " LLAMA32_CONFIG_1B[\"context_length\"]\n",
+ ")\n",
+ "\n",
+ "print(\"New RoPE theta:\", LLAMA32_CONFIG_1B[\"rope_base\"])"
]
},
{
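Why a *linear* rescaling is a reasonable heuristic: RoPE's lowest rotation frequency has a wavelength that grows almost proportionally with theta, so scaling theta together with the context window keeps the longest wavelength roughly aligned with the sequence length. A small sketch of that effect (the `head_dim=64` matches the 1B config's 2048/32; note the effect is approximate and only approaches the exact 16x ratio as `head_dim` grows):

```python
import torch

# Standard RoPE inverse frequencies: theta ** (-2i / head_dim).
def inv_freq(theta_base, head_dim=64):
    return 1.0 / (theta_base ** (torch.arange(0, head_dim, 2).float() / head_dim))

# Longest wavelength = 2*pi / lowest frequency, before and after rescaling.
def longest_wavelen(theta_base):
    return float(2 * torch.pi / inv_freq(theta_base).min())

print(longest_wavelen(500_000) / longest_wavelen(31_250))  # ~14.7, close to the 16x context ratio
```
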
@@ -2689,7 +2733,7 @@
"provenance": []
},
"kernelspec": {
- "display_name": "pt",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -2703,7 +2747,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.9"
+ "version": "3.11.4"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
diff --git a/ch05/07_gpt_to_llama/standalone-llama32.ipynb b/ch05/07_gpt_to_llama/standalone-llama32.ipynb
index dd8fdf5..8dfb940 100644
--- a/ch05/07_gpt_to_llama/standalone-llama32.ipynb
+++ b/ch05/07_gpt_to_llama/standalone-llama32.ipynb
@@ -3,7 +3,9 @@
{
"cell_type": "markdown",
"id": "e1b280ab-b61f-4d1a-bf7e-44e5f9ed3a5c",
- "metadata": {},
+ "metadata": {
+ "id": "e1b280ab-b61f-4d1a-bf7e-44e5f9ed3a5c"
+ },
"source": [
"
\n",
"\n",
@@ -23,7 +25,9 @@
{
"cell_type": "markdown",
"id": "efde77f2-6af3-4781-8597-89ecd3f41a52",
- "metadata": {},
+ "metadata": {
+ "id": "efde77f2-6af3-4781-8597-89ecd3f41a52"
+ },
"source": [
"# Llama 3.2 From Scratch (A Standalone Notebook)"
]
@@ -31,14 +35,16 @@
{
"cell_type": "markdown",
"id": "55cdef4d-de59-4a65-89f9-fa2a8ef3471d",
- "metadata": {},
+ "metadata": {
+ "id": "55cdef4d-de59-4a65-89f9-fa2a8ef3471d"
+ },
"source": [
"- This notebook is purposefully minimal and focuses on the code to implement the Llama 3.2 1B and 3B LLMs\n",
"- For a step-by-step guide that explains the individual components and the relationship between GPT, Llama 2, and Llama 3, please see the following companion notebooks:\n",
" - [Converting a From-Scratch GPT Architecture to Llama 2](converting-gpt-to-llama2.ipynb)\n",
" - [Converting Llama 2 to Llama 3.2 From Scratch](converting-llama2-to-llama3.ipynb)\n",
" \n",
- " \n",
+    "\n",
" \n",
" \n",
@@ -50,8 +56,8 @@
},
{
"cell_type": "code",
- "execution_count": 1,
- "id": "beef121b-2454-4577-8b56-aa00961089cb",
+ "execution_count": null,
+ "id": "7c201adb-747e-437b-9a62-442802941e01",
"metadata": {},
"outputs": [],
"source": [
@@ -60,18 +66,24 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 3,
"id": "dd1b65a8-4301-444a-bd7c-a6f2bd1df9df",
- "metadata": {},
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "dd1b65a8-4301-444a-bd7c-a6f2bd1df9df",
+ "outputId": "4f762354-e0a3-4cc2-e5d4-e61a227a202c"
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"blobfile version: 3.0.0\n",
- "huggingface_hub version: 0.25.1\n",
- "tiktoken version: 0.7.0\n",
- "torch version: 2.4.0\n"
+ "huggingface_hub version: 0.25.2\n",
+ "tiktoken version: 0.8.0\n",
+ "torch version: 2.5.0\n"
]
}
],
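The source of this version-check cell is elided by the diff; for orientation, output in the `"<package> version: <x.y.z>"` form above is conventionally produced via `importlib.metadata` — a sketch (the notebook's actual cell may format things slightly differently):

```python
from importlib.metadata import version

# Print the versions of the packages this notebook depends on.
for pkg in ["blobfile", "huggingface_hub", "tiktoken", "torch"]:
    print(f"{pkg} version: {version(pkg)}")
```
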
@@ -91,7 +103,9 @@
{
"cell_type": "markdown",
"id": "653410a6-dd2b-4eb2-a722-23d9782e726d",
- "metadata": {},
+ "metadata": {
+ "id": "653410a6-dd2b-4eb2-a722-23d9782e726d"
+ },
"source": [
" \n",
"# 1. Architecture code"
@@ -99,9 +113,11 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 4,
"id": "82076c21-9331-4dcd-b017-42b046cf1a60",
- "metadata": {},
+ "metadata": {
+ "id": "82076c21-9331-4dcd-b017-42b046cf1a60"
+ },
"outputs": [],
"source": [
"import torch\n",
@@ -124,9 +140,11 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 5,
"id": "4b9a346f-5826-4083-9162-abd56afc03f0",
- "metadata": {},
+ "metadata": {
+ "id": "4b9a346f-5826-4083-9162-abd56afc03f0"
+ },
"outputs": [],
"source": [
"def precompute_rope_params(head_dim, theta_base=10_000, context_length=4096, freq_config=None):\n",
@@ -196,9 +214,11 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 6,
"id": "e8169ab5-f976-4222-a2e1-eb1cabf267cb",
- "metadata": {},
+ "metadata": {
+ "id": "e8169ab5-f976-4222-a2e1-eb1cabf267cb"
+ },
"outputs": [],
"source": [
"class SharedBuffers:\n",
@@ -308,9 +328,11 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 7,
"id": "457cb2f8-50c1-4045-8a74-f181bfb5fea9",
- "metadata": {},
+ "metadata": {
+ "id": "457cb2f8-50c1-4045-8a74-f181bfb5fea9"
+ },
"outputs": [],
"source": [
"class TransformerBlock(nn.Module):\n",
@@ -348,9 +370,11 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 8,
"id": "e88de3e3-9f07-42cc-816b-28dbd46e96c4",
- "metadata": {},
+ "metadata": {
+ "id": "e88de3e3-9f07-42cc-816b-28dbd46e96c4"
+ },
"outputs": [],
"source": [
"class Llama3Model(nn.Module):\n",
@@ -376,7 +400,9 @@
{
"cell_type": "markdown",
"id": "be2d201f-74ad-4d63-ab9c-601b00674a48",
- "metadata": {},
+ "metadata": {
+ "id": "be2d201f-74ad-4d63-ab9c-601b00674a48"
+ },
"source": [
" \n",
"# 2. Initialize model"
@@ -385,16 +411,20 @@
{
"cell_type": "markdown",
"id": "23dea40c-fe20-4a75-be25-d6fce5863c01",
- "metadata": {},
+ "metadata": {
+ "id": "23dea40c-fe20-4a75-be25-d6fce5863c01"
+ },
"source": [
"- The remainder of this notebook uses the Llama 3.2 1B model; to use the 3B model variant, just uncomment the second configuration file in the following code cell"
]
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 9,
"id": "caa142fa-b375-4e78-b392-2072ced666f3",
- "metadata": {},
+ "metadata": {
+ "id": "caa142fa-b375-4e78-b392-2072ced666f3"
+ },
"outputs": [],
"source": [
"# Llama 3.2 1B\n",
@@ -407,7 +437,7 @@
" \"n_layers\": 16, # Number of layers\n",
" \"hidden_dim\": 8192, # Size of the intermediate dimension in FeedForward\n",
" \"n_kv_groups\": 8, # Key-Value groups for grouped-query attention\n",
- " \"rope_base\": 500_000, # The base in RoPE's \"theta\"\n",
+ " \"rope_base\": 500_000, # The base in RoPE's \"theta\"\n",
" \"dtype\": torch.bfloat16, # Lower-precision dtype to save memory\n",
" \"rope_freq\": { # RoPE frequency scaling\n",
" \"factor\": 32.0,\n",
@@ -443,26 +473,55 @@
{
"cell_type": "markdown",
"id": "34535172-797e-4dd0-84fb-65bc75ad5b06",
- "metadata": {},
+ "metadata": {
+ "id": "34535172-797e-4dd0-84fb-65bc75ad5b06"
+ },
"source": [
"- Reduce the context length so the model would work fine on a MacBook Air (if you have more RAM, feel free to comment out the lines below):"
]
},
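One note before the context-length hunk below: the `rope_freq` block in the configuration above (only `"factor": 32.0` is visible; the remaining fields are elided by the diff) drives Llama 3.1/3.2-style frequency smoothing. A sketch of that adjustment, with `low_freq_factor=1.0`, `high_freq_factor=4.0`, and `original_context_length=8192` assumed from Meta's published values rather than taken from this diff:

```python
import torch

# A sketch of Llama 3-style RoPE frequency smoothing: low frequencies are
# slowed by the full factor, high frequencies pass through unchanged, and
# the band in between is linearly interpolated.
def scale_inv_freq(inv_freq, factor=32.0, low_freq_factor=1.0,
                   high_freq_factor=4.0, original_context_length=8192):
    low_freq_wavelen = original_context_length / low_freq_factor
    high_freq_wavelen = original_context_length / high_freq_factor
    wavelen = 2 * torch.pi / inv_freq

    smooth = (original_context_length / wavelen - low_freq_factor) / (
        high_freq_factor - low_freq_factor
    )
    smoothed = (1 - smooth) * inv_freq / factor + smooth * inv_freq

    scaled = torch.where(wavelen > low_freq_wavelen, inv_freq / factor, inv_freq)
    medium = (wavelen <= low_freq_wavelen) & (wavelen >= high_freq_wavelen)
    return torch.where(medium, smoothed, scaled)

inv_freq = 1.0 / (500_000 ** (torch.arange(0, 64, 2).float() / 64))
print(scale_inv_freq(inv_freq)[:4])  # the highest frequencies are unchanged
```
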
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 10,
"id": "a8bc2370-39d2-4bfe-b4c1-6bdd75fe101c",
- "metadata": {},
- "outputs": [],
+ "metadata": {
+ "id": "a8bc2370-39d2-4bfe-b4c1-6bdd75fe101c"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "New RoPE theta: 31250.0\n"
+ ]
+ }
+ ],
"source": [
- "LLAMA32_CONFIG[\"context_length\"] = 8192"
+ "old_context_length = LLAMA32_CONFIG[\"context_length\"]\n",
+ "LLAMA32_CONFIG[\"context_length\"] = 8192\n",
+ "\n",
+ "\n",
+ "def rescale_theta(theta_old, context_length_old, context_length_new):\n",
+ " scaling_factor = context_length_new / context_length_old\n",
+ " theta_new = theta_old * scaling_factor\n",
+ " return theta_new\n",
+ "\n",
+ "LLAMA32_CONFIG[\"rope_base\"] = rescale_theta(\n",
+ " LLAMA32_CONFIG[\"rope_base\"],\n",
+ " old_context_length,\n",
+ " LLAMA32_CONFIG[\"context_length\"]\n",
+ ")\n",
+ "\n",
+ "print(\"New RoPE theta:\", LLAMA32_CONFIG[\"rope_base\"])"
]
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 11,
"id": "156253fe-aacd-4da2-8f13-705f05c4b11e",
- "metadata": {},
+ "metadata": {
+ "id": "156253fe-aacd-4da2-8f13-705f05c4b11e"
+ },
"outputs": [],
"source": [
"model = Llama3Model(LLAMA32_CONFIG)"
@@ -471,16 +530,24 @@
{
"cell_type": "markdown",
"id": "19de6c2c-83ce-456d-8be9-6ec415fe9eb1",
- "metadata": {},
+ "metadata": {
+ "id": "19de6c2c-83ce-456d-8be9-6ec415fe9eb1"
+ },
"source": [
"- The following is expected to print True to confirm buffers are reused instead of being (wastefully) recreated:"
]
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 12,
"id": "0e95db6d-2712-41a5-a5e0-86c49897f4cf",
- "metadata": {},
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "0e95db6d-2712-41a5-a5e0-86c49897f4cf",
+ "outputId": "8efc4937-e616-40d0-cd59-670d7eb3e841"
+ },
"outputs": [
{
"name": "stdout",
@@ -496,14 +563,20 @@
"# Check buffers\n",
"print(model.trf_blocks[0].att.mask is model.trf_blocks[-1].att.mask)\n",
"print(model.trf_blocks[0].att.cos is model.trf_blocks[-1].att.cos)\n",
- "print(model.trf_blocks[0].att.sin is model.trf_blocks[-1].att.sin) "
+ "print(model.trf_blocks[0].att.sin is model.trf_blocks[-1].att.sin)"
]
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 13,
"id": "364e76ca-52f8-4fa5-af37-c4069f9694bc",
- "metadata": {},
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "364e76ca-52f8-4fa5-af37-c4069f9694bc",
+ "outputId": "00d7e983-262e-4c65-f322-f4d999311988"
+ },
"outputs": [
{
"name": "stdout",
@@ -526,9 +599,15 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 14,
"id": "fd5efb03-5a07-46e8-8607-93ed47549d2b",
- "metadata": {},
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "fd5efb03-5a07-46e8-8607-93ed47549d2b",
+ "outputId": "65c1a95e-b502-4150-9e2e-da619d9053d5"
+ },
"outputs": [
{
"name": "stdout",
@@ -570,9 +649,11 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 15,
"id": "31f12baf-f79b-499f-85c0-51328a6a20f5",
- "metadata": {},
+ "metadata": {
+ "id": "31f12baf-f79b-499f-85c0-51328a6a20f5"
+ },
"outputs": [],
"source": [
"if torch.cuda.is_available():\n",
@@ -588,7 +669,9 @@
{
"cell_type": "markdown",
"id": "78e091e1-afa8-4d23-9aea-cced86181bfd",
- "metadata": {},
+ "metadata": {
+ "id": "78e091e1-afa8-4d23-9aea-cced86181bfd"
+ },
"source": [
" \n",
"# 3. Load tokenizer"
@@ -596,9 +679,11 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 16,
"id": "9482b01c-49f9-48e4-ab2c-4a4c75240e77",
- "metadata": {},
+ "metadata": {
+ "id": "9482b01c-49f9-48e4-ab2c-4a4c75240e77"
+ },
"outputs": [],
"source": [
"import os\n",
@@ -647,7 +732,7 @@
"\n",
" def decode(self, tokens):\n",
" return self.model.decode(tokens)\n",
- " \n",
+ "\n",
"\n",
"class ChatFormat:\n",
" def __init__(self, tokenizer):\n",
@@ -681,7 +766,9 @@
{
"cell_type": "markdown",
"id": "b771b60c-c198-4b30-bf10-42031197ae86",
- "metadata": {},
+ "metadata": {
+ "id": "b771b60c-c198-4b30-bf10-42031197ae86"
+ },
"source": [
"- Please note that Meta AI requires that you accept the Llama 3.2 licensing terms before you can download the files; to do this, you have to create a Hugging Face Hub account and visit the [meta-llama/Llama-3.2-1B](https://huggingface.co/meta-llama/Llama-3.2-1B) repository to accept the terms\n",
"- Next, you will need to create an access token; to generate an access token with READ permissions, click on the profile picture in the upper right and click on \"Settings\"\n",
@@ -696,23 +783,25 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 17,
"id": "e9d96dc8-603a-4cb5-8c3e-4d2ca56862ed",
- "metadata": {},
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "e9d96dc8-603a-4cb5-8c3e-4d2ca56862ed",
+ "outputId": "e6e6dc05-7330-45bc-a9a7-331919155bdd"
+ },
"outputs": [
{
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "8cdf801700d64fe9b2b827172a8eebcf",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "VBox(children=(HTML(value=' ![]()