diff --git a/appendix-D/01_main-chapter-code/appendix-D.ipynb b/appendix-D/01_main-chapter-code/appendix-D.ipynb
index 09db8b5..435c6c2 100644
--- a/appendix-D/01_main-chapter-code/appendix-D.ipynb
+++ b/appendix-D/01_main-chapter-code/appendix-D.ipynb
@@ -199,8 +199,8 @@
    }
   ],
   "source": [
-    "total_steps = len(train_loader) * n_epochs * train_loader.batch_size\n",
-    "warmup_steps = int(0.1 * total_steps) # 10% warmup\n",
+    "total_steps = len(train_loader) * n_epochs\n",
+    "warmup_steps = int(0.2 * total_steps) # 20% warmup\n",
     "print(warmup_steps)"
   ]
  },
@@ -779,7 +779,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.4"
+   "version": "3.10.6"
   }
  },
  "nbformat": 4,
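Note on the first hunk above: the optimizer takes one step per batch, so the number of update steps is `len(train_loader) * n_epochs`; the previous formula additionally multiplied by `train_loader.batch_size` and therefore counted training samples rather than optimizer steps. Below is a minimal, self-contained sketch of how these two values typically drive a linear warmup; `steps_per_epoch` and `peak_lr` are illustrative stand-ins, not names from the notebook:

```python
n_epochs = 15
steps_per_epoch = 100                    # stand-in for len(train_loader)
total_steps = steps_per_epoch * n_epochs
warmup_steps = int(0.2 * total_steps)    # 20% warmup, as in the hunk above

peak_lr = 5e-4
global_step = 0
for epoch in range(n_epochs):
    for _ in range(steps_per_epoch):     # one optimizer step per batch
        global_step += 1
        if global_step <= warmup_steps:
            # Ramp the learning rate linearly from ~0 up to peak_lr
            lr = peak_lr * global_step / warmup_steps
        else:
            lr = peak_lr                 # a real schedule would decay here
```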
\n", + "Code repository: https://github.com/rasbt/LLMs-from-scratch\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "51c9672d-8d0c-470d-ac2d-1271f8ec3f14", + "metadata": {}, + "source": [ + "# Chapter 6 Exercise solutions" + ] + }, + { + "cell_type": "markdown", + "id": "5fea8be3-30a1-4623-a6d7-b095c6c1092e", + "metadata": {}, + "source": [ + "## Exercise 6.1: Increasing the context length" + ] + }, + { + "cell_type": "markdown", + "id": "5860ba9f-2db3-4480-b96b-4be1c68981eb", + "metadata": {}, + "source": [ + "We can pad the inputs to the maximum number of tokens to the maximum the model supports by setting the max length to\n", + "\n", + "```python\n", + "max_length = 1024\n", + "\n", + "train_dataset = SpamDataset(base_path / \"train.csv\", max_length=max_length, tokenizer=tokenizer)\n", + "val_dataset = SpamDataset(base_path / \"validation.csv\", max_length=max_length, tokenizer=tokenizer)\n", + "test_dataset = SpamDataset(base_path / \"test.csv\", max_length=max_length, tokenizer=tokenizer)\n", + "\n", + "```\n", + "\n", + "or, equivalently, we can define the `max_length` via:\n", + "\n", + "```python\n", + "max_length = model.pos_emb.weight.shape[0]\n", + "```\n", + "\n", + "or\n", + "\n", + "```python\n", + "max_length = BASE_CONFIG[\"context_length\"]\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "2b0f4d5d-17fd-4265-93d8-ea08a22fdaf8", + "metadata": {}, + "source": [ + "For convenience, you can run this experiment via\n", + "\n", + "```\n", + "python additional-experiments.py --context_length \"model_context_length\"\n", + "```\n", + "\n", + "using the code in the [../02_bonus_additional-experiments](../02_bonus_additional-experiments) folder, which results in a substantially worse test accuracy of 78.33% (versus the 95.67% in the main chapter)." + ] + }, + { + "cell_type": "markdown", + "id": "5a780455-f52a-48d1-ab82-6afd40bcad8b", + "metadata": {}, + "source": [ + "## Exercise 6.2: Finetuning the whole model" + ] + }, + { + "cell_type": "markdown", + "id": "56aa5208-aa29-4165-a0ec-7480754e2a18", + "metadata": {}, + "source": [ + "Instead of finetuning just the final transformer block, we can finetune the entire model by removing the following lines from the code:\n", + "\n", + "```python\n", + "for param in model.parameters():\n", + " param.requires_grad = False\n", + "```\n", + "\n", + "For convenience, you can run this experiment via\n", + "\n", + "```\n", + "python additional-experiments.py --trainable_layers all\n", + "```\n", + "\n", + "using the code in the [../02_bonus_additional-experiments](../02_bonus_additional-experiments) folder, which results in a 1% improved test accuracy of 96.67% (versus the 95.67% in the main chapter)." 
+  {
+   "cell_type": "markdown",
+   "id": "2269bce3-f2b5-4a76-a692-5977c75a57b6",
+   "metadata": {},
+   "source": [
+    "## Exercise 6.3: Finetuning the first versus last token"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7418a629-51b6-4aa2-83b7-bc0261bc370f",
+   "metadata": {},
+   "source": [
+    "Rather than finetuning the last output token, we can finetune the first output token by changing\n",
+    "\n",
+    "```python\n",
+    "model(input_batch)[:, -1, :]\n",
+    "```\n",
+    "\n",
+    "to\n",
+    "\n",
+    "```python\n",
+    "model(input_batch)[:, 0, :]\n",
+    "```\n",
+    "\n",
+    "everywhere in the code.\n",
+    "\n",
+    "For convenience, you can run this experiment via\n",
+    "\n",
+    "```\n",
+    "python additional-experiments.py --trainable_token first\n",
+    "```\n",
+    "\n",
+    "using the code in the [../02_bonus_additional-experiments](../02_bonus_additional-experiments) folder, which results in a substantially worse test accuracy of 75.00% (versus the 95.67% in the main chapter)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e5e6188a-f182-4f26-b9e5-ccae3ecadae0",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/ch06/01_main-chapter-code/tests.py b/ch06/01_main-chapter-code/tests.py
index 91d3a6f..ef74701 100644
--- a/ch06/01_main-chapter-code/tests.py
+++ b/ch06/01_main-chapter-code/tests.py
@@ -7,7 +7,6 @@
 
 
 import subprocess
-import pytest
 
 
 def test_gpt_class_finetune():
@@ -15,4 +14,3 @@ def test_gpt_class_finetune():
 
     result = subprocess.run(command, capture_output=True, text=True)
     assert result.returncode == 0, f"Script exited with errors: {result.stderr}"
-
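Regarding Exercise 6.1 in the notebook added above: the `SpamDataset` class comes from the main chapter, and the sketch below is a hedged, reduced reconstruction of only its length handling, assuming (as the chapter does) that inputs are truncated to `max_length` and right-padded with GPT-2's `<|endoftext|>` token id 50256. The helper name `encode_padded` is illustrative.

```python
import torch
import tiktoken

tokenizer = tiktoken.get_encoding("gpt2")
pad_token_id = 50256  # id of GPT-2's "<|endoftext|>" token, reused for padding

def encode_padded(text, max_length):
    # Truncate to max_length tokens, then right-pad up to max_length
    token_ids = tokenizer.encode(text)[:max_length]
    token_ids += [pad_token_id] * (max_length - len(token_ids))
    return torch.tensor(token_ids)

# With max_length=1024, every input fills the model's full context window
example = encode_padded("You are a winner! Claim your prize now.", max_length=1024)
print(example.shape)  # torch.Size([1024])
```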
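For Exercise 6.2, the contrast between the two setups can be made concrete with a toy model. The `nn.Sequential` below is only a stand-in for the chapter's `GPTModel`; in the book, the partial setup unfreezes the last transformer block, the final LayerNorm, and the classification head rather than a single layer.

```python
import torch.nn as nn

# Toy stand-in for the chapter's GPTModel: embedding, a "block", and a head
model = nn.Sequential(
    nn.Embedding(50257, 16),
    nn.Linear(16, 16),
    nn.Linear(16, 2),
)

def count_trainable(m):
    return sum(p.numel() for p in m.parameters() if p.requires_grad)

# Main-chapter setup: freeze everything, then unfreeze only the final parts
for param in model.parameters():
    param.requires_grad = False
for param in model[-1].parameters():
    param.requires_grad = True
print("Partial finetuning:", count_trainable(model))

# Exercise 6.2 setup: omit the freezing loop, so every parameter keeps its
# default requires_grad=True and the whole model is finetuned
for param in model.parameters():
    param.requires_grad = True
print("Full finetuning:", count_trainable(model))
```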
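For Exercise 6.3, the reason the first token performs so much worse is that, under causal attention, the first position can only ever attend to itself, while the last position attends to the entire sequence. A small shape-level sketch with dummy logits; the `(batch_size, num_tokens, num_classes)` layout is assumed to match the chapter's classification head.

```python
import torch

batch_size, num_tokens, num_classes = 8, 120, 2
logits = torch.randn(batch_size, num_tokens, num_classes)  # dummy model output

# Last token: the only position that has attended to every other token
last_token_logits = logits[:, -1, :]    # shape: (8, 2)

# First token: it has only "seen" itself, so it carries no information about
# the rest of the message -- hence the much lower 75.00% test accuracy
first_token_logits = logits[:, 0, :]    # shape: (8, 2)

print(last_token_logits.shape, first_token_logits.shape)
```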