From 0ed1e0d099d0409e29eb73a21ec86a02da77da37 Mon Sep 17 00:00:00 2001
From: Daniel Kleine <53251018+d-kleine@users.noreply.github.com>
Date: Fri, 25 Oct 2024 01:23:53 +0200
Subject: [PATCH] fixed typos (#414)

* fixed typos

* fixed formatting

* Update ch03/02_bonus_efficient-multihead-attention/mha-implementations.ipynb

* del weights after load into model

---------

Co-authored-by: Sebastian Raschka
---
 .../mha-implementations.ipynb                 | 20 ++++++++++---------
 .../converting-llama2-to-llama3.ipynb         | 12 ++++++-----
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/ch03/02_bonus_efficient-multihead-attention/mha-implementations.ipynb b/ch03/02_bonus_efficient-multihead-attention/mha-implementations.ipynb
index a1d074b..76f7aaf 100644
--- a/ch03/02_bonus_efficient-multihead-attention/mha-implementations.ipynb
+++ b/ch03/02_bonus_efficient-multihead-attention/mha-implementations.ipynb
@@ -83,8 +83,8 @@
    },
    "source": [
     "- To run all the code in this notebook, please ensure you update to at least PyTorch 2.5 (FlexAttention is not included in earlier PyTorch releases)\n",
-    "If the code cell above shows a PyTorch version lower than 2.5, you can upgrade your PyTorch installation by uncommenting and running the following code cell (Please note that PyTorch 2.5 requires Python 3.9 or later)\n",
-    "- For more specific instructions and CUDA versions, please refer to the official installation guide at https://pytorch.org."
+    "- If the code cell above shows a PyTorch version lower than 2.5, you can upgrade your PyTorch installation by uncommenting and running the following code cell (Please note that PyTorch 2.5 requires Python 3.9 or later)\n",
+    "- For more specific instructions and CUDA versions, please refer to the official installation guide at https://pytorch.org"
    ]
   },
   {
@@ -886,12 +886,14 @@
     "id": "d2164859-31a0-4537-b4fb-27d57675ba77"
    },
    "source": [
-    "- Set `need_weights` (default `True`) to need_weights=False so that `MultiheadAttention` uses `scaled_dot_product_attention` [according to the documentation](https://github.com/pytorch/pytorch/blob/71d020262793542974cf13b30f2a9099773f015c/torch/nn/modules/activation.py#L1096)\n",
+    "- Set `need_weights` (default `True`) to `False` so that `MultiheadAttention` uses `scaled_dot_product_attention` [according to the documentation](https://github.com/pytorch/pytorch/blob/71d020262793542974cf13b30f2a9099773f015c/torch/nn/modules/activation.py#L1096)\n",
     "\n",
-    "> need_weights: If specified, returns ``attn_output_weights`` in addition to ``attn_outputs``.\n",
-    "    Set ``need_weights=False`` to use the optimized ``scaled_dot_product_attention``\n",
-    "    and achieve the best performance for MHA.\n",
-    "    Default: ``True``."
+    "```markdown\n",
+    "need_weights: If specified, returns `attn_output_weights` in addition to `attn_outputs`.\n",
+    "    Set `need_weights=False` to use the optimized `scaled_dot_product_attention`\n",
+    "    and achieve the best performance for MHA.\n",
+    "    Default: `True`\n",
+    "```"
    ]
   },
   {
@@ -1965,7 +1967,7 @@
    "provenance": []
   },
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "pt",
    "language": "python",
    "name": "python3"
   },
@@ -1979,7 +1981,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.4"
+   "version": "3.11.9"
   }
  },
  "nbformat": 4,
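To illustrate what the `need_weights` change above is about, here is a minimal, self-contained sketch (toy shapes; not code from the notebook) of calling `nn.MultiheadAttention` with `need_weights=False`, which lets PyTorch take the optimized `scaled_dot_product_attention` path:

```python
import torch
import torch.nn as nn

mha = nn.MultiheadAttention(embed_dim=768, num_heads=12, batch_first=True)
x = torch.randn(8, 1024, 768)  # (batch, seq_len, embed_dim)

# forward() returns (attn_output, attn_output_weights); with
# need_weights=False the weights are not returned (second element is None),
# which allows the fused scaled_dot_product_attention fast path.
attn_output, attn_weights = mha(x, x, x, need_weights=False)

print(attn_output.shape)  # torch.Size([8, 1024, 768])
print(attn_weights)       # None
```

Requesting the attention weights forces the slower reference implementation, so inference code that never inspects them generally benefits from `need_weights=False`.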
diff --git a/ch05/07_gpt_to_llama/converting-llama2-to-llama3.ipynb b/ch05/07_gpt_to_llama/converting-llama2-to-llama3.ipynb
index 1c0dc34..3fb007b 100644
--- a/ch05/07_gpt_to_llama/converting-llama2-to-llama3.ipynb
+++ b/ch05/07_gpt_to_llama/converting-llama2-to-llama3.ipynb
@@ -1843,7 +1843,7 @@
    "id": "VlH7qYVdDKQr"
   },
   "source": [
-    "- Note that the Llama 3 model should ideally used with the correct prompt template that was used during finetuning (as discussed in chapter 7)\n",
+    "- Note that the Llama 3 model should ideally be used with the correct prompt template that was used during finetuning (as discussed in chapter 7)\n",
    "- Below is a wrapper class around the tokenizer based on Meta AI's Llama 3-specific [ChatFormat code](https://github.com/meta-llama/llama3/blob/11817d47e1ba7a4959b025eb1ca308572e0e3963/llama/tokenizer.py#L202) that constructs the prompt template"
   ]
  },
 {
@@ -2099,7 +2099,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "LLAMA32_CONFIG[\"context_length\"] = 8192"
+    "LLAMA31_CONFIG_8B[\"context_length\"] = 8192"
   ]
  },
 {
@@ -2319,7 +2319,8 @@
    "    combined_weights.update(current_weights)\n",
    "\n",
    "load_weights_into_llama(model, LLAMA31_CONFIG_8B, combined_weights)\n",
-    "model.to(device);"
+    "model.to(device);\n",
+    "del combined_weights # free up memory"
   ]
  },
 {
@@ -2466,7 +2467,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "LLAMA32_CONFIG[\"context_length\"] = 8192"
+    "LLAMA32_CONFIG_1B[\"context_length\"] = 8192"
   ]
  },
 {
@@ -2594,7 +2595,8 @@
    "current_weights = load_file(weights_file)\n",
    "\n",
    "load_weights_into_llama(model, LLAMA32_CONFIG_1B, current_weights)\n",
-    "model.to(device);"
+    "model.to(device);\n",
+    "del current_weights # free up memory"
   ]
  },
 {
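The `del combined_weights` / `del current_weights` lines added in the last two hunks follow a general pattern: once the checkpoint tensors have been copied into the model's parameters, the standalone weight dict is redundant and can be deleted so Python can reclaim that memory. Below is a minimal, self-contained sketch of the same pattern (toy model and file name; the notebook's `load_weights_into_llama` helper is not reproduced here):

```python
import gc
import torch
import torch.nn as nn

# Stand-in for a real checkpoint file.
model = nn.Linear(4, 4)
torch.save(model.state_dict(), "weights.pt")

# Load the checkpoint into a dict of tensors and copy it into the model...
state_dict = torch.load("weights.pt", map_location="cpu")
model.load_state_dict(state_dict)

# ...then drop the dict: the model's parameters now hold the values, so
# keeping the loaded dict around only inflates peak host memory.
del state_dict
gc.collect()
```

For a toy model this saves almost nothing, but for multi-gigabyte Llama checkpoints like those loaded in the notebook, dropping the loose weight dict after `model.to(device)` can be a meaningful reduction in host memory usage.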