diff --git a/ch03/01_main-chapter-code/ch03.ipynb b/ch03/01_main-chapter-code/ch03.ipynb index eb68a77..d5cb1ec 100644 --- a/ch03/01_main-chapter-code/ch03.ipynb +++ b/ch03/01_main-chapter-code/ch03.ipynb @@ -205,7 +205,10 @@ "id": "fcc7c7a2-b6ab-478f-ae37-faa8eaa8049a", "metadata": {}, "source": [ - "" + "\n", + "\n", + "- (Please note that the numbers in this figure are truncated to one\n", + "digit after the decimal point to reduce visual clutter; similarly, other figures may also contain truncated values)" ] }, { @@ -268,11 +271,13 @@ "id": "299baef3-b1a8-49ba-bad4-f62c8a416d83", "metadata": {}, "source": [ + "- (In this book, we follow the common machine learning and deep learning convention where training examples are represented as rows and feature values as columns; in the case of the tensor shown above, each row represents a word, and each column represents an embedding dimension)\n", + "\n", "- The primary objective of this section is to demonstrate how the context vector $z^{(2)}$\n", " is calculated using the second input sequence, $x^{(2)}$, as a query\n", "\n", "- The figure depicts the initial step in this process, which involves calculating the attention scores ω between $x^{(2)}$\n", - " and all other input elements through a dot product operation." + " and all other input elements through a dot product operation" ] }, { @@ -517,7 +522,10 @@ "id": "11c0fb55-394f-42f4-ba07-d01ae5c98ab4", "metadata": {}, "source": [ - "" + "\n", + "\n", + "- (Please note that the numbers in this figure are truncated to two\n", + "digits after the decimal point to reduce visual clutter; the values in each row should add up to 1.0 or 100%; similarly, digits in other figures are truncated)" ] }, { @@ -2019,7 +2027,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.10.6" } }, "nbformat": 4,