diff --git a/ch06/01_main-chapter-code/ch06.ipynb b/ch06/01_main-chapter-code/ch06.ipynb
index e1e0a76..12cfb26 100644
--- a/ch06/01_main-chapter-code/ch06.ipynb
+++ b/ch06/01_main-chapter-code/ch06.ipynb
@@ -523,6 +523,14 @@
     "- For that, we use `<|endoftext|>` as a padding token, as discussed in chapter 2"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "0829f33f-1428-4f22-9886-7fee633b3666",
+   "metadata": {},
+   "source": [
+    ""
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 8,
@@ -550,25 +558,6 @@
     "print(tokenizer.encode(\"<|endoftext|>\", allowed_special={\"<|endoftext|>\"}))"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "id": "0ff0f6b2-376b-4740-8858-55b60784be73",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[1212, 318, 262, 717, 2420, 3275]\n"
-     ]
-    }
-   ],
-   "source": [
-    "token_ids = tokenizer.encode(\"This is the first text message\")\n",
-    "print(token_ids)"
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "04f582ff-68bf-450e-bd87-5fb61afe431c",
    "metadata": {},
@@ -579,14 +568,6 @@
     "- The `SpamDataset` class below identifies the longest sequence in the training dataset and adds the padding token to the others to match that sequence length"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "id": "0829f33f-1428-4f22-9886-7fee633b3666",
-   "metadata": {},
-   "source": [
-    ""
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 10,
@@ -2284,7 +2265,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.4"
+   "version": "3.10.12"
   }
  },
 "nbformat": 4,
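
For reference, the padding approach described by the notebook's surviving markdown cells can be sketched as follows. This is a minimal illustration, not the notebook's `SpamDataset` class itself: it assumes tiktoken's `gpt2` encoding and pads every encoded text on the right with the `<|endoftext|>` token ID so all sequences match the longest one; the sample `texts` are hypothetical.

```python
import tiktoken

# Minimal sketch of the padding strategy described in the notebook cells above
# (an illustrative stand-in, not the book's SpamDataset implementation)
tokenizer = tiktoken.get_encoding("gpt2")

# <|endoftext|> encodes to a single token ID (50256), used here as the pad token
pad_token_id = tokenizer.encode("<|endoftext|>", allowed_special={"<|endoftext|>"})[0]

texts = ["You are a winner you have been specially selected", "Hi, how are you?"]
encoded = [tokenizer.encode(text) for text in texts]

# Right-pad every sequence to the length of the longest one
max_length = max(len(ids) for ids in encoded)
padded = [ids + [pad_token_id] * (max_length - len(ids)) for ids in encoded]

for row in padded:
    print(len(row), row)  # all rows now share the same length
```

Padding to the longest training sequence, rather than truncating to a fixed maximum, keeps every batch element the same length without discarding any token data.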