From 0a5214b8046f801df038a10dce264a48cc021b24 Mon Sep 17 00:00:00 2001 From: Kasen <18170166+imkasen@users.noreply.github.com> Date: Wed, 19 Feb 2025 04:47:31 +0800 Subject: [PATCH] Fix incorrect indentation (#536) --- ch02/05_bpe-from-scratch/bpe-from-scratch.ipynb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ch02/05_bpe-from-scratch/bpe-from-scratch.ipynb b/ch02/05_bpe-from-scratch/bpe-from-scratch.ipynb index a750af4..16ce548 100644 --- a/ch02/05_bpe-from-scratch/bpe-from-scratch.ipynb +++ b/ch02/05_bpe-from-scratch/bpe-from-scratch.ipynb @@ -382,7 +382,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "3e4a15ec-2667-4f56-b7c1-34e8071b621d", "metadata": {}, "outputs": [], @@ -499,7 +499,7 @@ " if lines and lines[0].startswith(\"#\"):\n", " lines = lines[1:]\n", "\n", - " for rank, line in enumerate(lines):\n", + " for line in lines:\n", " pair = tuple(line.strip().split())\n", " if len(pair) == 2:\n", " token1, token2 = pair\n", @@ -511,10 +511,10 @@ " merged_token_id = self.inverse_vocab[merged_token]\n", " self.bpe_merges[(token_id1, token_id2)] = merged_token_id\n", " # print(f\"Loaded merge: '{token1}' + '{token2}' -> '{merged_token}' (ID: {merged_token_id})\")\n", + " else:\n", + " print(f\"Merged token '{merged_token}' not found in vocab. Skipping.\")\n", " else:\n", - " print(f\"Merged token '{merged_token}' not found in vocab. Skipping.\")\n", - " else:\n", - " print(f\"Skipping pair {pair} as one of the tokens is not in the vocabulary.\")\n", + " print(f\"Skipping pair {pair} as one of the tokens is not in the vocabulary.\")\n", "\n", " def encode(self, text):\n", " \"\"\"\n",