diff --git a/ch02/05_bpe-from-scratch/bpe-from-scratch.ipynb b/ch02/05_bpe-from-scratch/bpe-from-scratch.ipynb index 16ce548..859cc78 100644 --- a/ch02/05_bpe-from-scratch/bpe-from-scratch.ipynb +++ b/ch02/05_bpe-from-scratch/bpe-from-scratch.ipynb @@ -629,7 +629,7 @@ " \"\"\"\n", " # Save vocabulary\n", " with open(vocab_path, \"w\", encoding=\"utf-8\") as file:\n", - " json.dump({k: v for k, v in self.vocab.items()}, file, ensure_ascii=False, indent=2)\n", + " json.dump(self.vocab, file, ensure_ascii=False, indent=2)\n", "\n", " # Save BPE merges as a list of dictionaries\n", " with open(bpe_merges_path, \"w\", encoding=\"utf-8\") as file:\n", @@ -667,6 +667,9 @@ " def find_freq_pair(token_ids, mode=\"most\"):\n", " pairs = Counter(zip(token_ids, token_ids[1:]))\n", "\n", + " if not pairs:\n", + " return None\n", + "\n", " if mode == \"most\":\n", " return max(pairs.items(), key=lambda x: x[1])[0]\n", " elif mode == \"least\":\n",