diff --git a/ch02/01_main-chapter-code/ch02.ipynb b/ch02/01_main-chapter-code/ch02.ipynb
index 06b55ae..938795c 100644
--- a/ch02/01_main-chapter-code/ch02.ipynb
+++ b/ch02/01_main-chapter-code/ch02.ipynb
@@ -1296,6 +1296,7 @@
 "\n",
 "        # Tokenize the entire text\n",
 "        token_ids = tokenizer.encode(txt, allowed_special={\"<|endoftext|>\"})\n",
+"        assert len(token_ids) > max_length, \"Number of tokenized inputs must be at least max_length+1\"\n",
 "\n",
 "        # Use a sliding window to chunk the book into overlapping sequences of max_length\n",
 "        for i in range(0, len(token_ids) - max_length, stride):\n",
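
Why this guard matters: the sliding window below the patched line only yields samples while `i < len(token_ids) - max_length`, so a text that tokenizes to `max_length` tokens or fewer silently produces an empty dataset. The added assert turns that silent failure into an explicit error. The following is a minimal standalone sketch of the patched logic outside the notebook's `GPTDatasetV1` class; the sample text and the `max_length`/`stride` values are illustrative choices, while the tokenizer call follows the `tiktoken` API the notebook already uses:

```python
# Minimal standalone sketch of the sliding-window chunking that the hunk
# above guards (illustrative text and parameters, not from the notebook).
import tiktoken

tokenizer = tiktoken.get_encoding("gpt2")  # same BPE encoding the chapter uses

txt = "Hello, do you like tea?"  # hypothetical sample text
max_length, stride = 4, 4        # illustrative values

token_ids = tokenizer.encode(txt, allowed_special={"<|endoftext|>"})

# Without the new assert, a text shorter than max_length + 1 tokens makes
# range(0, len(token_ids) - max_length, stride) empty, so the dataset would
# silently contain zero input-target pairs.
assert len(token_ids) > max_length, "Number of tokenized inputs must be at least max_length+1"

for i in range(0, len(token_ids) - max_length, stride):
    input_chunk = token_ids[i:i + max_length]
    target_chunk = token_ids[i + 1:i + max_length + 1]  # targets are inputs shifted by one
    print(input_chunk, "->", target_chunk)
```

With this sample text (7 tokens under the GPT-2 BPE) and `max_length=4`, the loop yields a single input-target pair; shrink `txt` below `max_length + 1` tokens and the assert fires instead of the loop silently returning nothing.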