Fix truncation issue in classify_review function (#373)

2025-11-15 17:44:48 +00:00 · 2024-09-25 19:54:36 -05:00 · 2024-09-25 19:54:36 -05:00 · 7ef5129e18
commit 7ef5129e18
parent b56d0b2942
3 changed files with 5 additions and 3 deletions
--- a/ch06/01_main-chapter-code/ch06.ipynb
+++ b/ch06/01_main-chapter-code/ch06.ipynb
@@ -2207,7 +2207,9 @@
    "\n",
    "    # Prepare inputs to the model\n",
    "    input_ids = tokenizer.encode(text)\n",
-    "    supported_context_length = model.pos_emb.weight.shape[1]\n",
+    "    supported_context_length = model.pos_emb.weight.shape[0]\n",
+    "    # Note: In the book, this was originally written as pos_emb.weight.shape[1] by mistake\n",
+    "    # It didn't break the code but would have caused unnecessary truncation (to 768 instead of 1024)\n",
    "\n",
    "    # Truncate sequences if they too long\n",
    "    input_ids = input_ids[:min(max_length, supported_context_length)]\n",
--- a/ch06/01_main-chapter-code/load-finetuned-model.ipynb
+++ b/ch06/01_main-chapter-code/load-finetuned-model.ipynb
@@ -179,7 +179,7 @@
    "\n",
    "    # Prepare inputs to the model\n",
    "    input_ids = tokenizer.encode(text)\n",
-    "    supported_context_length = model.pos_emb.weight.shape[1]\n",
+    "    supported_context_length = model.pos_emb.weight.shape[0]\n",
    "\n",
    "    # Truncate sequences if they too long\n",
    "    input_ids = input_ids[:min(max_length, supported_context_length)]\n",
--- a/ch06/04_user_interface/previous_chapters.py
+++ b/ch06/04_user_interface/previous_chapters.py
@@ -353,7 +353,7 @@ def classify_review(text, model, tokenizer, device, max_length=None, pad_token_i

    # Prepare inputs to the model
    input_ids = tokenizer.encode(text)
-    supported_context_length = model.pos_emb.weight.shape[1]
+    supported_context_length = model.pos_emb.weight.shape[0]

    # Truncate sequences if they too long
    input_ids = input_ids[:min(max_length, supported_context_length)]