From e9c1c1da38dc2b0a75b75b52822dbb8325b25246 Mon Sep 17 00:00:00 2001
From: Sebastian Raschka
Date: Fri, 15 Aug 2025 08:38:48 -0500
Subject: [PATCH] Fix qk_norm comment (#769)

---
 ch05/11_qwen3/standalone-qwen3.ipynb | 2 +-
 pkg/llms_from_scratch/qwen3.py       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ch05/11_qwen3/standalone-qwen3.ipynb b/ch05/11_qwen3/standalone-qwen3.ipynb
index 516df0a..63f6cbe 100644
--- a/ch05/11_qwen3/standalone-qwen3.ipynb
+++ b/ch05/11_qwen3/standalone-qwen3.ipynb
@@ -436,7 +436,7 @@
 " \"n_layers\": 28, # Number of layers\n",
 " \"hidden_dim\": 3072, # Size of the intermediate dimension in FeedForward\n",
 " \"head_dim\": 128, # Size of the heads in GQA\n",
-" \"qk_norm\": True, # Whether to normalize queries and values in GQA\n",
+" \"qk_norm\": True, # Whether to normalize queries and keys in GQA\n",
 " \"n_kv_groups\": 8, # Key-Value groups for grouped-query attention\n",
 " \"rope_base\": 1_000_000.0, # The base in RoPE's \"theta\"\n",
 " \"dtype\": torch.bfloat16, # Lower-precision dtype to reduce memory usage\n",
diff --git a/pkg/llms_from_scratch/qwen3.py b/pkg/llms_from_scratch/qwen3.py
index 3e9f726..dd43645 100644
--- a/pkg/llms_from_scratch/qwen3.py
+++ b/pkg/llms_from_scratch/qwen3.py
@@ -22,7 +22,7 @@ QWEN_CONFIG_06_B = {
     "n_layers": 28, # Number of layers
     "hidden_dim": 3072, # Size of the intermediate dimension in FeedForward
     "head_dim": 128, # Size of the heads in GQA
-    "qk_norm": True, # Whether to normalize queries and values in GQA
+    "qk_norm": True, # Whether to normalize queries and keys in GQA
     "n_kv_groups": 8, # Key-Value groups for grouped-query attention
     "rope_base": 1_000_000.0, # The base in RoPE's "theta"
     "dtype": torch.bfloat16, # Lower-precision dtype to reduce memory usage
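Note: the corrected comment describes QK-norm, which normalizes the query and key projections (not the values) per attention head before the attention scores are computed. The following is a minimal sketch of that idea, assuming a standard RMSNorm and illustrative tensor shapes; the names and shapes are hypothetical and this is not the repository's exact Qwen3 attention code.

import torch
import torch.nn as nn

class RMSNorm(nn.Module):
    # Minimal RMSNorm over the last axis (head_dim).
    def __init__(self, dim, eps=1e-6):
        super().__init__()
        self.eps = eps
        self.weight = nn.Parameter(torch.ones(dim))

    def forward(self, x):
        rms = torch.rsqrt(x.pow(2).mean(dim=-1, keepdim=True) + self.eps)
        return x * rms * self.weight

head_dim = 128                 # matches "head_dim" in the config above
q_norm = RMSNorm(head_dim)     # one norm shared across all query heads
k_norm = RMSNorm(head_dim)     # one norm shared across all key heads

# Shapes are (batch, n_heads, seq_len, head_dim); under GQA the key tensor
# has fewer heads (n_kv_groups) than the query tensor.
q = torch.randn(1, 16, 4, head_dim)
k = torch.randn(1, 8, 4, head_dim)

# qk_norm=True: normalize queries and keys (values are left untouched)
# before RoPE and the attention-score computation.
q = q_norm(q)
k = k_norm(k)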