mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2025-06-26 23:50:03 +00:00
* fix inverted token and pos layers * remove redundant code --------- Co-authored-by: rasbt <mail@sebastianraschka.com>
This commit is contained in:
parent
0a2e8c39c4
commit
564e986496
@ -46,8 +46,8 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"torch version: 2.4.0\n",
|
||||
"tiktoken version: 0.7.0\n"
|
||||
"torch version: 2.6.0\n",
|
||||
"tiktoken version: 0.9.0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -327,21 +327,13 @@
|
||||
" raw_text = f.read()\n",
|
||||
"\n",
|
||||
"tokenizer = tiktoken.get_encoding(\"gpt2\")\n",
|
||||
"encoded_text = tokenizer.encode(raw_text)\n",
|
||||
"\n",
|
||||
"vocab_size = 50257\n",
|
||||
"output_dim = 256\n",
|
||||
"max_len = 4\n",
|
||||
"context_length = max_len\n",
|
||||
"\n",
|
||||
"token_embedding_layer = torch.nn.Embedding(context_length, output_dim)\n",
|
||||
"pos_embedding_layer = torch.nn.Embedding(vocab_size, output_dim)"
|
||||
"encoded_text = tokenizer.encode(raw_text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "0128eefa-d7c8-4f76-9851-566dfa7c3745",
|
||||
"id": "15c184fe-5553-4df2-a77f-7504901b6709",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -371,7 +363,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "ff5c1e90-c6de-4a87-adf6-7e19f603291c",
|
||||
"id": "739990b2-ce4c-4d17-88e3-547c8c312019",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -415,7 +407,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
"version": "3.10.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
Loading…
x
Reference in New Issue
Block a user