mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2025-11-02 19:00:14 +00:00
update figure 6.6
This commit is contained in:
parent
a31d571625
commit
1e7d1f3bcb
@ -523,6 +523,14 @@
|
|||||||
"- For that, we use `<|endoftext|>` as a padding token, as discussed in chapter 2"
|
"- For that, we use `<|endoftext|>` as a padding token, as discussed in chapter 2"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "0829f33f-1428-4f22-9886-7fee633b3666",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"<img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/ch06_compressed/pad-input-sequences.webp?123\" width=500px>"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 8,
|
"execution_count": 8,
|
||||||
@ -550,25 +558,6 @@
|
|||||||
"print(tokenizer.encode(\"<|endoftext|>\", allowed_special={\"<|endoftext|>\"}))"
|
"print(tokenizer.encode(\"<|endoftext|>\", allowed_special={\"<|endoftext|>\"}))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 9,
|
|
||||||
"id": "0ff0f6b2-376b-4740-8858-55b60784be73",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"[1212, 318, 262, 717, 2420, 3275]\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"token_ids = tokenizer.encode(\"This is the first text message\")\n",
|
|
||||||
"print(token_ids)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "04f582ff-68bf-450e-bd87-5fb61afe431c",
|
"id": "04f582ff-68bf-450e-bd87-5fb61afe431c",
|
||||||
@ -579,14 +568,6 @@
|
|||||||
"- The `SpamDataset` class below identifies the longest sequence in the training dataset and adds the padding token to the others to match that sequence length"
|
"- The `SpamDataset` class below identifies the longest sequence in the training dataset and adds the padding token to the others to match that sequence length"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"id": "0829f33f-1428-4f22-9886-7fee633b3666",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"<img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/ch06_compressed/pad-input-sequences.webp\" width=500px>"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 10,
|
"execution_count": 10,
|
||||||
@ -2284,7 +2265,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.11.4"
|
"version": "3.10.12"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user