mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2025-09-26 00:24:36 +00:00
tokenizing example
This commit is contained in:
parent
15d6f29cf8
commit
78829f28e9
@ -172,7 +172,7 @@
|
|||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"File downloaded and saved as sms_spam_collection/SMSSpamCollection.tsv\n"
|
"sms_spam_collection/SMSSpamCollection.tsv already exists. Skipping download and extraction.\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -557,18 +557,16 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"name": "stdout",
|
||||||
"text/plain": [
|
"output_type": "stream",
|
||||||
"[42, 13, 314, 481, 1908, 340, 757]"
|
"text": [
|
||||||
]
|
"[1212, 318, 262, 717, 2420, 3275]\n"
|
||||||
},
|
]
|
||||||
"execution_count": 9,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"tokenizer.encode(\"K. I will sent it again\")"
|
"token_ids = tokenizer.encode(\"This is the first text message\")\n",
|
||||||
|
"print(token_ids)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user