tokenizing example

This commit is contained in:
rasbt 2024-05-06 07:16:40 -05:00
parent 15d6f29cf8
commit 78829f28e9

View File

@@ -172,7 +172,7 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"File downloaded and saved as sms_spam_collection/SMSSpamCollection.tsv\n" "sms_spam_collection/SMSSpamCollection.tsv already exists. Skipping download and extraction.\n"
] ]
} }
], ],
@@ -557,18 +557,16 @@
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "name": "stdout",
"text/plain": [ "output_type": "stream",
"[42, 13, 314, 481, 1908, 340, 757]" "text": [
] "[1212, 318, 262, 717, 2420, 3275]\n"
}, ]
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
} }
], ],
"source": [ "source": [
"tokenizer.encode(\"K. I will sent it again\")" "token_ids = tokenizer.encode(\"This is the first text message\")\n",
"print(token_ids)"
] ]
}, },
{ {