remove redundant code lines (#247)

This commit is contained in:
Sebastian Raschka 2024-06-25 21:44:19 -05:00 committed by GitHub
parent f46441d53f
commit 4fef19e016

View File

@@ -785,9 +785,6 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"preprocessed = re.split(r'([,.?_!\"()\\']|--|\\s)', raw_text)\n",
"preprocessed = [item.strip() for item in preprocessed if item.strip()]\n",
"\n",
"all_tokens = sorted(list(set(preprocessed)))\n", "all_tokens = sorted(list(set(preprocessed)))\n",
"all_tokens.extend([\"<|endoftext|>\", \"<|unk|>\"])\n", "all_tokens.extend([\"<|endoftext|>\", \"<|unk|>\"])\n",
"\n", "\n",
@@ -803,7 +800,7 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"1161" "1132"
] ]
}, },
"execution_count": 19, "execution_count": 19,
@@ -825,11 +822,11 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"('younger', 1156)\n", "('younger', 1127)\n",
"('your', 1157)\n", "('your', 1128)\n",
"('yourself', 1158)\n", "('yourself', 1129)\n",
"('<|endoftext|>', 1159)\n", "('<|endoftext|>', 1130)\n",
"('<|unk|>', 1160)\n" "('<|unk|>', 1131)\n"
] ]
} }
], ],
@@ -918,22 +915,7 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"[1160,\n", "[1131, 5, 355, 1126, 628, 975, 10, 1130, 55, 988, 956, 984, 722, 988, 1131, 7]"
" 5,\n",
" 362,\n",
" 1155,\n",
" 642,\n",
" 1000,\n",
" 10,\n",
" 1159,\n",
" 57,\n",
" 1013,\n",
" 981,\n",
" 1009,\n",
" 738,\n",
" 1013,\n",
" 1160,\n",
" 7]"
] ]
}, },
"execution_count": 23, "execution_count": 23,