formatting updates

This commit is contained in:
rasbt 2024-06-17 07:40:04 -05:00
parent f6274117b9
commit 7b2174b115
No known key found for this signature in database
GPG Key ID: 3C6E5C7C075611DB

View File

@ -48,10 +48,10 @@
"name": "stdout",
"output_type": "stream",
"text": [
"matplotlib version: 3.7.1\n",
"tiktoken version: 0.7.0\n",
"torch version: 2.3.0+cu121\n",
"tqdm version: 4.66.4\n",
"matplotlib version: 3.9.0\n",
"tiktoken version: 0.5.1\n",
"torch version: 2.2.2\n",
"tqdm version: 4.66.2\n",
"tensorflow version: 2.15.0\n"
]
}
@ -174,7 +174,7 @@
"\n",
" if not os.path.exists(file_path):\n",
" with urllib.request.urlopen(url) as response:\n",
" text_data = response.read().decode('utf-8')\n",
" text_data = response.read().decode(\"utf-8\")\n",
" with open(file_path, \"w\", encoding=\"utf-8\") as file:\n",
" file.write(text_data)\n",
" else:\n",
@ -221,13 +221,12 @@
"output_type": "stream",
"text": [
"Example entry:\n",
"\n",
" {'instruction': 'Identify the correct spelling of the following word.', 'input': 'Ocassion', 'output': \"The correct spelling is 'Occasion.'\"}\n"
]
}
],
"source": [
"print(\"Example entry:\\n\\n\", data[50])"
"print(\"Example entry:\\n\", data[50])"
]
},
{
@ -257,13 +256,12 @@
"output_type": "stream",
"text": [
"Another example entry:\n",
"\n",
" {'instruction': \"What is an antonym of 'complicated'?\", 'input': '', 'output': \"An antonym of 'complicated' is 'simple'.\"}\n"
]
}
],
"source": [
"print(\"Another example entry:\\n\\n\", data[999])"
"print(\"Another example entry:\\n\", data[999])"
]
},
{
@ -511,20 +509,6 @@
{
"cell_type": "code",
"execution_count": 10,
"id": "K6MWf0lhu8GP",
"metadata": {
"id": "K6MWf0lhu8GP"
},
"outputs": [],
"source": [
"import tiktoken\n",
"\n",
"tokenizer = tiktoken.get_encoding(\"gpt2\")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "adc29dc4-f1c7-4c71-937b-95119d6239bb",
"metadata": {
"id": "adc29dc4-f1c7-4c71-937b-95119d6239bb"
@ -569,7 +553,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 11,
"id": "ff24fe1a-5746-461c-ad3d-b6d84a1a7c96",
"metadata": {
"colab": {
@ -588,6 +572,9 @@
}
],
"source": [
"import tiktoken\n",
"tokenizer = tiktoken.get_encoding(\"gpt2\")\n",
"\n",
"print(tokenizer.encode(\"<|endoftext|>\", allowed_special={\"<|endoftext|>\"}))"
]
},