mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2025-08-15 12:12:10 +00:00
formatting updates
This commit is contained in:
parent
f6274117b9
commit
7b2174b115
@ -48,10 +48,10 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"matplotlib version: 3.7.1\n",
|
||||
"tiktoken version: 0.7.0\n",
|
||||
"torch version: 2.3.0+cu121\n",
|
||||
"tqdm version: 4.66.4\n",
|
||||
"matplotlib version: 3.9.0\n",
|
||||
"tiktoken version: 0.5.1\n",
|
||||
"torch version: 2.2.2\n",
|
||||
"tqdm version: 4.66.2\n",
|
||||
"tensorflow version: 2.15.0\n"
|
||||
]
|
||||
}
|
||||
@ -174,7 +174,7 @@
|
||||
"\n",
|
||||
" if not os.path.exists(file_path):\n",
|
||||
" with urllib.request.urlopen(url) as response:\n",
|
||||
" text_data = response.read().decode('utf-8')\n",
|
||||
" text_data = response.read().decode(\"utf-8\")\n",
|
||||
" with open(file_path, \"w\", encoding=\"utf-8\") as file:\n",
|
||||
" file.write(text_data)\n",
|
||||
" else:\n",
|
||||
@ -221,13 +221,12 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Example entry:\n",
|
||||
"\n",
|
||||
" {'instruction': 'Identify the correct spelling of the following word.', 'input': 'Ocassion', 'output': \"The correct spelling is 'Occasion.'\"}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(\"Example entry:\\n\\n\", data[50])"
|
||||
"print(\"Example entry:\\n\", data[50])"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -257,13 +256,12 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Another example entry:\n",
|
||||
"\n",
|
||||
" {'instruction': \"What is an antonym of 'complicated'?\", 'input': '', 'output': \"An antonym of 'complicated' is 'simple'.\"}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(\"Another example entry:\\n\\n\", data[999])"
|
||||
"print(\"Another example entry:\\n\", data[999])"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -511,20 +509,6 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "K6MWf0lhu8GP",
|
||||
"metadata": {
|
||||
"id": "K6MWf0lhu8GP"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import tiktoken\n",
|
||||
"\n",
|
||||
"tokenizer = tiktoken.get_encoding(\"gpt2\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "adc29dc4-f1c7-4c71-937b-95119d6239bb",
|
||||
"metadata": {
|
||||
"id": "adc29dc4-f1c7-4c71-937b-95119d6239bb"
|
||||
@ -569,7 +553,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 11,
|
||||
"id": "ff24fe1a-5746-461c-ad3d-b6d84a1a7c96",
|
||||
"metadata": {
|
||||
"colab": {
|
||||
@ -588,6 +572,9 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import tiktoken\n",
|
||||
"tokenizer = tiktoken.get_encoding(\"gpt2\")\n",
|
||||
"\n",
|
||||
"print(tokenizer.encode(\"<|endoftext|>\", allowed_special={\"<|endoftext|>\"}))"
|
||||
]
|
||||
},
|
||||
|
Loading…
x
Reference in New Issue
Block a user