mirror of
				https://github.com/rasbt/LLMs-from-scratch.git
				synced 2025-10-31 09:50:23 +00:00 
			
		
		
		
	formatting updates
This commit is contained in:
		
							parent
							
								
									0ee9312662
								
							
						
					
					
						commit
						339a7ce040
					
				| @ -48,10 +48,10 @@ | |||||||
|      "name": "stdout", |      "name": "stdout", | ||||||
|      "output_type": "stream", |      "output_type": "stream", | ||||||
|      "text": [ |      "text": [ | ||||||
|       "matplotlib version: 3.7.1\n", |       "matplotlib version: 3.9.0\n", | ||||||
|       "tiktoken version: 0.7.0\n", |       "tiktoken version: 0.5.1\n", | ||||||
|       "torch version: 2.3.0+cu121\n", |       "torch version: 2.2.2\n", | ||||||
|       "tqdm version: 4.66.4\n", |       "tqdm version: 4.66.2\n", | ||||||
|       "tensorflow version: 2.15.0\n" |       "tensorflow version: 2.15.0\n" | ||||||
|      ] |      ] | ||||||
|     } |     } | ||||||
| @ -174,7 +174,7 @@ | |||||||
|     "\n", |     "\n", | ||||||
|     "    if not os.path.exists(file_path):\n", |     "    if not os.path.exists(file_path):\n", | ||||||
|     "        with urllib.request.urlopen(url) as response:\n", |     "        with urllib.request.urlopen(url) as response:\n", | ||||||
|     "            text_data = response.read().decode('utf-8')\n", |     "            text_data = response.read().decode(\"utf-8\")\n", | ||||||
|     "        with open(file_path, \"w\", encoding=\"utf-8\") as file:\n", |     "        with open(file_path, \"w\", encoding=\"utf-8\") as file:\n", | ||||||
|     "            file.write(text_data)\n", |     "            file.write(text_data)\n", | ||||||
|     "    else:\n", |     "    else:\n", | ||||||
| @ -221,13 +221,12 @@ | |||||||
|      "output_type": "stream", |      "output_type": "stream", | ||||||
|      "text": [ |      "text": [ | ||||||
|       "Example entry:\n", |       "Example entry:\n", | ||||||
|       "\n", |  | ||||||
|       " {'instruction': 'Identify the correct spelling of the following word.', 'input': 'Ocassion', 'output': \"The correct spelling is 'Occasion.'\"}\n" |       " {'instruction': 'Identify the correct spelling of the following word.', 'input': 'Ocassion', 'output': \"The correct spelling is 'Occasion.'\"}\n" | ||||||
|      ] |      ] | ||||||
|     } |     } | ||||||
|    ], |    ], | ||||||
|    "source": [ |    "source": [ | ||||||
|     "print(\"Example entry:\\n\\n\", data[50])" |     "print(\"Example entry:\\n\", data[50])" | ||||||
|    ] |    ] | ||||||
|   }, |   }, | ||||||
|   { |   { | ||||||
| @ -257,13 +256,12 @@ | |||||||
|      "output_type": "stream", |      "output_type": "stream", | ||||||
|      "text": [ |      "text": [ | ||||||
|       "Another example entry:\n", |       "Another example entry:\n", | ||||||
|       "\n", |  | ||||||
|       " {'instruction': \"What is an antonym of 'complicated'?\", 'input': '', 'output': \"An antonym of 'complicated' is 'simple'.\"}\n" |       " {'instruction': \"What is an antonym of 'complicated'?\", 'input': '', 'output': \"An antonym of 'complicated' is 'simple'.\"}\n" | ||||||
|      ] |      ] | ||||||
|     } |     } | ||||||
|    ], |    ], | ||||||
|    "source": [ |    "source": [ | ||||||
|     "print(\"Another example entry:\\n\\n\", data[999])" |     "print(\"Another example entry:\\n\", data[999])" | ||||||
|    ] |    ] | ||||||
|   }, |   }, | ||||||
|   { |   { | ||||||
| @ -511,20 +509,6 @@ | |||||||
|   { |   { | ||||||
|    "cell_type": "code", |    "cell_type": "code", | ||||||
|    "execution_count": 10, |    "execution_count": 10, | ||||||
|    "id": "K6MWf0lhu8GP", |  | ||||||
|    "metadata": { |  | ||||||
|     "id": "K6MWf0lhu8GP" |  | ||||||
|    }, |  | ||||||
|    "outputs": [], |  | ||||||
|    "source": [ |  | ||||||
|     "import tiktoken\n", |  | ||||||
|     "\n", |  | ||||||
|     "tokenizer = tiktoken.get_encoding(\"gpt2\")" |  | ||||||
|    ] |  | ||||||
|   }, |  | ||||||
|   { |  | ||||||
|    "cell_type": "code", |  | ||||||
|    "execution_count": 11, |  | ||||||
|    "id": "adc29dc4-f1c7-4c71-937b-95119d6239bb", |    "id": "adc29dc4-f1c7-4c71-937b-95119d6239bb", | ||||||
|    "metadata": { |    "metadata": { | ||||||
|     "id": "adc29dc4-f1c7-4c71-937b-95119d6239bb" |     "id": "adc29dc4-f1c7-4c71-937b-95119d6239bb" | ||||||
| @ -569,7 +553,7 @@ | |||||||
|   }, |   }, | ||||||
|   { |   { | ||||||
|    "cell_type": "code", |    "cell_type": "code", | ||||||
|    "execution_count": 12, |    "execution_count": 11, | ||||||
|    "id": "ff24fe1a-5746-461c-ad3d-b6d84a1a7c96", |    "id": "ff24fe1a-5746-461c-ad3d-b6d84a1a7c96", | ||||||
|    "metadata": { |    "metadata": { | ||||||
|     "colab": { |     "colab": { | ||||||
| @ -588,6 +572,9 @@ | |||||||
|     } |     } | ||||||
|    ], |    ], | ||||||
|    "source": [ |    "source": [ | ||||||
|  |     "import tiktoken\n", | ||||||
|  |     "tokenizer = tiktoken.get_encoding(\"gpt2\")\n", | ||||||
|  |     "\n", | ||||||
|     "print(tokenizer.encode(\"<|endoftext|>\", allowed_special={\"<|endoftext|>\"}))" |     "print(tokenizer.encode(\"<|endoftext|>\", allowed_special={\"<|endoftext|>\"}))" | ||||||
|    ] |    ] | ||||||
|   }, |   }, | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 rasbt
						rasbt