# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).
# Source for "Build a Large Language Model From Scratch"
#   - https://www.manning.com/books/build-a-large-language-model-from-scratch
# Code: https://github.com/rasbt/LLMs-from-scratch

from llms_from_scratch.appendix_a import NeuralNetwork, ToyDataset

import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
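
# The two imported classes come from the book's Appendix A. As a reference
# (a minimal sketch inferred from how they are used below, not the package's
# exact code), their assumed interfaces are:
#
#     class ToyDataset(torch.utils.data.Dataset):
#         def __init__(self, X, y): ...      # stores feature/label tensors
#         def __getitem__(self, index): ...  # returns (X[index], y[index])
#         def __len__(self): ...             # returns the number of rows in X
#
#     class NeuralNetwork(torch.nn.Module):
#         def __init__(self, num_inputs, num_outputs): ...  # small feed-forward net
#         def forward(self, x): ...                         # returns logits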


def test_dataset():

    X_train = torch.tensor([
        [-1.2, 3.1],
        [-0.9, 2.9],
        [-0.5, 2.6],
        [2.3, -1.1],
        [2.7, -1.5]
    ])

    y_train = torch.tensor([0, 0, 0, 1, 1])
    train_ds = ToyDataset(X_train, y_train)

    # The original bare comparison had no effect; assert the dataset length.
    assert len(train_ds) == 5

    torch.manual_seed(123)
    train_loader = DataLoader(
        dataset=train_ds,
        batch_size=2,
        shuffle=True,
        num_workers=0
    )

    torch.manual_seed(123)
    model = NeuralNetwork(num_inputs=2, num_outputs=2)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.5)

    num_epochs = 3

    for epoch in range(num_epochs):

        model.train()
        for batch_idx, (features, labels) in enumerate(train_loader):

            logits = model(features)
            loss = F.cross_entropy(logits, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Only the training loss is computed here, so label it as such.
            print(f"Epoch: {epoch+1:03d}/{num_epochs:03d}"
                  f" | Batch {batch_idx:03d}/{len(train_loader):03d}"
                  f" | Train Loss: {loss:.2f}")

    # Evaluate once after training completes; the reference values below
    # correspond to the fully trained model under the seeds set above, so
    # checking them inside the epoch loop would fail before the last epoch.
    model.eval()
    with torch.no_grad():
        outputs = model(X_train)

    expected = torch.tensor([
        [2.8569, -4.1618],
        [2.5382, -3.7548],
        [2.0944, -3.1820],
        [-1.4814, 1.4816],
        [-1.7176, 1.7342]
    ])
    # torch.equal demands bit-exact equality, which the 4-decimal reference
    # values cannot satisfy; compare within a tolerance and assert the result
    # (the original call discarded its return value).
    assert torch.allclose(outputs, expected, atol=1e-4)
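
# To run this check with pytest (assuming the file is saved under a test
# directory, e.g. as tests/test_appendix_a.py; the path is hypothetical):
#
#     pytest tests/test_appendix_a.py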