Mirror of https://github.com/rasbt/LLMs-from-scratch.git
Synced 2025-11-04 11:50:14 +00:00

Add CI tests for chapter 7 (#239)

This commit is contained in:
parent 0114dee9f6
commit eb85c43bc3
							
								
								
									
.gitignore  (vendored, 3 lines changed)
@@ -1,5 +1,4 @@
 # Configs and keys
-ch07/01_main-chapter-code/gpt2-medium355M-sft-standalone.pth
 ch07/02_dataset-utilities/config.json
 ch07/03_model-evaluation/config.json

@@ -36,6 +35,8 @@ ch06/02_bonus_additional-experiments/gpt2
 ch06/03_bonus_imdb-classification/gpt2

 ch07/01_main-chapter-code/gpt2-medium355M-sft.pth
+ch07/01_main-chapter-code/gpt2-medium355M-sft-standalone.pth
+ch07/01_main-chapter-code/Smalltestmodel-sft-standalone.pth
 ch07/01_main-chapter-code/gpt2/

 # Datasets
@@ -147,7 +147,7 @@ def plot_losses(epochs_seen, tokens_seen, train_losses, val_losses):
     # plt.show()


-def main():
+def main(test_mode=False):
     #######################################
     # Print package versions
     #######################################
@@ -177,6 +177,12 @@ def main():
     test_data = data[train_portion:train_portion + test_portion]
     val_data = data[train_portion + test_portion:]

+    # Use very small subset for testing purposes
+    if args.test_mode:
+        train_data = train_data[:10]
+        val_data = val_data[:10]
+        test_data = test_data[:10]
+
     print("Training set length:", len(train_data))
     print("Validation set length:", len(val_data))
     print("Test set length:", len(test_data))
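The test-mode branch above caps each split at ten examples. A minimal standalone sketch of that slicing (with dummy records and illustrative 85/10/5 split proportions, since the script's actual split values are defined outside this hunk) shows why the CI run finishes in seconds:

data = [{"instruction": f"task {i}", "output": f"answer {i}"} for i in range(1100)]

train_portion = int(len(data) * 0.85)  # illustrative proportions only
test_portion = int(len(data) * 0.1)

train_data = data[:train_portion]
test_data = data[train_portion:train_portion + test_portion]
val_data = data[train_portion + test_portion:]

test_mode = True  # stands in for the args.test_mode flag above
if test_mode:
    train_data, val_data, test_data = train_data[:10], val_data[:10], test_data[:10]

print(len(train_data), len(val_data), len(test_data))  # 10 10 10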
@@ -217,31 +223,50 @@ def main():
     #######################################
     # Load pretrained model
     #######################################
-    BASE_CONFIG = {
-        "vocab_size": 50257,     # Vocabulary size
-        "context_length": 1024,  # Context length
-        "drop_rate": 0.0,        # Dropout rate
-        "qkv_bias": True         # Query-key-value bias
-    }

-    model_configs = {
-        "gpt2-small (124M)": {"emb_dim": 768, "n_layers": 12, "n_heads": 12},
-        "gpt2-medium (355M)": {"emb_dim": 1024, "n_layers": 24, "n_heads": 16},
-        "gpt2-large (774M)": {"emb_dim": 1280, "n_layers": 36, "n_heads": 20},
-        "gpt2-xl (1558M)": {"emb_dim": 1600, "n_layers": 48, "n_heads": 25},
-    }
-
-    CHOOSE_MODEL = "gpt2-medium (355M)"
-
-    BASE_CONFIG.update(model_configs[CHOOSE_MODEL])
-
-    model_size = CHOOSE_MODEL.split(" ")[-1].lstrip("(").rstrip(")")
-    settings, params = download_and_load_gpt2(model_size=model_size, models_dir="gpt2")
-
-    model = GPTModel(BASE_CONFIG)
-    load_weights_into_gpt(model, params)
-    model.eval()
-    model.to(device)
-
+    # Small GPT model for testing purposes
+    if args.test_mode:
+        BASE_CONFIG = {
+            "vocab_size": 50257,
+            "context_length": 120,
+            "drop_rate": 0.0,
+            "qkv_bias": False,
+            "emb_dim": 12,
+            "n_layers": 1,
+            "n_heads": 2
+        }
+        model = GPTModel(BASE_CONFIG)
+        model.eval()
+        device = "cpu"
+        CHOOSE_MODEL = "Small test model"
+
+    # Code as it is used in the main chapter
+    else:
+        BASE_CONFIG = {
+            "vocab_size": 50257,     # Vocabulary size
+            "context_length": 1024,  # Context length
+            "drop_rate": 0.0,        # Dropout rate
+            "qkv_bias": True         # Query-key-value bias
+        }
+
+        model_configs = {
+            "gpt2-small (124M)": {"emb_dim": 768, "n_layers": 12, "n_heads": 12},
+            "gpt2-medium (355M)": {"emb_dim": 1024, "n_layers": 24, "n_heads": 16},
+            "gpt2-large (774M)": {"emb_dim": 1280, "n_layers": 36, "n_heads": 20},
+            "gpt2-xl (1558M)": {"emb_dim": 1600, "n_layers": 48, "n_heads": 25},
+        }
+
+        CHOOSE_MODEL = "gpt2-medium (355M)"
+
+        BASE_CONFIG.update(model_configs[CHOOSE_MODEL])
+
+        model_size = CHOOSE_MODEL.split(" ")[-1].lstrip("(").rstrip(")")
+        settings, params = download_and_load_gpt2(model_size=model_size, models_dir="gpt2")
+
+        model = GPTModel(BASE_CONFIG)
+        load_weights_into_gpt(model, params)
+        model.eval()
+        model.to(device)
+
     print("Loaded model:", CHOOSE_MODEL)
     print(50*"-")
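The test-mode configuration above replaces the 355M-parameter gpt2-medium with a one-layer model using a 12-dimensional embedding. A rough, self-contained estimate (assuming standard GPT-2-style weight shapes and ignoring biases and LayerNorm parameters; the book's GPTModel class is not imported here) illustrates the size gap:

def approx_gpt_params(cfg):
    e = cfg["emb_dim"]
    embed = cfg["vocab_size"] * e + cfg["context_length"] * e  # token + positional embeddings
    attn = 4 * e * e                                           # Q, K, V, and output projections
    ffn = 2 * (e * 4 * e)                                      # feed-forward up/down projections
    return embed + cfg["n_layers"] * (attn + ffn)

test_cfg = {"vocab_size": 50257, "context_length": 120, "emb_dim": 12, "n_layers": 1}
medium_cfg = {"vocab_size": 50257, "context_length": 1024, "emb_dim": 1024, "n_layers": 24}

print(f"test-mode model: ~{approx_gpt_params(test_cfg):,}")    # roughly 0.6M parameters
print(f"gpt2-medium:     ~{approx_gpt_params(medium_cfg):,}")  # roughly 355M parameters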
@@ -259,6 +284,7 @@ def main():

     start_time = time.time()
     optimizer = torch.optim.AdamW(model.parameters(), lr=0.00005, weight_decay=0.1)
+
     num_epochs = 2

     torch.manual_seed(123)
@@ -307,4 +333,19 @@ def main():


 if __name__ == "__main__":
-    main()
+
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description="Finetune a GPT model for classification"
+    )
+    parser.add_argument(
+        "--test_mode",
+        default=False,
+        action="store_true",
+        help=("This flag runs the model in test mode for internal testing purposes. "
+              "Otherwise, it runs the model as it is used in the chapter (recommended).")
+    )
+    args = parser.parse_args()
+
+    main(args.test_mode)
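For reference, the new flag relies on argparse's store_true action: it defaults to False and flips to True only when --test_mode is passed on the command line. A standalone snippet mirroring the parser above behaves as follows:

import argparse

parser = argparse.ArgumentParser(description="Finetune a GPT model for classification")
parser.add_argument(
    "--test_mode",
    default=False,
    action="store_true",
    help="Run with a tiny model and a 10-example data subset (for CI)."
)

print(parser.parse_args([]).test_mode)               # False: normal chapter run
print(parser.parse_args(["--test_mode"]).test_mode)  # True: CI test run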
							
								
								
									
ch07/01_main-chapter-code/tests.py  (new file, 16 lines added)
@@ -0,0 +1,16 @@
+# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).
+# Source for "Build a Large Language Model From Scratch"
+#   - https://www.manning.com/books/build-a-large-language-model-from-scratch
+# Code: https://github.com/rasbt/LLMs-from-scratch
+
+# File for internal use (unit tests)
+
+
+import subprocess
+
+
+def test_gpt_class_finetune():
+    command = ["python", "ch06/01_main-chapter-code/gpt_class_finetune.py", "--test_mode"]
+
+    result = subprocess.run(command, capture_output=True, text=True)
+    assert result.returncode == 0, f"Script exited with errors: {result.stderr}"
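To run the new test file locally the way CI would, one option (assuming pytest is installed and the command is issued from the repository root, since the test uses a repository-relative path) is:

import pytest

# Collect and run only the new chapter 7 test file; exits non-zero on failure.
raise SystemExit(pytest.main(["ch07/01_main-chapter-code/tests.py", "-v"]))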