rasbt
							
						 
					 | 
					
						
						
							
							
						
						
						
							
						
						
							c735c21e87
							
						
					 | 
					
						
						
							
							fix swiglu acronym
						
						
						
						
						
						
					 | 
					
						2024-05-01 20:26:17 -05:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Sebastian Raschka
							
						 
					 | 
					
						
						
							
							
						
						
						
							
						
						
							97ed38116a
							
						
					 | 
					
						
						
							
							Rename drop_resid to drop_shortcut (#136)
						
						
						
						
						
						
					 | 
					
						2024-04-28 14:31:27 -05:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
							
							
						
						
						
							
						
						
							d202cabdee
							
						
					 | 
					
						
						
							
							update figures
						
						
						
						
						
						
					 | 
					
						2024-04-20 11:42:03 -05:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Sebastian Raschka
							
						 
					 | 
					
						
						
							
							
						
						
						
							
						
						
							dd51d4ad83
							
						
					 | 
					
						
						
							
							Make datesets and loaders compatible with multiprocessing (#118)
						
						
						
						
						
						
					 | 
					
						2024-04-13 13:57:56 -05:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								James Holcombe
							
						 
					 | 
					
						
						
							
							
						
						
						
							
						
						
							05718c6b94
							
						
					 | 
					
						
						
							
							Use instance tokenizer (#116)
						
						
						
						
						
						
						
						* Use instance tokenizer
* consistency updates
---------
Co-authored-by: Sebastian Raschka <mail@sebastianraschka.com> 
						
						
					 | 
					
						2024-04-10 21:16:19 -04:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
							
							
						
						
						
							
						
						
							6de0417321
							
						
					 | 
					
						
						
							
							cleanup
						
						
						
						
						
						
					 | 
					
						2024-04-04 07:58:41 -05:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Sebastian Raschka
							
						 
					 | 
					
						
						
							
							
						
						
						
							
						
						
							2de60d1bfb
							
						
					 | 
					
						
						
							
							Rename variable to context_length to make it easier on readers (#106)
						
						
						
						
						
						
						
						* rename to context length
* fix spacing 
						
						
					 | 
					
						2024-04-04 07:27:41 -05:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Sebastian Raschka
							
						 
					 | 
					
						
						
							
							
						
						
						
							
						
						
							3829ccdb34
							
						
					 | 
					
						
						
							
							Remove reundant dropout in MLP module (#105)
						
						
						
						
						
						
					 | 
					
						2024-04-03 20:19:08 -05:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Sebastian Raschka
							
						 
					 | 
					
						
						
						
						
							
						
						
							a2cd8436cb
							
						
					 | 
					
						
						
							
							Ch05 supplementary code (#81)
						
						
						
						
						
						
					 | 
					
						2024-03-19 09:26:26 -05:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Sebastian Raschka
							
						 
					 | 
					
						
						
						
						
							
						
						
							ca96abac8a
							
						
					 | 
					
						
						
							
							Set up basic test gh worklows (#79)
						
						
						
						
						
						
						
						* Set up basic test gh worklows
* update file paths
* env check
* add env check
* Update requirements.txt
* simplify
* upd 
						
						
					 | 
					
						2024-03-18 11:58:37 -05:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Sebastian Raschka
							
						 
					 | 
					
						
						
						
						
							
						
						
							9d6da22ebb
							
						
					 | 
					
						
						
							
							Update pep8 (#78)
						
						
						
						
						
						
						
						* simplify requirements file
* style
* apply linter 
						
						
					 | 
					
						2024-03-18 08:16:17 -05:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							4fc6de7afa
							
						
					 | 
					
						
						
							
							add notes
						
						
						
						
						
						
					 | 
					
						2024-03-17 09:29:06 -05:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							d60da19fd0
							
						
					 | 
					
						
						
							
							add more notes and embed figures externally to save space
						
						
						
						
						
						
					 | 
					
						2024-03-17 09:08:38 -05:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							861c296312
							
						
					 | 
					
						
						
							
							add imports and version on top
						
						
						
						
						
						
					 | 
					
						2024-03-16 09:50:00 -05:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								joel-foo
							
						 
					 | 
					
						
						
						
						
							
						
						
							dbb5e65a29
							
						
					 | 
					
						
						
							
							Remove duplicate cells
						
						
						
						
						
						
					 | 
					
						2024-03-10 21:40:57 +08:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							da33ce8054
							
						
					 | 
					
						
						
							
							remove redundant unsqueeze in mask
						
						
						
						
						
						
					 | 
					
						2024-03-09 17:42:31 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							87fcfd9245
							
						
					 | 
					
						
						
							
							mha variants
						
						
						
						
						
						
					 | 
					
						2024-03-06 08:30:32 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							e0df4df433
							
						
					 | 
					
						
						
							
							add dropout for embedding layers
						
						
						
						
						
						
					 | 
					
						2024-03-04 07:05:06 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Sebastian Raschka
							
						 
					 | 
					
						
						
						
						
							
						
						
							c9dccb0c40
							
						
					 | 
					
						
						
							
							Merge pull request #33 from rayedbw/patch-1
						
						
						
						
						
						
						
						Update ch04.ipynb 
						
						
					 | 
					
						2024-02-29 20:00:09 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							267e33cfaf
							
						
					 | 
					
						
						
							
							remove redundant import
						
						
						
						
						
						
					 | 
					
						2024-02-29 19:59:05 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							b827bf4eea
							
						
					 | 
					
						
						
							
							remove redundant double-unsequeeze
						
						
						
						
						
						
					 | 
					
						2024-02-29 08:31:07 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Rayed Bin Wahed
							
						 
					 | 
					
						
						
						
						
							
						
						
							2fb035435e
							
						
					 | 
					
						
						
							
							Update ch04.ipynb
						
						
						
						
						
						
						
						Add missing import 
						
						
					 | 
					
						2024-02-27 23:05:36 +08:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							f6266c3756
							
						
					 | 
					
						
						
							
							improve code comments
						
						
						
						
						
						
					 | 
					
						2024-02-27 06:40:35 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							3f186ab072
							
						
					 | 
					
						
						
							
							use .shape instead of .size() for consistency
						
						
						
						
						
						
					 | 
					
						2024-02-25 08:47:25 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							cdcd73ba7f
							
						
					 | 
					
						
						
							
							drop_last=True
						
						
						
						
						
						
					 | 
					
						2024-02-25 07:23:38 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							6243726ab3
							
						
					 | 
					
						
						
							
							rename to dataloader v1
						
						
						
						
						
						
					 | 
					
						2024-02-24 07:48:18 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							4e68649f16
							
						
					 | 
					
						
						
							
							comment update
						
						
						
						
						
						
					 | 
					
						2024-02-24 06:52:17 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							f057156181
							
						
					 | 
					
						
						
							
							use smaller number of tokens to emphasize next token prediction goal
						
						
						
						
						
						
					 | 
					
						2024-02-15 20:09:20 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							557ddfc684
							
						
					 | 
					
						
						
							
							make a new example for shortcut connections
						
						
						
						
						
						
					 | 
					
						2024-02-15 19:34:12 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							250e6306e2
							
						
					 | 
					
						
						
							
							use attn_scores from sec 3.4 instead of 3.3
						
						
						
						
						
						
					 | 
					
						2024-02-14 20:23:59 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							231a854ae7
							
						
					 | 
					
						
						
							
							use less ambiguous var name
						
						
						
						
						
						
					 | 
					
						2024-02-13 07:05:37 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							1d6f2c9084
							
						
					 | 
					
						
						
							
							rearrange exercise order
						
						
						
						
						
						
					 | 
					
						2024-02-11 14:46:05 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							fe332006de
							
						
					 | 
					
						
						
							
							ch4 exercise solutions
						
						
						
						
						
						
					 | 
					
						2024-02-11 11:51:39 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							103f7826ad
							
						
					 | 
					
						
						
							
							use same iter to make figs consistent
						
						
						
						
						
						
					 | 
					
						2024-02-11 09:12:52 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							352b83d225
							
						
					 | 
					
						
						
							
							make softmax explicit
						
						
						
						
						
						
					 | 
					
						2024-02-11 08:42:21 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							7d86023fc4
							
						
					 | 
					
						
						
							
							make softmax explicit
						
						
						
						
						
						
					 | 
					
						2024-02-11 08:41:45 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							5840b4b5f8
							
						
					 | 
					
						
						
							
							update name of last section
						
						
						
						
						
						
					 | 
					
						2024-02-11 07:35:07 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							baa8617921
							
						
					 | 
					
						
						
							
							variable name fix
						
						
						
						
						
						
					 | 
					
						2024-02-10 17:53:54 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							496b52f842
							
						
					 | 
					
						
						
							
							format the other GPT architecture sizes
						
						
						
						
						
						
					 | 
					
						2024-02-10 17:47:56 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							10aa2d099d
							
						
					 | 
					
						
						
							
							add print statements for illustration purposes
						
						
						
						
						
						
					 | 
					
						2024-02-10 10:10:14 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							5d1d8ce511
							
						
					 | 
					
						
						
							
							add shape information for clarity
						
						
						
						
						
						
					 | 
					
						2024-02-08 20:16:54 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							3a5fc79b38
							
						
					 | 
					
						
						
							
							add and update readme files
						
						
						
						
						
						
					 | 
					
						2024-02-05 06:51:58 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							2b38b63a7a
							
						
					 | 
					
						
						
							
							move overview up
						
						
						
						
						
						
					 | 
					
						2024-02-04 15:57:03 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							bb50de7210
							
						
					 | 
					
						
						
							
							adjust figure width
						
						
						
						
						
						
					 | 
					
						2024-02-04 10:12:11 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							1653f6953a
							
						
					 | 
					
						
						
							
							adjust figure width
						
						
						
						
						
						
					 | 
					
						2024-02-04 10:09:36 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							ec312e581b
							
						
					 | 
					
						
						
							
							add chapter 4 code
						
						
						
						
						
						
					 | 
					
						2024-02-04 10:02:05 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							d261abce4c
							
						
					 | 
					
						
						
							
							add forward pass
						
						
						
						
						
						
					 | 
					
						2024-01-31 08:00:19 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								rasbt
							
						 
					 | 
					
						
						
						
						
							
						
						
							fcb13fd636
							
						
					 | 
					
						
						
							
							add code backbone ch04
						
						
						
						
						
						
					 | 
					
						2024-01-29 08:14:23 -06:00 | 
					
					
						
						
							
							
							
						
					 |