From 6fe8d1a10e8311d038306b7524c68c590a252848 Mon Sep 17 00:00:00 2001 From: Sebastian Raschka Date: Sat, 11 May 2024 06:42:05 -0500 Subject: [PATCH] Update README.md --- .../02_bonus_additional-experiments/README.md | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/ch06/02_bonus_additional-experiments/README.md b/ch06/02_bonus_additional-experiments/README.md index 207b2a7..3089341 100644 --- a/ch06/02_bonus_additional-experiments/README.md +++ b/ch06/02_bonus_additional-experiments/README.md @@ -9,18 +9,18 @@ For example,   -| | Model | Weights | Trainable token | Trainable layers | Context length | Training acc | Training time | Validation acc | Test acc | CPU/GPU | -|----|--------------------|------------|-----------------|------------------|-------------------------|--------------|---------------|----------------|----------|---------| -| 1 | gpt2-small (124M) | pretrained | last | last_block | longest train ex. (120) | 96.63% | 0.39 min | 99.33% | 95.00% | V100 | -| 2 | gpt2-small (124M) | pretrained | first | last_block | longest train ex. (120) | 78.46% | 0.37 min | 80.54% | 75.00% | V100 | -| 3 | gpt2-small (124M) | pretrained | last | last_layer | longest train ex. (120) | 78.65% | 0.33 min | 79.87% | 72.00% | V100 | -| 4 | gpt2-small (124M) | pretrained | last | all | longest train ex. (120) | 99.62% | 0.94 min | 96.64% | 96.67% | V100 | -| 5 | gpt2-medium (355M) | pretrained | last | last_block | longest train ex. (120) | 87.50% | 0.91 min | 91.28% | 84.67% | V100 | -| 6 | gpt2-large (774M) | pretrained | last | last_block | longest train ex. (120) | 99.52% | 1.91 min | 98.66% | 96.67% | V100 | -| 7 | gpt2-xl (1558M) | pretrained | last | last_block | longest train ex. (120) | 99.81% | 3.84 min | 99.33% | 98.33% | V100 | -| 8 | gpt2-small (124M) | random | last | all | longest train ex. (120) | 100% | 0.93 min | 96.64% | 93.67% | V100 | -| 9 | gpt2-small (124M) | pretrained | last | LoRA | longest train ex. (120) | 99.52% | 0.82 min | 97.99% | 97.67% | V100 | -| 10 | gpt2-small (124M) | pretrained | last | last_block | context length (1024) | 83.08% | 3.24 min | 87.92% | 78.33% | V100 | +| | Model | Weights | Trainable token | Trainable layers | Context length | Training acc | Test acc | Validation acc | Training time | CPU/GPU | +|----|--------------------|------------|-----------------|------------------|-------------------------|--------------|----------|----------------|---------------|---------| +| 1 | gpt2-small (124M) | pretrained | last | last_block | longest train ex. (120) | 96.63% | 95.00% | 99.33% | 0.39 min | V100 | +| 2 | gpt2-small (124M) | pretrained | first | last_block | longest train ex. (120) | 78.46% | 75.00% | 80.54% | 0.37 min | V100 | +| 3 | gpt2-small (124M) | pretrained | last | last_layer | longest train ex. (120) | 78.65% | 72.00% | 79.87% | 0.33 min | V100 | +| 4 | gpt2-small (124M) | pretrained | last | all | longest train ex. (120) | 99.62% | 96.67% | 96.64% | 0.94 min | V100 | +| 5 | gpt2-medium (355M) | pretrained | last | last_block | longest train ex. (120) | 87.50% | 84.67% | 91.28% | 0.91 min | V100 | +| 6 | gpt2-large (774M) | pretrained | last | last_block | longest train ex. (120) | 99.52% | 96.67% | 98.66% | 1.91 min | V100 | +| 7 | gpt2-xl (1558M) | pretrained | last | last_block | longest train ex. (120) | 99.81% | 98.33% | 99.33% | 3.84 min | V100 | +| 8 | gpt2-small (124M) | random | last | all | longest train ex. (120) | 100% | 93.67% | 96.64% | 0.93 min | V100 | +| 9 | gpt2-small (124M) | pretrained | last | LoRA | longest train ex. (120) | 99.52% | 97.67% | 97.99% | 0.82 min | V100 | +| 10 | gpt2-small (124M) | pretrained | last | last_block | context length (1024) | 83.08% | 78.33% | 87.92% | 3.24 min | V100 |