mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2025-11-03 11:20:49 +00:00
Improve ModernBERT comments (#606)
* Improve modernbert comments * bash code formatting
This commit is contained in:
parent
9e08fff657
commit
b662ec9ada
@ -181,6 +181,12 @@ Test accuracy: 92.95%
|
||||
|
||||
[ModernBERT (2024)](https://arxiv.org/abs/2412.13663) is an optimized reimplementation of BERT that incorporates architectural improvements like parallel residual connections and gated linear units (GLUs) to boost efficiency and performance. It keeps BERT’s masked language modeling pretraining objective (dropping next-sentence prediction) while achieving faster inference and better scalability on modern hardware.
|
||||
|
||||
```bash
|
||||
python train_bert_hf.py --trainable_layers "all" --num_epochs 1 --model "modernbert-base"
|
||||
```
|
||||
|
||||
|
||||
|
||||
```
|
||||
Ep 1 (Step 000000): Train loss 0.699, Val loss 0.698
|
||||
Ep 1 (Step 000050): Train loss 0.564, Val loss 0.606
|
||||
@ -209,6 +215,10 @@ Test accuracy: 93.79%
|
||||
|
||||
Same as above, but using the larger ModernBERT variant (`modernbert-large`).
|
||||
|
||||
```bash
|
||||
python train_bert_hf.py --trainable_layers "all" --num_epochs 1 --model "modernbert-large"
|
||||
```
|
||||
|
||||
|
||||
|
||||
```
|
||||
|
||||
@ -197,7 +197,7 @@ if __name__ == "__main__":
|
||||
type=str,
|
||||
default="distilbert",
|
||||
help=(
|
||||
"Which model to train. Options: 'distilbert', 'bert', 'roberta', 'modern-bert-base', 'modern-bert-large."
|
||||
"Which model to train. Options: 'distilbert', 'bert', 'roberta', 'modernbert-base/-large'."
|
||||
)
|
||||
)
|
||||
parser.add_argument(
|
||||
@ -296,9 +296,9 @@ if __name__ == "__main__":
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-large")
|
||||
|
||||
elif args.model in ("modern-bert-base", "modern-bert-large"):
|
||||
elif args.model in ("modernbert-base", "modernbert-large"):
|
||||
|
||||
if args.model == "modern-bert-base":
|
||||
if args.model == "modernbert-base":
|
||||
model = AutoModelForSequenceClassification.from_pretrained(
|
||||
"answerdotai/ModernBERT-base", num_labels=2
|
||||
)
|
||||
@ -330,7 +330,7 @@ if __name__ == "__main__":
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
|
||||
|
||||
elif args.model == "modern-bert-base":
|
||||
elif args.model == "modernbert-base":
|
||||
model = AutoModelForSequenceClassification.from_pretrained(
|
||||
"answerdotai/ModernBERT-base", num_labels=2
|
||||
)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user