Mirror of https://github.com/rasbt/LLMs-from-scratch.git (synced 2025-08-04 14:52:17 +00:00)
ch06/03 fixes (#336)
* fixed bash commands
* fixed help docstrings
* added missing logreg bash cmd
* Update train_bert_hf.py
* Update train_bert_hf_spam.py
* Update README.md

Co-authored-by: Sebastian Raschka <mail@sebastianraschka.com>
parent 91cdfe3309
commit c7267c3b09
README.md:

@@ -56,7 +56,7 @@ Test accuracy: 91.88%
 A 340M parameter encoder-style [BERT](https://arxiv.org/abs/1810.04805) model:
 
 ```bash
-python train_bert_hf --trainable_layers "all" --num_epochs 1 --model "bert"
+python train_bert_hf.py --trainable_layers "all" --num_epochs 1 --model "bert"
 ```
 
 ```
@@ -114,7 +114,7 @@ A 355M parameter encoder-style [RoBERTa](https://arxiv.org/abs/1907.11692) model
 
 
 ```bash
-python train_bert_hf.py --trainable_layers "last_block" --num_epochs 1 --bert_model "roberta"
+python train_bert_hf.py --trainable_layers "last_block" --num_epochs 1 --model "roberta"
 ```
 
 ```
@@ -140,7 +140,12 @@ Test accuracy: 92.95%
 
 <br>
 
-A scikit-learn logistic regression classifier as a baseline.
+A scikit-learn logistic regression classifier as a baseline:
+
+```bash
+python train_sklearn_logreg.py
+```
+
 
 ```
 Dummy classifier:
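For orientation, a logistic regression baseline of this kind typically has the following shape. This is a hypothetical sketch, not the repository's actual `train_sklearn_logreg.py`: only the file name and the "Dummy classifier" baseline come from the README hunk above, and the toy texts and labels below are made up.

```python
from sklearn.dummy import DummyClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Hypothetical data; a real script would load the dataset splits from disk.
train_texts = ["great movie", "terrible plot", "loved it", "awful acting"]
train_labels = [1, 0, 1, 0]
test_texts = ["great acting", "terrible movie"]
test_labels = [1, 0]

# Bag-of-words features, as is typical for such baselines.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(train_texts)
X_test = vectorizer.transform(test_texts)

# Majority-class baseline (the "Dummy classifier" line in the README output).
dummy = DummyClassifier(strategy="most_frequent").fit(X_train, train_labels)
print("Dummy accuracy:", accuracy_score(test_labels, dummy.predict(X_test)))

# Logistic regression baseline.
clf = LogisticRegression(max_iter=1000).fit(X_train, train_labels)
print("LogReg accuracy:", accuracy_score(test_labels, clf.predict(X_test)))
```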
train_bert_hf.py:

@@ -189,15 +189,15 @@ if __name__ == "__main__":
         type=str,
         default="true",
         help=(
-            "Whether to use a attention mask for padding tokens. Options: 'true', 'false'"
+            "Whether to use a attention mask for padding tokens. Options: 'true', 'false'."
         )
     )
     parser.add_argument(
-        "--bert_model",
+        "--model",
         type=str,
         default="distilbert",
         help=(
-            "Which model to train. Options: 'distilbert', 'bert'."
+            "Which model to train. Options: 'distilbert', 'bert', 'roberta'."
         )
     )
     parser.add_argument(
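As an aside, argparse can enforce the option list itself. A minimal sketch of the renamed argument under that assumption; the `choices` keyword is an alternative, not what the commit does:

```python
import argparse

parser = argparse.ArgumentParser()
# Hypothetical variant of the renamed flag: `choices` makes argparse reject
# unsupported values on its own, so the manual `raise ValueError(...)`
# fallback in the scripts would never trigger for this argument.
parser.add_argument(
    "--model",
    type=str,
    default="distilbert",
    choices=["distilbert", "bert", "roberta"],
    help="Which model to train. Options: 'distilbert', 'bert', 'roberta'.",
)
args = parser.parse_args()
print(args.model)
```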
@@ -223,7 +223,7 @@ if __name__ == "__main__":
     ###############################
 
     torch.manual_seed(123)
-    if args.bert_model == "distilbert":
+    if args.model == "distilbert":
 
         model = AutoModelForSequenceClassification.from_pretrained(
             "distilbert-base-uncased", num_labels=2
@@ -247,7 +247,7 @@ if __name__ == "__main__":
 
         tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
 
-    elif args.bert_model == "bert":
+    elif args.model == "bert":
 
         model = AutoModelForSequenceClassification.from_pretrained(
             "bert-base-uncased", num_labels=2
@@ -272,7 +272,7 @@ if __name__ == "__main__":
             raise ValueError("Invalid --trainable_layers argument.")
 
         tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
-    elif args.bert_model == "roberta":
+    elif args.model == "roberta":
 
         model = AutoModelForSequenceClassification.from_pretrained(
             "FacebookAI/roberta-large", num_labels=2
@@ -296,7 +296,7 @@ if __name__ == "__main__":
 
         tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-large")
     else:
-        raise ValueError("Selected --bert_model not supported.")
+        raise ValueError("Selected --model {args.model} not supported.")
 
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model.to(device)
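One caveat with the new error message in this hunk (it recurs in the second script below): without an `f` prefix, the `{args.model}` placeholder is not interpolated, so the braces print literally. A minimal illustration:

```python
from types import SimpleNamespace

args = SimpleNamespace(model="albert")

# As committed: a plain string, so the placeholder is printed verbatim.
print("Selected --model {args.model} not supported.")
# With the f prefix, the selected value is interpolated as intended.
print(f"Selected --model {args.model} not supported.")
```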
train_bert_hf_spam.py:

@@ -280,15 +280,15 @@ if __name__ == "__main__":
         type=str,
         default="true",
         help=(
-            "Whether to use a attention mask for padding tokens. Options: 'true', 'false'"
+            "Whether to use a attention mask for padding tokens. Options: 'true', 'false'."
         )
     )
     parser.add_argument(
-        "--bert_model",
+        "--model",
         type=str,
         default="distilbert",
         help=(
-            "Which model to train. Options: 'distilbert', 'bert'."
+            "Which model to train. Options: 'distilbert', 'bert', 'roberta'."
         )
     )
     parser.add_argument(
@@ -314,7 +314,7 @@ if __name__ == "__main__":
     ###############################
 
     torch.manual_seed(123)
-    if args.bert_model == "distilbert":
+    if args.model == "distilbert":
 
         model = AutoModelForSequenceClassification.from_pretrained(
             "distilbert-base-uncased", num_labels=2
@@ -338,7 +338,7 @@ if __name__ == "__main__":
 
         tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
 
-    elif args.bert_model == "bert":
+    elif args.model == "bert":
 
         model = AutoModelForSequenceClassification.from_pretrained(
             "bert-base-uncased", num_labels=2
@@ -363,7 +363,7 @@ if __name__ == "__main__":
             raise ValueError("Invalid --trainable_layers argument.")
 
         tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
-    elif args.bert_model == "roberta":
+    elif args.model == "roberta":
 
         model = AutoModelForSequenceClassification.from_pretrained(
             "FacebookAI/roberta-large", num_labels=2
@@ -387,7 +387,7 @@ if __name__ == "__main__":
 
         tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-large")
     else:
-        raise ValueError("Selected --bert_model not supported.")
+        raise ValueError("Selected --model {args.model} not supported.")
 
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model.to(device)
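A design note on the if/elif chain that the rename touches in both scripts: the name-to-checkpoint mapping could equally live in a dict, keeping the supported options and the error message in one place. A sketch under that assumption, not how the scripts are actually written (and omitting their trainable-layer freezing logic):

```python
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Checkpoint IDs as used in the diffs above.
CHECKPOINTS = {
    "distilbert": "distilbert-base-uncased",
    "bert": "bert-base-uncased",
    "roberta": "FacebookAI/roberta-large",
}

def load_model_and_tokenizer(name):
    # Fail early, with the valid options spelled out once.
    if name not in CHECKPOINTS:
        raise ValueError(
            f"Selected --model {name} not supported. "
            f"Options: {sorted(CHECKPOINTS)}"
        )
    checkpoint = CHECKPOINTS[name]
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    return model, tokenizer

model, tokenizer = load_model_and_tokenizer("distilbert")
```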