import os

try:
    from transformers import Trainer as TFTrainer
except ImportError:
    # Fall back to a plain object so this module can still be imported
    # when transformers is not installed.
    TFTrainer = object


class TrainerForAutoTransformers(TFTrainer):
    def evaluate(self, eval_dataset=None):
        """Overriding transformers.Trainer.evaluate by reporting the metrics
        to Ray Tune and saving state with save_state.

        Args:
            eval_dataset: the dataset to be evaluated.
        """
        # TODO coverage
        from ray import tune

        eval_dataloader = self.get_eval_dataloader(eval_dataset)
        output = self.prediction_loop(eval_dataloader, description="Evaluation")
        self.log(output.metrics)
        self.save_state()
        # Report each metric under both its original "eval_"-prefixed name
        # and an unprefixed alias.
        for key in list(output.metrics.keys()):
            if key.startswith("eval_"):
                output.metrics[key[5:]] = output.metrics[key]
        tune.report(**output.metrics)
        return output.metrics

    def save_state(self):
        """Overriding transformers.Trainer.save_state. It is only by saving
        the trainer state that best_trial.get_best_checkpoint can return a
        non-empty value.
        """
        # TODO coverage
        import torch
        from ray import tune
        from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR

        with tune.checkpoint_dir(step=self.state.global_step) as checkpoint_dir:
            self.args.output_dir = checkpoint_dir
            # This is the directory name that HuggingFace requires.
            output_dir = os.path.join(
                self.args.output_dir,
                f"{PREFIX_CHECKPOINT_DIR}-{self.state.global_step}")
            self.save_model(output_dir)
            torch.save(self.optimizer.state_dict(),
                       os.path.join(output_dir, "optimizer.pt"))
            torch.save(self.lr_scheduler.state_dict(),
                       os.path.join(output_dir, "scheduler.pt"))

    @staticmethod
    def convert_num_train_epochs_to_max_steps(
            num_train_epochs: int,
            num_train_examples: int,
            per_device_train_batch_size: int,
            device_count: int):
        """Convert a number of training epochs to the equivalent total
        number of optimization steps."""
        return int(num_train_epochs * num_train_examples
                   / per_device_train_batch_size / device_count)
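
    # Illustrative arithmetic (values assumed, not from the original module):
    # num_train_epochs=3, num_train_examples=8000,
    # per_device_train_batch_size=16, device_count=2
    # gives int(3 * 8000 / 16 / 2) = 750 steps.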

    @staticmethod
    def convert_max_steps_to_num_train_epochs(
            max_steps: int,
            num_train_examples: int,
            per_device_train_batch_size: int,
            device_count: int):
        """Convert a total number of optimization steps back to the
        equivalent (possibly fractional) number of training epochs."""
        return float(max_steps * per_device_train_batch_size
                     * device_count) / num_train_examples
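
    # Inverse of the conversion above: under the same assumed values,
    # 750 steps maps back to float(750 * 16 * 2) / 8000 = 3.0 epochs.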

    @staticmethod
    def convert_warmup_ratio_to_warmup_steps(
            warmup_ratio,
            max_steps=None,
            num_train_epochs=None,
            num_train_examples=None,
            per_device_train_batch_size=None,
            device_count=None):
        """Convert a warmup ratio to an absolute number of warmup steps.

        If max_steps is not given, it is first derived from the epoch-based
        arguments.
        """
        if max_steps:
            return int(warmup_ratio * max_steps)
        # TODO coverage
        max_steps = TrainerForAutoTransformers.convert_num_train_epochs_to_max_steps(
            num_train_epochs,
            num_train_examples,
            per_device_train_batch_size,
            device_count)
        return int(warmup_ratio * max_steps)
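
    # Illustrative arithmetic (assumed values): warmup_ratio=0.1 with
    # max_steps=750 yields int(0.1 * 750) = 75 warmup steps; with max_steps
    # unset, 750 is first derived from the epoch-based arguments.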

    @staticmethod
    def convert_warmup_steps_to_warmup_ratio(
            warmup_steps: int,
            num_train_epochs: int,
            num_train_examples: int,
            per_device_train_batch_size: int,
            device_count: int):
        """Convert an absolute number of warmup steps to a warmup ratio."""
        max_steps = TrainerForAutoTransformers.convert_num_train_epochs_to_max_steps(
            num_train_epochs,
            num_train_examples,
            per_device_train_batch_size,
            device_count)
        return float(warmup_steps) / max_steps
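

# A minimal usage sketch of the conversion helpers. All values below are
# assumed for illustration and do not come from the original module; the
# static methods need nothing from transformers, so this also runs with the
# TFTrainer = object fallback.
if __name__ == "__main__":
    steps = TrainerForAutoTransformers.convert_num_train_epochs_to_max_steps(
        num_train_epochs=3,
        num_train_examples=8000,
        per_device_train_batch_size=16,
        device_count=2)
    warmup = TrainerForAutoTransformers.convert_warmup_ratio_to_warmup_steps(
        0.1, max_steps=steps)
    print(steps, warmup)  # 750 75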