# mirror of https://github.com/microsoft/autogen.git (synced 2025-10-31 09:50:11 +00:00)

import os

try:
    from transformers import Trainer as TFTrainer
except ImportError:
    # Fall back to a plain base class so the module can be imported even
    # when transformers is not installed.
    TFTrainer = object


class TrainerForAutoTransformers(TFTrainer):
    """A transformers.Trainer subclass that checkpoints and reports to Ray Tune."""

    def evaluate(self, eval_dataset=None):
        """Overrides transformers.Trainer.evaluate, saving state with
        save_state and reporting the evaluation metrics to Ray Tune.

        Args:
            eval_dataset:
                The dataset to be evaluated.
        """
        # TODO coverage
        from ray import tune

        eval_dataloader = self.get_eval_dataloader(eval_dataset)
        output = self.prediction_loop(
            eval_dataloader, description="Evaluation")
        self.log(output.metrics)

        self.save_state()

        # Report each metric to Tune under its name both with and without
        # the "eval_" prefix (e.g. "eval_loss" and "loss").
        for key in list(output.metrics.keys()):
            if key.startswith("eval_"):
                output.metrics[key[5:]] = output.metrics[key]
        tune.report(**output.metrics)

        return output.metrics

    def save_state(self):
        """Overrides transformers.Trainer.save_state. It is only by saving
        the state that best_trial.get_best_checkpoint can return a non-empty
        value.
        """
        # TODO coverage
        import torch
        from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
        from ray import tune

        with tune.checkpoint_dir(step=self.state.global_step) as checkpoint_dir:
            self.args.output_dir = checkpoint_dir
            # This is the directory name that Huggingface requires.
            output_dir = os.path.join(
                self.args.output_dir,
                f"{PREFIX_CHECKPOINT_DIR}-{self.state.global_step}")
            self.save_model(output_dir)
            torch.save(self.optimizer.state_dict(),
                       os.path.join(output_dir, "optimizer.pt"))
            torch.save(self.lr_scheduler.state_dict(),
                       os.path.join(output_dir, "scheduler.pt"))

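    # Sketch of the on-disk layout produced by save_state (illustrative; this
    # file targets the legacy tune.checkpoint_dir API, and exact names depend
    # on the Ray and transformers versions in use):
    #
    #   <tune trial dir>/<tune checkpoint dir>/checkpoint-<global_step>/
    #       pytorch_model.bin   (written by save_model)
    #       optimizer.pt
    #       scheduler.pt
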
    @staticmethod
    def convert_num_train_epochs_to_max_steps(
            num_train_epochs: int,
            num_train_examples: int,
            per_device_train_batch_size: int,
            device_count: int):
        """Convert a number of training epochs into the equivalent max_steps."""
        return int(num_train_epochs * num_train_examples
                   / per_device_train_batch_size / device_count)

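    # Worked example for convert_num_train_epochs_to_max_steps (illustrative
    # values, not from the original source): 3 epochs over 1,000 examples with
    # per-device batch size 8 on 2 devices gives
    # int(3 * 1000 / 8 / 2) == 187 steps. Note that gradient accumulation is
    # not factored into this conversion.
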
    @staticmethod
    def convert_max_steps_to_num_train_epochs(
            max_steps: int,
            num_train_examples: int,
            per_device_train_batch_size: int,
            device_count: int):
        """Convert max_steps into the equivalent number of training epochs."""
        return float(max_steps * per_device_train_batch_size
                     * device_count) / num_train_examples

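    # convert_max_steps_to_num_train_epochs is the inverse of the conversion
    # above; because max_steps is truncated with int(), a round trip can land
    # slightly below the original epoch count, e.g. 187 steps * 8 * 2 / 1000
    # examples == 2.992 epochs rather than 3 (illustrative values).
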
    @staticmethod
    def convert_warmup_ratio_to_warmup_steps(
            warmup_ratio,
            max_steps=None,
            num_train_epochs=None,
            num_train_examples=None,
            per_device_train_batch_size=None,
            device_count=None):
        """Convert a warmup ratio into the equivalent number of warmup steps."""
        if max_steps:
            return int(warmup_ratio * max_steps)
        # TODO coverage
        max_steps = TrainerForAutoTransformers.convert_num_train_epochs_to_max_steps(
            num_train_epochs,
            num_train_examples,
            per_device_train_batch_size,
            device_count)
        return int(warmup_ratio * max_steps)

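    # Example for convert_warmup_ratio_to_warmup_steps (illustrative values):
    # with max_steps=500 and warmup_ratio=0.1, the result is
    # int(0.1 * 500) == 50 warmup steps; when max_steps is None, it is first
    # derived from the epoch count via convert_num_train_epochs_to_max_steps.
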
    @staticmethod
    def convert_warmup_steps_to_warmup_ratio(
            warmup_steps: int,
            num_train_epochs: int,
            num_train_examples: int,
            per_device_train_batch_size: int,
            device_count: int):
        """Convert a number of warmup steps into the equivalent warmup ratio."""
        max_steps = TrainerForAutoTransformers.convert_num_train_epochs_to_max_steps(
            num_train_epochs,
            num_train_examples,
            per_device_train_batch_size,
            device_count)
        return float(warmup_steps / max_steps)
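

if __name__ == "__main__":
    # A minimal, runnable self-check of the unit-conversion helpers above.
    # All numbers are illustrative assumptions, not values from the original
    # source; evaluate() and save_state() are not exercised here because they
    # require live transformers and Ray Tune sessions.
    steps = TrainerForAutoTransformers.convert_num_train_epochs_to_max_steps(
        num_train_epochs=3,
        num_train_examples=1000,
        per_device_train_batch_size=8,
        device_count=2)
    assert steps == 187

    epochs = TrainerForAutoTransformers.convert_max_steps_to_num_train_epochs(
        max_steps=steps,
        num_train_examples=1000,
        per_device_train_batch_size=8,
        device_count=2)
    assert abs(epochs - 2.992) < 1e-9

    warmup_steps = TrainerForAutoTransformers.convert_warmup_ratio_to_warmup_steps(
        warmup_ratio=0.1, max_steps=500)
    assert warmup_steps == 50

    warmup_ratio = TrainerForAutoTransformers.convert_warmup_steps_to_warmup_ratio(
        warmup_steps=50,
        num_train_epochs=1,
        num_train_examples=8000,
        per_device_train_batch_size=8,
        device_count=2)
    assert warmup_ratio == 0.1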
