# Source: mirror of https://github.com/PaddlePaddle/PaddleOCR.git (synced 2025-10-31 09:49:30 +00:00)
# Commit 3f65b360ef: add vl; add ref; fix head out; add visionlan doc; fix vl infer; update dict
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from paddle import optimizer as optim
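
# Note (added for clarity): each class below is a small factory around a
# paddle.optimizer.* constructor. It stores the optimizer hyper-parameters at
# construction time and, when called with a model, collects the model's trainable
# parameters and returns the configured paddle optimizer instance.
# (Assumption: PaddleOCR's optimizer builder instantiates these classes from the
# `Optimizer` section of a training YAML config.)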


class Momentum(object):
    """
    Simple Momentum optimizer with velocity state.
    Args:
        learning_rate (float|Variable) - The learning rate used to update parameters.
            Can be a float value or a Variable with one float value as data element.
        momentum (float) - Momentum factor.
        weight_decay (float|WeightDecayRegularizer, optional) - The weight decay (regularization) strategy.
        grad_clip (GradientClipBase, optional) - The gradient clipping strategy.
    """

    def __init__(self,
                 learning_rate,
                 momentum,
                 weight_decay=None,
                 grad_clip=None,
                 **args):
        super(Momentum, self).__init__()
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.weight_decay = weight_decay
        self.grad_clip = grad_clip

    def __call__(self, model):
        train_params = [
            param for param in model.parameters() if param.trainable is True
        ]
        opt = optim.Momentum(
            learning_rate=self.learning_rate,
            momentum=self.momentum,
            weight_decay=self.weight_decay,
            grad_clip=self.grad_clip,
            parameters=train_params)
        return opt
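
# Minimal usage sketch (illustrative; the Linear layer is a hypothetical stand-in
# for an OCR model):
#
#     import paddle
#     model = paddle.nn.Linear(32, 10)
#     optimizer = Momentum(learning_rate=0.001, momentum=0.9)(model)
#     loss = paddle.mean(model(paddle.rand([4, 32])))
#     loss.backward()
#     optimizer.step()
#     optimizer.clear_grad()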


class Adam(object):
    def __init__(self,
                 learning_rate=0.001,
                 beta1=0.9,
                 beta2=0.999,
                 epsilon=1e-08,
                 parameter_list=None,
                 weight_decay=None,
                 grad_clip=None,
                 name=None,
                 lazy_mode=False,
                 **kwargs):
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.parameter_list = parameter_list
        self.weight_decay = weight_decay
        self.grad_clip = grad_clip
        self.name = name
        self.lazy_mode = lazy_mode
        self.group_lr = kwargs.get('group_lr', False)
        self.training_step = kwargs.get('training_step', None)

    def __call__(self, model):
        if self.group_lr:
            if self.training_step == 'LF_2':
                import paddle
                if isinstance(model, paddle.fluid.dygraph.parallel.
                              DataParallel):  # multi gpu
                    mlm = model._layers.head.MLM_VRM.MLM.parameters()
                    pre_mlm_pp = model._layers.head.MLM_VRM.Prediction.pp_share.parameters()
                    pre_mlm_w = model._layers.head.MLM_VRM.Prediction.w_share.parameters()
                else:  # single gpu
                    mlm = model.head.MLM_VRM.MLM.parameters()
                    pre_mlm_pp = model.head.MLM_VRM.Prediction.pp_share.parameters()
                    pre_mlm_w = model.head.MLM_VRM.Prediction.w_share.parameters()

                # Collect the ids of the MLM and shared Prediction parameters; these
                # keep the base learning rate.
                total = []
                for param in mlm:
                    total.append(id(param))
                for param in pre_mlm_pp:
                    total.append(id(param))
                for param in pre_mlm_w:
                    total.append(id(param))

                group_base_params = [
                    param for param in model.parameters() if id(param) in total
                ]
                group_small_params = [
                    param for param in model.parameters()
                    if id(param) not in total
                ]
                # All remaining parameters are trained at 0.1x the base learning rate.
                train_params = [{
                    'params': group_base_params
                }, {
                    'params': group_small_params,
                    'learning_rate': self.learning_rate.values[0] * 0.1
                }]

            else:
                print(
                    'group lr currently only supports VisionLAN in the LF_2 training step'
                )
                train_params = [
                    param for param in model.parameters()
                    if param.trainable is True
                ]
        else:
            train_params = [
                param for param in model.parameters() if param.trainable is True
            ]

        opt = optim.Adam(
            learning_rate=self.learning_rate,
            beta1=self.beta1,
            beta2=self.beta2,
            epsilon=self.epsilon,
            weight_decay=self.weight_decay,
            grad_clip=self.grad_clip,
            name=self.name,
            lazy_mode=self.lazy_mode,
            parameters=train_params)
        return opt
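
# Note on the group_lr branch above (added clarification, inferred from the code rather
# than documented upstream): with group_lr=True and training_step='LF_2' (VisionLAN's
# second training stage), the parameters of head.MLM_VRM.MLM and the shared Prediction
# layers keep the base learning rate, while every other parameter is trained at 0.1x of
# it. Because the branch reads `self.learning_rate.values[0]`, the learning rate passed
# in must expose a `values` list (e.g. a piecewise schedule wrapper), not a plain float.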


class RMSProp(object):
    """
    Root Mean Squared Propagation (RMSProp) is an unpublished, adaptive learning rate method.
    Args:
        learning_rate (float|Variable) - The learning rate used to update parameters.
            Can be a float value or a Variable with one float value as data element.
        momentum (float) - Momentum factor.
        rho (float) - rho value in the update equation.
        epsilon (float) - Small value added to avoid division by zero, default is 1e-6.
        weight_decay (float|WeightDecayRegularizer, optional) - The weight decay (regularization) strategy.
        grad_clip (GradientClipBase, optional) - The gradient clipping strategy.
    """

    def __init__(self,
                 learning_rate,
                 momentum=0.0,
                 rho=0.95,
                 epsilon=1e-6,
                 weight_decay=None,
                 grad_clip=None,
                 **args):
        super(RMSProp, self).__init__()
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.rho = rho
        self.epsilon = epsilon
        self.weight_decay = weight_decay
        self.grad_clip = grad_clip

    def __call__(self, model):
        train_params = [
            param for param in model.parameters() if param.trainable is True
        ]
        opt = optim.RMSProp(
            learning_rate=self.learning_rate,
            momentum=self.momentum,
            rho=self.rho,
            epsilon=self.epsilon,
            weight_decay=self.weight_decay,
            grad_clip=self.grad_clip,
            parameters=train_params)
        return opt


class Adadelta(object):
    def __init__(self,
                 learning_rate=0.001,
                 epsilon=1e-08,
                 rho=0.95,
                 parameter_list=None,
                 weight_decay=None,
                 grad_clip=None,
                 name=None,
                 **kwargs):
        self.learning_rate = learning_rate
        self.epsilon = epsilon
        self.rho = rho
        self.parameter_list = parameter_list
        self.weight_decay = weight_decay
        self.grad_clip = grad_clip
        self.name = name

    def __call__(self, model):
        train_params = [
            param for param in model.parameters() if param.trainable is True
        ]
        opt = optim.Adadelta(
            learning_rate=self.learning_rate,
            epsilon=self.epsilon,
            rho=self.rho,
            weight_decay=self.weight_decay,
            grad_clip=self.grad_clip,
            name=self.name,
            parameters=train_params)
        return opt


class AdamW(object):
    def __init__(self,
                 learning_rate=0.001,
                 beta1=0.9,
                 beta2=0.999,
                 epsilon=1e-8,
                 weight_decay=0.01,
                 multi_precision=False,
                 grad_clip=None,
                 no_weight_decay_name=None,
                 one_dim_param_no_weight_decay=False,
                 name=None,
                 lazy_mode=False,
                 **args):
        super().__init__()
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.weight_decay = 0.01 if weight_decay is None else weight_decay
        self.grad_clip = grad_clip
        self.name = name
        self.lazy_mode = lazy_mode
        self.multi_precision = multi_precision
        self.no_weight_decay_name_list = (
            no_weight_decay_name.split() if no_weight_decay_name else [])
        self.one_dim_param_no_weight_decay = one_dim_param_no_weight_decay

    def __call__(self, model):
        parameters = [
            param for param in model.parameters() if param.trainable is True
        ]

        # Parameters whose names contain any of the configured substrings are
        # excluded from weight decay.
        self.no_weight_decay_param_name_list = [
            p.name for n, p in model.named_parameters()
            if any(nd in n for nd in self.no_weight_decay_name_list)
        ]

        if self.one_dim_param_no_weight_decay:
            # Optionally also exclude all 1-D parameters (biases, norm scales).
            self.no_weight_decay_param_name_list += [
                p.name for n, p in model.named_parameters() if len(p.shape) == 1
            ]

        opt = optim.AdamW(
            learning_rate=self.learning_rate,
            beta1=self.beta1,
            beta2=self.beta2,
            epsilon=self.epsilon,
            parameters=parameters,
            weight_decay=self.weight_decay,
            multi_precision=self.multi_precision,
            grad_clip=self.grad_clip,
            name=self.name,
            lazy_mode=self.lazy_mode,
            apply_decay_param_fun=self._apply_decay_param_fun)
        return opt

    def _apply_decay_param_fun(self, name):
        return name not in self.no_weight_decay_param_name_list
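
# Usage sketch for the weight-decay exclusion logic (illustrative; the model and the
# matched parameter-name substrings are hypothetical):
#
#     import paddle
#     model = paddle.nn.Sequential(paddle.nn.Linear(32, 32), paddle.nn.LayerNorm(32))
#     builder = AdamW(learning_rate=0.001,
#                     weight_decay=0.05,
#                     no_weight_decay_name='bias norm',
#                     one_dim_param_no_weight_decay=True)
#     optimizer = builder(model)
#     # paddle.optimizer.AdamW evaluates _apply_decay_param_fun per parameter name and
#     # skips decay for every name collected in no_weight_decay_param_name_list.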